├── Build.py ├── Readme.md ├── encoding-indexes.js ├── encoding.js ├── package ├── LICENSE ├── NOTICE ├── README.md ├── lib.typ ├── pintora.js ├── pintorita.svg ├── pintorita.typ └── typst.toml └── runtime.esm.js /Build.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = "==3.9" 3 | # dependencies = ["quickjs","jsmin"] 4 | # /// 5 | 6 | import quickjs 7 | from jsmin import jsmin 8 | from pathlib import Path 9 | 10 | TS1 = """ 11 | sequenceDiagram 12 | title: Sequence Diagram Example 13 | autonumber 14 | participant [ User] 15 | User->>Pintora: Draw me a sequence diagram（with DSL） 16 | activate Pintora 17 | Pintora->>Pintora: Parse DSL, draw diagram 18 | alt DSL is correct 19 | Pintora->>User: Return the drawn diagram 20 | else DSL is incorrect 21 | Pintora->>User: Return error message 22 | end 23 | deactivate Pintora 24 | @note left of Pintora 25 | Different output formats according to render targets 26 | 1. In browser side. output SVG or Canvas 27 | 2. In Node.js side. output PNG file 28 | @end_note 29 | """ 30 | 31 | TS2 = """ 32 | mindmap 33 | @param layoutDirection TB 34 | @param { 35 | l1NodeBgColor #2B7A5D 36 | l1NodeTextColor #fff 37 | l2NodeBgColor #26946C 38 | l2NodeTextColor #fff 39 | nodeBgColor #67B599 40 | textColor #fff 41 | } 42 | + UML Diagrams 43 | ++ Behavior Diagrams 44 | +++ Sequence Diagram 45 | +++ State Diagram 46 | +++ Activity Diagram 47 | ++ Structural Diagrams 48 | +++ Class Diagram 49 | +++ Component Diagram 50 | """ 51 | 52 | 53 | TS3= """ 54 | componentDiagram 55 | title: Component Diagram Example 56 | package "@pintora/core" { 57 | () GraphicsIR 58 | () IRenderer 59 | () IDiagram 60 | [Diagram Registry] as registry 61 | } 62 | package "@pintora/diagrams" { 63 | [...Multiple Diagrams...] 
as diagrams 64 | [diagrams] 65 | [diagrams] --> IDiagram : implements 66 | } 67 | package "@pintora/renderer" { 68 | () "render()" as renderFn 69 | [SVGRender] 70 | [CanvasRender] 71 | [SVGRender] --> IRenderer : implements 72 | [CanvasRender] --> IRenderer : implements 73 | IRenderer ..> GraphicsIR : accepts 74 | } 75 | package "@pintora/standalone" { 76 | [standalone] 77 | } 78 | [IDiagram] --> GraphicsIR : generate 79 | [standalone] --> registry : register all of @pintora/diagrams 80 | [@pintora/standalone] --> [@pintora/diagrams] : import 81 | [standalone] --> renderFn : call with GraphicsIR 82 | 83 | """ 84 | 85 | 86 | qj = quickjs.Context() 87 | file = [] 88 | def qjeval(instr): 89 | qj.eval(instr) 90 | file.append(instr) 91 | 92 | 93 | pintora = Path('runtime.esm.js') 94 | encoding = Path('encoding.js') 95 | encodingIdx = Path('encoding-indexes.js') 96 | 97 | 98 | 99 | qjeval(''' 100 | class ConsoleStub { 101 | constructor() { 102 | this.logHistory = []; 103 | this.errorHistory = []; 104 | this.warnHistory = []; 105 | } 106 | 107 | log(...args) { 108 | const message = args.join(' '); 109 | this.logHistory.push(message); 110 | } 111 | 112 | error(...args) { 113 | const message = args.join(' '); 114 | this.errorHistory.push(message); 115 | } 116 | 117 | warn(...args) { 118 | const message = args.join(' '); 119 | this.warnHistory.push(message); 120 | } 121 | } 122 | 123 | var console = new ConsoleStub(); 124 | 125 | 126 | 127 | 128 | ''') 129 | 130 | import re 131 | 132 | qjeval(encodingIdx.read_text(encoding="UTF-8")) 133 | qjeval(encoding.read_text(encoding="UTF-8")) 134 | 135 | 136 | QJSFIXED = re.sub(r"export\s*\{.*\}","//EXPORTS arn't SUPPORTED", 137 | pintora.read_text(encoding="UTF-8") 138 | .replace("import.meta.url",'""'), 139 | flags = re.MULTILINE|re.DOTALL) 140 | 141 | # print("\n".join(QJSFIXED.split("\n")[63152:63158])) 142 | qjeval(QJSFIXED) 143 | 144 | 145 | 146 | 147 | 148 | qjeval(""" 149 | var document = new Document() 150 | var csrc = 
document.createElement("div") 151 | csrc.dataset=[]; 152 | var rslt = document.createElement("svg") 153 | 154 | csrc.dataset['renderer'] 155 | 156 | 157 | function PintoraRender(e, t = "default", A = "Source Code Pro, sans-serif") { 158 | csrc.dataset.theme = t; 159 | var n = config; 160 | if (n.core.defaultFontFamily = A, configApi.setConfig(n), runtime_default.setConfig(n), console = new ConsoleStub, csrc.innerText = e, pintoraStandalone.renderContentOf(csrc, { 161 | resultContainer: rslt 162 | }), "" === rslt.innerHTML) throw errorString = "\\n " + String(console.warnHistory.slice(-1)), new Error(errorString); 163 | return rslt.firstChild.setAttribute("xmlns", "http://www.w3.org/2000/svg"), rslt.innerHTML 164 | } 165 | 166 | """) 167 | Path("package/pintora.js").write_text(jsmin("\n".join(file)),encoding="UTF-8") 168 | 169 | 170 | 171 | Render=qj.eval("PintoraRender") 172 | 173 | print(Render(TS2)) 174 | 175 | print(qj.eval("")) 176 | 177 | 178 | # print(Render(TS3)) 179 | 180 | # print(qj.eval("pintoraStandalone.renderTo(randStr,{container:rslt,config:null})")) 181 | 182 | # #look at logs and errors: 183 | # print(qj.eval("console.logHistory.join()")) 184 | # print(qj.eval("console.warnHistory.join()")) 185 | # print(qj.eval("console.errorHistory.join()")) 186 | 187 | # # get output 188 | # print(qj.eval("rslt.innerHTML")) 189 | # Path("output.svg").write_text(qj.eval("rslt.innerHTML"),encoding="UTF-8") 190 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # How to build 2 | Requirements: 3 | - uv - this now uses uv as the python package manager. 4 | - npm - to get the package 5 | - git - to clone this 6 | 7 | first clone this repo. 
8 | 9 | To update pintora, get the latest source from: 10 | - https://www.npmjs.com/package/@pintora/target-wintercg 11 | 12 | I use a command like the following to download the package: 13 | ``` 14 | npm pack @pintora/target-wintercg@0.1.4 15 | ``` 16 | 17 | Extract "runtime.esm.js" from the downloaded package 18 | and use it to replace the copy in the git repo. 19 | 20 | Then run: 21 | ``` 22 | uv run Build.py 23 | ``` 24 | 25 | This will place an updated copy of pintora.js in the package/ directory. -------------------------------------------------------------------------------- /encoding.js: -------------------------------------------------------------------------------- 1 | // This is free and unencumbered software released into the public domain. 2 | // See LICENSE.md for more information. 3 | 4 | /** 5 | * @fileoverview Global |this| required for resolving indexes in node. 6 | * @suppress {globalThis} 7 | */ 8 | (function(global) { 9 | 'use strict'; 10 | 11 | // If we're in node require encoding-indexes and attach it to the global. 12 | if (typeof module !== "undefined" && module.exports && 13 | !global["encoding-indexes"]) { 14 | global["encoding-indexes"] = 15 | require("./encoding-indexes.js")["encoding-indexes"]; 16 | } 17 | 18 | // 19 | // Utilities 20 | // 21 | 22 | /** 23 | * @param {number} a The number to test. 24 | * @param {number} min The minimum value in the range, inclusive. 25 | * @param {number} max The maximum value in the range, inclusive. 26 | * @return {boolean} True if a >= min and a <= max. 27 | */ 28 | function inRange(a, min, max) { 29 | return min <= a && a <= max; 30 | } 31 | 32 | /** 33 | * @param {!Array.<*>} array The array to check. 34 | * @param {*} item The item to look for in the array. 35 | * @return {boolean} True if the item appears in the array. 
36 | */ 37 | function includes(array, item) { 38 | return array.indexOf(item) !== -1; 39 | } 40 | 41 | var floor = Math.floor; 42 | 43 | /** 44 | * @param {*} o 45 | * @return {Object} 46 | */ 47 | function ToDictionary(o) { 48 | if (o === undefined) return {}; 49 | if (o === Object(o)) return o; 50 | throw TypeError('Could not convert argument to dictionary'); 51 | } 52 | 53 | /** 54 | * @param {string} string Input string of UTF-16 code units. 55 | * @return {!Array.} Code points. 56 | */ 57 | function stringToCodePoints(string) { 58 | // https://heycam.github.io/webidl/#dfn-obtain-unicode 59 | 60 | // 1. Let S be the DOMString value. 61 | var s = String(string); 62 | 63 | // 2. Let n be the length of S. 64 | var n = s.length; 65 | 66 | // 3. Initialize i to 0. 67 | var i = 0; 68 | 69 | // 4. Initialize U to be an empty sequence of Unicode characters. 70 | var u = []; 71 | 72 | // 5. While i < n: 73 | while (i < n) { 74 | 75 | // 1. Let c be the code unit in S at index i. 76 | var c = s.charCodeAt(i); 77 | 78 | // 2. Depending on the value of c: 79 | 80 | // c < 0xD800 or c > 0xDFFF 81 | if (c < 0xD800 || c > 0xDFFF) { 82 | // Append to U the Unicode character with code point c. 83 | u.push(c); 84 | } 85 | 86 | // 0xDC00 ≤ c ≤ 0xDFFF 87 | else if (0xDC00 <= c && c <= 0xDFFF) { 88 | // Append to U a U+FFFD REPLACEMENT CHARACTER. 89 | u.push(0xFFFD); 90 | } 91 | 92 | // 0xD800 ≤ c ≤ 0xDBFF 93 | else if (0xD800 <= c && c <= 0xDBFF) { 94 | // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT 95 | // CHARACTER. 96 | if (i === n - 1) { 97 | u.push(0xFFFD); 98 | } 99 | // 2. Otherwise, i < n−1: 100 | else { 101 | // 1. Let d be the code unit in S at index i+1. 102 | var d = s.charCodeAt(i + 1); 103 | 104 | // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then: 105 | if (0xDC00 <= d && d <= 0xDFFF) { 106 | // 1. Let a be c & 0x3FF. 107 | var a = c & 0x3FF; 108 | 109 | // 2. Let b be d & 0x3FF. 110 | var b = d & 0x3FF; 111 | 112 | // 3. 
Append to U the Unicode character with code point 113 | // 2^16+2^10*a+b. 114 | u.push(0x10000 + (a << 10) + b); 115 | 116 | // 4. Set i to i+1. 117 | i += 1; 118 | } 119 | 120 | // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a 121 | // U+FFFD REPLACEMENT CHARACTER. 122 | else { 123 | u.push(0xFFFD); 124 | } 125 | } 126 | } 127 | 128 | // 3. Set i to i+1. 129 | i += 1; 130 | } 131 | 132 | // 6. Return U. 133 | return u; 134 | } 135 | 136 | /** 137 | * @param {!Array.} code_points Array of code points. 138 | * @return {string} string String of UTF-16 code units. 139 | */ 140 | function codePointsToString(code_points) { 141 | var s = ''; 142 | for (var i = 0; i < code_points.length; ++i) { 143 | var cp = code_points[i]; 144 | if (cp <= 0xFFFF) { 145 | s += String.fromCharCode(cp); 146 | } else { 147 | cp -= 0x10000; 148 | s += String.fromCharCode((cp >> 10) + 0xD800, 149 | (cp & 0x3FF) + 0xDC00); 150 | } 151 | } 152 | return s; 153 | } 154 | 155 | 156 | // 157 | // Implementation of Encoding specification 158 | // https://encoding.spec.whatwg.org/ 159 | // 160 | 161 | // 162 | // 4. Terminology 163 | // 164 | 165 | /** 166 | * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive. 167 | * @param {number} a The number to test. 168 | * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive. 169 | */ 170 | function isASCIIByte(a) { 171 | return 0x00 <= a && a <= 0x7F; 172 | } 173 | 174 | /** 175 | * An ASCII code point is a code point in the range U+0000 to 176 | * U+007F, inclusive. 177 | */ 178 | var isASCIICodePoint = isASCIIByte; 179 | 180 | 181 | /** 182 | * End-of-stream is a special token that signifies no more tokens 183 | * are in the stream. 184 | * @const 185 | */ var end_of_stream = -1; 186 | 187 | /** 188 | * A stream represents an ordered sequence of tokens. 189 | * 190 | * @constructor 191 | * @param {!(Array.|Uint8Array)} tokens Array of tokens that provide 192 | * the stream. 
193 | */ 194 | function Stream(tokens) { 195 | /** @type {!Array.} */ 196 | this.tokens = [].slice.call(tokens); 197 | // Reversed as push/pop is more efficient than shift/unshift. 198 | this.tokens.reverse(); 199 | } 200 | 201 | Stream.prototype = { 202 | /** 203 | * @return {boolean} True if end-of-stream has been hit. 204 | */ 205 | endOfStream: function() { 206 | return !this.tokens.length; 207 | }, 208 | 209 | /** 210 | * When a token is read from a stream, the first token in the 211 | * stream must be returned and subsequently removed, and 212 | * end-of-stream must be returned otherwise. 213 | * 214 | * @return {number} Get the next token from the stream, or 215 | * end_of_stream. 216 | */ 217 | read: function() { 218 | if (!this.tokens.length) 219 | return end_of_stream; 220 | return this.tokens.pop(); 221 | }, 222 | 223 | /** 224 | * When one or more tokens are prepended to a stream, those tokens 225 | * must be inserted, in given order, before the first token in the 226 | * stream. 227 | * 228 | * @param {(number|!Array.)} token The token(s) to prepend to the 229 | * stream. 230 | */ 231 | prepend: function(token) { 232 | if (Array.isArray(token)) { 233 | var tokens = /**@type {!Array.}*/(token); 234 | while (tokens.length) 235 | this.tokens.push(tokens.pop()); 236 | } else { 237 | this.tokens.push(token); 238 | } 239 | }, 240 | 241 | /** 242 | * When one or more tokens are pushed to a stream, those tokens 243 | * must be inserted, in given order, after the last token in the 244 | * stream. 245 | * 246 | * @param {(number|!Array.)} token The tokens(s) to push to the 247 | * stream. 248 | */ 249 | push: function(token) { 250 | if (Array.isArray(token)) { 251 | var tokens = /**@type {!Array.}*/(token); 252 | while (tokens.length) 253 | this.tokens.unshift(tokens.shift()); 254 | } else { 255 | this.tokens.unshift(token); 256 | } 257 | } 258 | }; 259 | 260 | // 261 | // 5. 
Encodings 262 | // 263 | 264 | // 5.1 Encoders and decoders 265 | 266 | /** @const */ 267 | var finished = -1; 268 | 269 | /** 270 | * @param {boolean} fatal If true, decoding errors raise an exception. 271 | * @param {number=} opt_code_point Override the standard fallback code point. 272 | * @return {number} The code point to insert on a decoding error. 273 | */ 274 | function decoderError(fatal, opt_code_point) { 275 | if (fatal) 276 | throw TypeError('Decoder error'); 277 | return opt_code_point || 0xFFFD; 278 | } 279 | 280 | /** 281 | * @param {number} code_point The code point that could not be encoded. 282 | * @return {number} Always throws, no value is actually returned. 283 | */ 284 | function encoderError(code_point) { 285 | throw TypeError('The code point ' + code_point + ' could not be encoded.'); 286 | } 287 | 288 | /** @interface */ 289 | function Decoder() {} 290 | Decoder.prototype = { 291 | /** 292 | * @param {Stream} stream The stream of bytes being decoded. 293 | * @param {number} bite The next byte read from the stream. 294 | * @return {?(number|!Array.)} The next code point(s) 295 | * decoded, or null if not enough data exists in the input 296 | * stream to decode a complete code point, or |finished|. 297 | */ 298 | handler: function(stream, bite) {} 299 | }; 300 | 301 | /** @interface */ 302 | function Encoder() {} 303 | Encoder.prototype = { 304 | /** 305 | * @param {Stream} stream The stream of code points being encoded. 306 | * @param {number} code_point Next code point read from the stream. 307 | * @return {(number|!Array.)} Byte(s) to emit, or |finished|. 308 | */ 309 | handler: function(stream, code_point) {} 310 | }; 311 | 312 | // 5.2 Names and labels 313 | 314 | // TODO: Define @typedef for Encoding: {name:string,labels:Array.} 315 | // https://github.com/google/closure-compiler/issues/247 316 | 317 | /** 318 | * @param {string} label The encoding label. 
319 | * @return {?{name:string,labels:Array.}} 320 | */ 321 | function getEncoding(label) { 322 | // 1. Remove any leading and trailing ASCII whitespace from label. 323 | label = String(label).trim().toLowerCase(); 324 | 325 | // 2. If label is an ASCII case-insensitive match for any of the 326 | // labels listed in the table below, return the corresponding 327 | // encoding, and failure otherwise. 328 | if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) { 329 | return label_to_encoding[label]; 330 | } 331 | return null; 332 | } 333 | 334 | /** 335 | * Encodings table: https://encoding.spec.whatwg.org/encodings.json 336 | * @const 337 | * @type {!Array.<{ 338 | * heading: string, 339 | * encodings: Array.<{name:string,labels:Array.}> 340 | * }>} 341 | */ 342 | var encodings = [ 343 | { 344 | "encodings": [ 345 | { 346 | "labels": [ 347 | "unicode-1-1-utf-8", 348 | "utf-8", 349 | "utf8" 350 | ], 351 | "name": "UTF-8" 352 | } 353 | ], 354 | "heading": "The Encoding" 355 | }, 356 | { 357 | "encodings": [ 358 | { 359 | "labels": [ 360 | "866", 361 | "cp866", 362 | "csibm866", 363 | "ibm866" 364 | ], 365 | "name": "IBM866" 366 | }, 367 | { 368 | "labels": [ 369 | "csisolatin2", 370 | "iso-8859-2", 371 | "iso-ir-101", 372 | "iso8859-2", 373 | "iso88592", 374 | "iso_8859-2", 375 | "iso_8859-2:1987", 376 | "l2", 377 | "latin2" 378 | ], 379 | "name": "ISO-8859-2" 380 | }, 381 | { 382 | "labels": [ 383 | "csisolatin3", 384 | "iso-8859-3", 385 | "iso-ir-109", 386 | "iso8859-3", 387 | "iso88593", 388 | "iso_8859-3", 389 | "iso_8859-3:1988", 390 | "l3", 391 | "latin3" 392 | ], 393 | "name": "ISO-8859-3" 394 | }, 395 | { 396 | "labels": [ 397 | "csisolatin4", 398 | "iso-8859-4", 399 | "iso-ir-110", 400 | "iso8859-4", 401 | "iso88594", 402 | "iso_8859-4", 403 | "iso_8859-4:1988", 404 | "l4", 405 | "latin4" 406 | ], 407 | "name": "ISO-8859-4" 408 | }, 409 | { 410 | "labels": [ 411 | "csisolatincyrillic", 412 | "cyrillic", 413 | "iso-8859-5", 414 | "iso-ir-144", 
415 | "iso8859-5", 416 | "iso88595", 417 | "iso_8859-5", 418 | "iso_8859-5:1988" 419 | ], 420 | "name": "ISO-8859-5" 421 | }, 422 | { 423 | "labels": [ 424 | "arabic", 425 | "asmo-708", 426 | "csiso88596e", 427 | "csiso88596i", 428 | "csisolatinarabic", 429 | "ecma-114", 430 | "iso-8859-6", 431 | "iso-8859-6-e", 432 | "iso-8859-6-i", 433 | "iso-ir-127", 434 | "iso8859-6", 435 | "iso88596", 436 | "iso_8859-6", 437 | "iso_8859-6:1987" 438 | ], 439 | "name": "ISO-8859-6" 440 | }, 441 | { 442 | "labels": [ 443 | "csisolatingreek", 444 | "ecma-118", 445 | "elot_928", 446 | "greek", 447 | "greek8", 448 | "iso-8859-7", 449 | "iso-ir-126", 450 | "iso8859-7", 451 | "iso88597", 452 | "iso_8859-7", 453 | "iso_8859-7:1987", 454 | "sun_eu_greek" 455 | ], 456 | "name": "ISO-8859-7" 457 | }, 458 | { 459 | "labels": [ 460 | "csiso88598e", 461 | "csisolatinhebrew", 462 | "hebrew", 463 | "iso-8859-8", 464 | "iso-8859-8-e", 465 | "iso-ir-138", 466 | "iso8859-8", 467 | "iso88598", 468 | "iso_8859-8", 469 | "iso_8859-8:1988", 470 | "visual" 471 | ], 472 | "name": "ISO-8859-8" 473 | }, 474 | { 475 | "labels": [ 476 | "csiso88598i", 477 | "iso-8859-8-i", 478 | "logical" 479 | ], 480 | "name": "ISO-8859-8-I" 481 | }, 482 | { 483 | "labels": [ 484 | "csisolatin6", 485 | "iso-8859-10", 486 | "iso-ir-157", 487 | "iso8859-10", 488 | "iso885910", 489 | "l6", 490 | "latin6" 491 | ], 492 | "name": "ISO-8859-10" 493 | }, 494 | { 495 | "labels": [ 496 | "iso-8859-13", 497 | "iso8859-13", 498 | "iso885913" 499 | ], 500 | "name": "ISO-8859-13" 501 | }, 502 | { 503 | "labels": [ 504 | "iso-8859-14", 505 | "iso8859-14", 506 | "iso885914" 507 | ], 508 | "name": "ISO-8859-14" 509 | }, 510 | { 511 | "labels": [ 512 | "csisolatin9", 513 | "iso-8859-15", 514 | "iso8859-15", 515 | "iso885915", 516 | "iso_8859-15", 517 | "l9" 518 | ], 519 | "name": "ISO-8859-15" 520 | }, 521 | { 522 | "labels": [ 523 | "iso-8859-16" 524 | ], 525 | "name": "ISO-8859-16" 526 | }, 527 | { 528 | "labels": [ 529 | "cskoi8r", 530 
| "koi", 531 | "koi8", 532 | "koi8-r", 533 | "koi8_r" 534 | ], 535 | "name": "KOI8-R" 536 | }, 537 | { 538 | "labels": [ 539 | "koi8-ru", 540 | "koi8-u" 541 | ], 542 | "name": "KOI8-U" 543 | }, 544 | { 545 | "labels": [ 546 | "csmacintosh", 547 | "mac", 548 | "macintosh", 549 | "x-mac-roman" 550 | ], 551 | "name": "macintosh" 552 | }, 553 | { 554 | "labels": [ 555 | "dos-874", 556 | "iso-8859-11", 557 | "iso8859-11", 558 | "iso885911", 559 | "tis-620", 560 | "windows-874" 561 | ], 562 | "name": "windows-874" 563 | }, 564 | { 565 | "labels": [ 566 | "cp1250", 567 | "windows-1250", 568 | "x-cp1250" 569 | ], 570 | "name": "windows-1250" 571 | }, 572 | { 573 | "labels": [ 574 | "cp1251", 575 | "windows-1251", 576 | "x-cp1251" 577 | ], 578 | "name": "windows-1251" 579 | }, 580 | { 581 | "labels": [ 582 | "ansi_x3.4-1968", 583 | "ascii", 584 | "cp1252", 585 | "cp819", 586 | "csisolatin1", 587 | "ibm819", 588 | "iso-8859-1", 589 | "iso-ir-100", 590 | "iso8859-1", 591 | "iso88591", 592 | "iso_8859-1", 593 | "iso_8859-1:1987", 594 | "l1", 595 | "latin1", 596 | "us-ascii", 597 | "windows-1252", 598 | "x-cp1252" 599 | ], 600 | "name": "windows-1252" 601 | }, 602 | { 603 | "labels": [ 604 | "cp1253", 605 | "windows-1253", 606 | "x-cp1253" 607 | ], 608 | "name": "windows-1253" 609 | }, 610 | { 611 | "labels": [ 612 | "cp1254", 613 | "csisolatin5", 614 | "iso-8859-9", 615 | "iso-ir-148", 616 | "iso8859-9", 617 | "iso88599", 618 | "iso_8859-9", 619 | "iso_8859-9:1989", 620 | "l5", 621 | "latin5", 622 | "windows-1254", 623 | "x-cp1254" 624 | ], 625 | "name": "windows-1254" 626 | }, 627 | { 628 | "labels": [ 629 | "cp1255", 630 | "windows-1255", 631 | "x-cp1255" 632 | ], 633 | "name": "windows-1255" 634 | }, 635 | { 636 | "labels": [ 637 | "cp1256", 638 | "windows-1256", 639 | "x-cp1256" 640 | ], 641 | "name": "windows-1256" 642 | }, 643 | { 644 | "labels": [ 645 | "cp1257", 646 | "windows-1257", 647 | "x-cp1257" 648 | ], 649 | "name": "windows-1257" 650 | }, 651 | { 652 | 
"labels": [ 653 | "cp1258", 654 | "windows-1258", 655 | "x-cp1258" 656 | ], 657 | "name": "windows-1258" 658 | }, 659 | { 660 | "labels": [ 661 | "x-mac-cyrillic", 662 | "x-mac-ukrainian" 663 | ], 664 | "name": "x-mac-cyrillic" 665 | } 666 | ], 667 | "heading": "Legacy single-byte encodings" 668 | }, 669 | { 670 | "encodings": [ 671 | { 672 | "labels": [ 673 | "chinese", 674 | "csgb2312", 675 | "csiso58gb231280", 676 | "gb2312", 677 | "gb_2312", 678 | "gb_2312-80", 679 | "gbk", 680 | "iso-ir-58", 681 | "x-gbk" 682 | ], 683 | "name": "GBK" 684 | }, 685 | { 686 | "labels": [ 687 | "gb18030" 688 | ], 689 | "name": "gb18030" 690 | } 691 | ], 692 | "heading": "Legacy multi-byte Chinese (simplified) encodings" 693 | }, 694 | { 695 | "encodings": [ 696 | { 697 | "labels": [ 698 | "big5", 699 | "big5-hkscs", 700 | "cn-big5", 701 | "csbig5", 702 | "x-x-big5" 703 | ], 704 | "name": "Big5" 705 | } 706 | ], 707 | "heading": "Legacy multi-byte Chinese (traditional) encodings" 708 | }, 709 | { 710 | "encodings": [ 711 | { 712 | "labels": [ 713 | "cseucpkdfmtjapanese", 714 | "euc-jp", 715 | "x-euc-jp" 716 | ], 717 | "name": "EUC-JP" 718 | }, 719 | { 720 | "labels": [ 721 | "csiso2022jp", 722 | "iso-2022-jp" 723 | ], 724 | "name": "ISO-2022-JP" 725 | }, 726 | { 727 | "labels": [ 728 | "csshiftjis", 729 | "ms932", 730 | "ms_kanji", 731 | "shift-jis", 732 | "shift_jis", 733 | "sjis", 734 | "windows-31j", 735 | "x-sjis" 736 | ], 737 | "name": "Shift_JIS" 738 | } 739 | ], 740 | "heading": "Legacy multi-byte Japanese encodings" 741 | }, 742 | { 743 | "encodings": [ 744 | { 745 | "labels": [ 746 | "cseuckr", 747 | "csksc56011987", 748 | "euc-kr", 749 | "iso-ir-149", 750 | "korean", 751 | "ks_c_5601-1987", 752 | "ks_c_5601-1989", 753 | "ksc5601", 754 | "ksc_5601", 755 | "windows-949" 756 | ], 757 | "name": "EUC-KR" 758 | } 759 | ], 760 | "heading": "Legacy multi-byte Korean encodings" 761 | }, 762 | { 763 | "encodings": [ 764 | { 765 | "labels": [ 766 | "csiso2022kr", 767 | "hz-gb-2312", 
768 | "iso-2022-cn", 769 | "iso-2022-cn-ext", 770 | "iso-2022-kr" 771 | ], 772 | "name": "replacement" 773 | }, 774 | { 775 | "labels": [ 776 | "utf-16be" 777 | ], 778 | "name": "UTF-16BE" 779 | }, 780 | { 781 | "labels": [ 782 | "utf-16", 783 | "utf-16le" 784 | ], 785 | "name": "UTF-16LE" 786 | }, 787 | { 788 | "labels": [ 789 | "x-user-defined" 790 | ], 791 | "name": "x-user-defined" 792 | } 793 | ], 794 | "heading": "Legacy miscellaneous encodings" 795 | } 796 | ]; 797 | 798 | // Label to encoding registry. 799 | /** @type {Object.}>} */ 800 | var label_to_encoding = {}; 801 | encodings.forEach(function(category) { 802 | category.encodings.forEach(function(encoding) { 803 | encoding.labels.forEach(function(label) { 804 | label_to_encoding[label] = encoding; 805 | }); 806 | }); 807 | }); 808 | 809 | // Registry of of encoder/decoder factories, by encoding name. 810 | /** @type {Object.} */ 811 | var encoders = {}; 812 | /** @type {Object.} */ 813 | var decoders = {}; 814 | 815 | // 816 | // 6. Indexes 817 | // 818 | 819 | /** 820 | * @param {number} pointer The |pointer| to search for. 821 | * @param {(!Array.|undefined)} index The |index| to search within. 822 | * @return {?number} The code point corresponding to |pointer| in |index|, 823 | * or null if |code point| is not in |index|. 824 | */ 825 | function indexCodePointFor(pointer, index) { 826 | if (!index) return null; 827 | return index[pointer] || null; 828 | } 829 | 830 | /** 831 | * @param {number} code_point The |code point| to search for. 832 | * @param {!Array.} index The |index| to search within. 833 | * @return {?number} The first pointer corresponding to |code point| in 834 | * |index|, or null if |code point| is not in |index|. 835 | */ 836 | function indexPointerFor(code_point, index) { 837 | var pointer = index.indexOf(code_point); 838 | return pointer === -1 ? null : pointer; 839 | } 840 | 841 | /** 842 | * @param {string} name Name of the index. 
843 | * @return {(!Array.|!Array.>)} 844 | * */ 845 | function index(name) { 846 | if (!('encoding-indexes' in global)) { 847 | throw Error("Indexes missing." + 848 | " Did you forget to include encoding-indexes.js first?"); 849 | } 850 | return global['encoding-indexes'][name]; 851 | } 852 | 853 | /** 854 | * @param {number} pointer The |pointer| to search for in the gb18030 index. 855 | * @return {?number} The code point corresponding to |pointer| in |index|, 856 | * or null if |code point| is not in the gb18030 index. 857 | */ 858 | function indexGB18030RangesCodePointFor(pointer) { 859 | // 1. If pointer is greater than 39419 and less than 189000, or 860 | // pointer is greater than 1237575, return null. 861 | if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575)) 862 | return null; 863 | 864 | // 2. If pointer is 7457, return code point U+E7C7. 865 | if (pointer === 7457) return 0xE7C7; 866 | 867 | // 3. Let offset be the last pointer in index gb18030 ranges that 868 | // is equal to or less than pointer and let code point offset be 869 | // its corresponding code point. 870 | var offset = 0; 871 | var code_point_offset = 0; 872 | var idx = index('gb18030-ranges'); 873 | var i; 874 | for (i = 0; i < idx.length; ++i) { 875 | /** @type {!Array.} */ 876 | var entry = idx[i]; 877 | if (entry[0] <= pointer) { 878 | offset = entry[0]; 879 | code_point_offset = entry[1]; 880 | } else { 881 | break; 882 | } 883 | } 884 | 885 | // 4. Return a code point whose value is code point offset + 886 | // pointer − offset. 887 | return code_point_offset + pointer - offset; 888 | } 889 | 890 | /** 891 | * @param {number} code_point The |code point| to locate in the gb18030 index. 892 | * @return {number} The first pointer corresponding to |code point| in the 893 | * gb18030 index. 894 | */ 895 | function indexGB18030RangesPointerFor(code_point) { 896 | // 1. If code point is U+E7C7, return pointer 7457. 897 | if (code_point === 0xE7C7) return 7457; 898 | 899 | // 2. 
Let offset be the last code point in index gb18030 ranges 900 | // that is equal to or less than code point and let pointer offset 901 | // be its corresponding pointer. 902 | var offset = 0; 903 | var pointer_offset = 0; 904 | var idx = index('gb18030-ranges'); 905 | var i; 906 | for (i = 0; i < idx.length; ++i) { 907 | /** @type {!Array.} */ 908 | var entry = idx[i]; 909 | if (entry[1] <= code_point) { 910 | offset = entry[1]; 911 | pointer_offset = entry[0]; 912 | } else { 913 | break; 914 | } 915 | } 916 | 917 | // 3. Return a pointer whose value is pointer offset + code point 918 | // − offset. 919 | return pointer_offset + code_point - offset; 920 | } 921 | 922 | /** 923 | * @param {number} code_point The |code_point| to search for in the Shift_JIS 924 | * index. 925 | * @return {?number} The code point corresponding to |pointer| in |index|, 926 | * or null if |code point| is not in the Shift_JIS index. 927 | */ 928 | function indexShiftJISPointerFor(code_point) { 929 | // 1. Let index be index jis0208 excluding all entries whose 930 | // pointer is in the range 8272 to 8835, inclusive. 931 | shift_jis_index = shift_jis_index || 932 | index('jis0208').map(function(code_point, pointer) { 933 | return inRange(pointer, 8272, 8835) ? null : code_point; 934 | }); 935 | var index_ = shift_jis_index; 936 | 937 | // 2. Return the index pointer for code point in index. 938 | return index_.indexOf(code_point); 939 | } 940 | var shift_jis_index; 941 | 942 | /** 943 | * @param {number} code_point The |code_point| to search for in the big5 944 | * index. 945 | * @return {?number} The code point corresponding to |pointer| in |index|, 946 | * or null if |code point| is not in the big5 index. 947 | */ 948 | function indexBig5PointerFor(code_point) { 949 | // 1. Let index be index Big5 excluding all entries whose pointer 950 | big5_index_no_hkscs = big5_index_no_hkscs || 951 | index('big5').map(function(code_point, pointer) { 952 | return (pointer < (0xA1 - 0x81) * 157) ? 
null : code_point; 953 | }); 954 | var index_ = big5_index_no_hkscs; 955 | 956 | // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or 957 | // U+5345, return the last pointer corresponding to code point in 958 | // index. 959 | if (code_point === 0x2550 || code_point === 0x255E || 960 | code_point === 0x2561 || code_point === 0x256A || 961 | code_point === 0x5341 || code_point === 0x5345) { 962 | return index_.lastIndexOf(code_point); 963 | } 964 | 965 | // 3. Return the index pointer for code point in index. 966 | return indexPointerFor(code_point, index_); 967 | } 968 | var big5_index_no_hkscs; 969 | 970 | // 971 | // 8. API 972 | // 973 | 974 | /** @const */ var DEFAULT_ENCODING = 'utf-8'; 975 | 976 | // 8.1 Interface TextDecoder 977 | 978 | /** 979 | * @constructor 980 | * @param {string=} label The label of the encoding; 981 | * defaults to 'utf-8'. 982 | * @param {Object=} options 983 | */ 984 | function TextDecoder(label, options) { 985 | // Web IDL conventions 986 | if (!(this instanceof TextDecoder)) 987 | throw TypeError('Called as a function. Did you forget \'new\'?'); 988 | label = label !== undefined ? String(label) : DEFAULT_ENCODING; 989 | options = ToDictionary(options); 990 | 991 | // A TextDecoder object has an associated encoding, decoder, 992 | // stream, ignore BOM flag (initially unset), BOM seen flag 993 | // (initially unset), error mode (initially replacement), and do 994 | // not flush flag (initially unset). 995 | 996 | /** @private */ 997 | this._encoding = null; 998 | /** @private @type {?Decoder} */ 999 | this._decoder = null; 1000 | /** @private @type {boolean} */ 1001 | this._ignoreBOM = false; 1002 | /** @private @type {boolean} */ 1003 | this._BOMseen = false; 1004 | /** @private @type {string} */ 1005 | this._error_mode = 'replacement'; 1006 | /** @private @type {boolean} */ 1007 | this._do_not_flush = false; 1008 | 1009 | 1010 | // 1. Let encoding be the result of getting an encoding from 1011 | // label. 
1012 | var encoding = getEncoding(label); 1013 | 1014 | // 2. If encoding is failure or replacement, throw a RangeError. 1015 | if (encoding === null || encoding.name === 'replacement') 1016 | throw RangeError('Unknown encoding: ' + label); 1017 | if (!decoders[encoding.name]) { 1018 | throw Error('Decoder not present.' + 1019 | ' Did you forget to include encoding-indexes.js first?'); 1020 | } 1021 | 1022 | // 3. Let dec be a new TextDecoder object. 1023 | var dec = this; 1024 | 1025 | // 4. Set dec's encoding to encoding. 1026 | dec._encoding = encoding; 1027 | 1028 | // 5. If options's fatal member is true, set dec's error mode to 1029 | // fatal. 1030 | if (Boolean(options['fatal'])) 1031 | dec._error_mode = 'fatal'; 1032 | 1033 | // 6. If options's ignoreBOM member is true, set dec's ignore BOM 1034 | // flag. 1035 | if (Boolean(options['ignoreBOM'])) 1036 | dec._ignoreBOM = true; 1037 | 1038 | // For pre-ES5 runtimes: 1039 | if (!Object.defineProperty) { 1040 | this.encoding = dec._encoding.name.toLowerCase(); 1041 | this.fatal = dec._error_mode === 'fatal'; 1042 | this.ignoreBOM = dec._ignoreBOM; 1043 | } 1044 | 1045 | // 7. Return dec. 1046 | return dec; 1047 | } 1048 | 1049 | if (Object.defineProperty) { 1050 | // The encoding attribute's getter must return encoding's name. 1051 | Object.defineProperty(TextDecoder.prototype, 'encoding', { 1052 | /** @this {TextDecoder} */ 1053 | get: function() { return this._encoding.name.toLowerCase(); } 1054 | }); 1055 | 1056 | // The fatal attribute's getter must return true if error mode 1057 | // is fatal, and false otherwise. 1058 | Object.defineProperty(TextDecoder.prototype, 'fatal', { 1059 | /** @this {TextDecoder} */ 1060 | get: function() { return this._error_mode === 'fatal'; } 1061 | }); 1062 | 1063 | // The ignoreBOM attribute's getter must return true if ignore 1064 | // BOM flag is set, and false otherwise. 
1065 | Object.defineProperty(TextDecoder.prototype, 'ignoreBOM', { 1066 | /** @this {TextDecoder} */ 1067 | get: function() { return this._ignoreBOM; } 1068 | }); 1069 | } 1070 | 1071 | /** 1072 | * @param {BufferSource=} input The buffer of bytes to decode. 1073 | * @param {Object=} options 1074 | * @return {string} The decoded string. 1075 | */ 1076 | TextDecoder.prototype.decode = function decode(input, options) { 1077 | var bytes; 1078 | if (typeof input === 'object' && input instanceof ArrayBuffer) { 1079 | bytes = new Uint8Array(input); 1080 | } else if (typeof input === 'object' && 'buffer' in input && 1081 | input.buffer instanceof ArrayBuffer) { 1082 | bytes = new Uint8Array(input.buffer, 1083 | input.byteOffset, 1084 | input.byteLength); 1085 | } else { 1086 | bytes = new Uint8Array(0); 1087 | } 1088 | 1089 | options = ToDictionary(options); 1090 | 1091 | // 1. If the do not flush flag is unset, set decoder to a new 1092 | // encoding's decoder, set stream to a new stream, and unset the 1093 | // BOM seen flag. 1094 | if (!this._do_not_flush) { 1095 | this._decoder = decoders[this._encoding.name]({ 1096 | fatal: this._error_mode === 'fatal'}); 1097 | this._BOMseen = false; 1098 | } 1099 | 1100 | // 2. If options's stream is true, set the do not flush flag, and 1101 | // unset the do not flush flag otherwise. 1102 | this._do_not_flush = Boolean(options['stream']); 1103 | 1104 | // 3. If input is given, push a copy of input to stream. 1105 | // TODO: Align with spec algorithm - maintain stream on instance. 1106 | var input_stream = new Stream(bytes); 1107 | 1108 | // 4. Let output be a new stream. 1109 | var output = []; 1110 | 1111 | /** @type {?(number|!Array.)} */ 1112 | var result; 1113 | 1114 | // 5. While true: 1115 | while (true) { 1116 | // 1. Let token be the result of reading from stream. 1117 | var token = input_stream.read(); 1118 | 1119 | // 2. If token is end-of-stream and the do not flush flag is 1120 | // set, return output, serialized. 
1121 | // TODO: Align with spec algorithm. 1122 | if (token === end_of_stream) 1123 | break; 1124 | 1125 | // 3. Otherwise, run these subsubsteps: 1126 | 1127 | // 1. Let result be the result of processing token for decoder, 1128 | // stream, output, and error mode. 1129 | result = this._decoder.handler(input_stream, token); 1130 | 1131 | // 2. If result is finished, return output, serialized. 1132 | if (result === finished) 1133 | break; 1134 | 1135 | if (result !== null) { 1136 | if (Array.isArray(result)) 1137 | output.push.apply(output, /**@type {!Array.}*/(result)); 1138 | else 1139 | output.push(result); 1140 | } 1141 | 1142 | // 3. Otherwise, if result is error, throw a TypeError. 1143 | // (Thrown in handler) 1144 | 1145 | // 4. Otherwise, do nothing. 1146 | } 1147 | // TODO: Align with spec algorithm. 1148 | if (!this._do_not_flush) { 1149 | do { 1150 | result = this._decoder.handler(input_stream, input_stream.read()); 1151 | if (result === finished) 1152 | break; 1153 | if (result === null) 1154 | continue; 1155 | if (Array.isArray(result)) 1156 | output.push.apply(output, /**@type {!Array.}*/(result)); 1157 | else 1158 | output.push(result); 1159 | } while (!input_stream.endOfStream()); 1160 | this._decoder = null; 1161 | } 1162 | 1163 | // A TextDecoder object also has an associated serialize stream 1164 | // algorithm... 1165 | /** 1166 | * @param {!Array.} stream 1167 | * @return {string} 1168 | * @this {TextDecoder} 1169 | */ 1170 | function serializeStream(stream) { 1171 | // 1. Let token be the result of reading from stream. 1172 | // (Done in-place on array, rather than as a stream) 1173 | 1174 | // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore 1175 | // BOM flag and BOM seen flag are unset, run these subsubsteps: 1176 | if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) && 1177 | !this._ignoreBOM && !this._BOMseen) { 1178 | if (stream.length > 0 && stream[0] === 0xFEFF) { 1179 | // 1. 
If token is U+FEFF, set BOM seen flag. 1180 | this._BOMseen = true; 1181 | stream.shift(); 1182 | } else if (stream.length > 0) { 1183 | // 2. Otherwise, if token is not end-of-stream, set BOM seen 1184 | // flag and append token to stream. 1185 | this._BOMseen = true; 1186 | } else { 1187 | // 3. Otherwise, if token is not end-of-stream, append token 1188 | // to output. 1189 | // (no-op) 1190 | } 1191 | } 1192 | // 4. Otherwise, return output. 1193 | return codePointsToString(stream); 1194 | } 1195 | 1196 | return serializeStream.call(this, output); 1197 | }; 1198 | 1199 | // 8.2 Interface TextEncoder 1200 | 1201 | /** 1202 | * @constructor 1203 | * @param {string=} label The label of the encoding. NONSTANDARD. 1204 | * @param {Object=} options NONSTANDARD. 1205 | */ 1206 | function TextEncoder(label, options) { 1207 | // Web IDL conventions 1208 | if (!(this instanceof TextEncoder)) 1209 | throw TypeError('Called as a function. Did you forget \'new\'?'); 1210 | options = ToDictionary(options); 1211 | 1212 | // A TextEncoder object has an associated encoding and encoder. 1213 | 1214 | /** @private */ 1215 | this._encoding = null; 1216 | /** @private @type {?Encoder} */ 1217 | this._encoder = null; 1218 | 1219 | // Non-standard 1220 | /** @private @type {boolean} */ 1221 | this._do_not_flush = false; 1222 | /** @private @type {string} */ 1223 | this._fatal = Boolean(options['fatal']) ? 'fatal' : 'replacement'; 1224 | 1225 | // 1. Let enc be a new TextEncoder object. 1226 | var enc = this; 1227 | 1228 | // 2. Set enc's encoding to UTF-8's encoder. 1229 | if (Boolean(options['NONSTANDARD_allowLegacyEncoding'])) { 1230 | // NONSTANDARD behavior. 1231 | label = label !== undefined ? String(label) : DEFAULT_ENCODING; 1232 | var encoding = getEncoding(label); 1233 | if (encoding === null || encoding.name === 'replacement') 1234 | throw RangeError('Unknown encoding: ' + label); 1235 | if (!encoders[encoding.name]) { 1236 | throw Error('Encoder not present.' 
+ 1237 | ' Did you forget to include encoding-indexes.js first?'); 1238 | } 1239 | enc._encoding = encoding; 1240 | } else { 1241 | // Standard behavior. 1242 | enc._encoding = getEncoding('utf-8'); 1243 | 1244 | if (label !== undefined && 'console' in global) { 1245 | console.warn('TextEncoder constructor called with encoding label, ' 1246 | + 'which is ignored.'); 1247 | } 1248 | } 1249 | 1250 | // For pre-ES5 runtimes: 1251 | if (!Object.defineProperty) 1252 | this.encoding = enc._encoding.name.toLowerCase(); 1253 | 1254 | // 3. Return enc. 1255 | return enc; 1256 | } 1257 | 1258 | if (Object.defineProperty) { 1259 | // The encoding attribute's getter must return encoding's name. 1260 | Object.defineProperty(TextEncoder.prototype, 'encoding', { 1261 | /** @this {TextEncoder} */ 1262 | get: function() { return this._encoding.name.toLowerCase(); } 1263 | }); 1264 | } 1265 | 1266 | /** 1267 | * @param {string=} opt_string The string to encode. 1268 | * @param {Object=} options 1269 | * @return {!Uint8Array} Encoded bytes, as a Uint8Array. 1270 | */ 1271 | TextEncoder.prototype.encode = function encode(opt_string, options) { 1272 | opt_string = opt_string === undefined ? '' : String(opt_string); 1273 | options = ToDictionary(options); 1274 | 1275 | // NOTE: This option is nonstandard. None of the encodings 1276 | // permitted for encoding (i.e. UTF-8, UTF-16) are stateful when 1277 | // the input is a USVString so streaming is not necessary. 1278 | if (!this._do_not_flush) 1279 | this._encoder = encoders[this._encoding.name]({ 1280 | fatal: this._fatal === 'fatal'}); 1281 | this._do_not_flush = Boolean(options['stream']); 1282 | 1283 | // 1. Convert input to a stream. 1284 | var input = new Stream(stringToCodePoints(opt_string)); 1285 | 1286 | // 2. Let output be a new stream 1287 | var output = []; 1288 | 1289 | /** @type {?(number|!Array.)} */ 1290 | var result; 1291 | // 3. While true, run these substeps: 1292 | while (true) { 1293 | // 1. 
Let token be the result of reading from input. 1294 | var token = input.read(); 1295 | if (token === end_of_stream) 1296 | break; 1297 | // 2. Let result be the result of processing token for encoder, 1298 | // input, output. 1299 | result = this._encoder.handler(input, token); 1300 | if (result === finished) 1301 | break; 1302 | if (Array.isArray(result)) 1303 | output.push.apply(output, /**@type {!Array.}*/(result)); 1304 | else 1305 | output.push(result); 1306 | } 1307 | // TODO: Align with spec algorithm. 1308 | if (!this._do_not_flush) { 1309 | while (true) { 1310 | result = this._encoder.handler(input, input.read()); 1311 | if (result === finished) 1312 | break; 1313 | if (Array.isArray(result)) 1314 | output.push.apply(output, /**@type {!Array.}*/(result)); 1315 | else 1316 | output.push(result); 1317 | } 1318 | this._encoder = null; 1319 | } 1320 | // 3. If result is finished, convert output into a byte sequence, 1321 | // and then return a Uint8Array object wrapping an ArrayBuffer 1322 | // containing output. 1323 | return new Uint8Array(output); 1324 | }; 1325 | 1326 | 1327 | // 1328 | // 9. The encoding 1329 | // 1330 | 1331 | // 9.1 utf-8 1332 | 1333 | // 9.1.1 utf-8 decoder 1334 | /** 1335 | * @constructor 1336 | * @implements {Decoder} 1337 | * @param {{fatal: boolean}} options 1338 | */ 1339 | function UTF8Decoder(options) { 1340 | var fatal = options.fatal; 1341 | 1342 | // utf-8's decoder's has an associated utf-8 code point, utf-8 1343 | // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8 1344 | // lower boundary (initially 0x80), and a utf-8 upper boundary 1345 | // (initially 0xBF). 1346 | var /** @type {number} */ utf8_code_point = 0, 1347 | /** @type {number} */ utf8_bytes_seen = 0, 1348 | /** @type {number} */ utf8_bytes_needed = 0, 1349 | /** @type {number} */ utf8_lower_boundary = 0x80, 1350 | /** @type {number} */ utf8_upper_boundary = 0xBF; 1351 | 1352 | /** 1353 | * @param {Stream} stream The stream of bytes being decoded. 
1354 | * @param {number} bite The next byte read from the stream. 1355 | * @return {?(number|!Array.)} The next code point(s) 1356 | * decoded, or null if not enough data exists in the input 1357 | * stream to decode a complete code point. 1358 | */ 1359 | this.handler = function(stream, bite) { 1360 | // 1. If byte is end-of-stream and utf-8 bytes needed is not 0, 1361 | // set utf-8 bytes needed to 0 and return error. 1362 | if (bite === end_of_stream && utf8_bytes_needed !== 0) { 1363 | utf8_bytes_needed = 0; 1364 | return decoderError(fatal); 1365 | } 1366 | 1367 | // 2. If byte is end-of-stream, return finished. 1368 | if (bite === end_of_stream) 1369 | return finished; 1370 | 1371 | // 3. If utf-8 bytes needed is 0, based on byte: 1372 | if (utf8_bytes_needed === 0) { 1373 | 1374 | // 0x00 to 0x7F 1375 | if (inRange(bite, 0x00, 0x7F)) { 1376 | // Return a code point whose value is byte. 1377 | return bite; 1378 | } 1379 | 1380 | // 0xC2 to 0xDF 1381 | else if (inRange(bite, 0xC2, 0xDF)) { 1382 | // 1. Set utf-8 bytes needed to 1. 1383 | utf8_bytes_needed = 1; 1384 | 1385 | // 2. Set UTF-8 code point to byte & 0x1F. 1386 | utf8_code_point = bite & 0x1F; 1387 | } 1388 | 1389 | // 0xE0 to 0xEF 1390 | else if (inRange(bite, 0xE0, 0xEF)) { 1391 | // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0. 1392 | if (bite === 0xE0) 1393 | utf8_lower_boundary = 0xA0; 1394 | // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F. 1395 | if (bite === 0xED) 1396 | utf8_upper_boundary = 0x9F; 1397 | // 3. Set utf-8 bytes needed to 2. 1398 | utf8_bytes_needed = 2; 1399 | // 4. Set UTF-8 code point to byte & 0xF. 1400 | utf8_code_point = bite & 0xF; 1401 | } 1402 | 1403 | // 0xF0 to 0xF4 1404 | else if (inRange(bite, 0xF0, 0xF4)) { 1405 | // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90. 1406 | if (bite === 0xF0) 1407 | utf8_lower_boundary = 0x90; 1408 | // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F. 
1409 | if (bite === 0xF4) 1410 | utf8_upper_boundary = 0x8F; 1411 | // 3. Set utf-8 bytes needed to 3. 1412 | utf8_bytes_needed = 3; 1413 | // 4. Set UTF-8 code point to byte & 0x7. 1414 | utf8_code_point = bite & 0x7; 1415 | } 1416 | 1417 | // Otherwise 1418 | else { 1419 | // Return error. 1420 | return decoderError(fatal); 1421 | } 1422 | 1423 | // Return continue. 1424 | return null; 1425 | } 1426 | 1427 | // 4. If byte is not in the range utf-8 lower boundary to utf-8 1428 | // upper boundary, inclusive, run these substeps: 1429 | if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) { 1430 | 1431 | // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8 1432 | // bytes seen to 0, set utf-8 lower boundary to 0x80, and set 1433 | // utf-8 upper boundary to 0xBF. 1434 | utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0; 1435 | utf8_lower_boundary = 0x80; 1436 | utf8_upper_boundary = 0xBF; 1437 | 1438 | // 2. Prepend byte to stream. 1439 | stream.prepend(bite); 1440 | 1441 | // 3. Return error. 1442 | return decoderError(fatal); 1443 | } 1444 | 1445 | // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary 1446 | // to 0xBF. 1447 | utf8_lower_boundary = 0x80; 1448 | utf8_upper_boundary = 0xBF; 1449 | 1450 | // 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte & 1451 | // 0x3F) 1452 | utf8_code_point = (utf8_code_point << 6) | (bite & 0x3F); 1453 | 1454 | // 7. Increase utf-8 bytes seen by one. 1455 | utf8_bytes_seen += 1; 1456 | 1457 | // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed, 1458 | // continue. 1459 | if (utf8_bytes_seen !== utf8_bytes_needed) 1460 | return null; 1461 | 1462 | // 9. Let code point be utf-8 code point. 1463 | var code_point = utf8_code_point; 1464 | 1465 | // 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes 1466 | // seen to 0. 1467 | utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0; 1468 | 1469 | // 11. Return a code point whose value is code point. 
1470 | return code_point; 1471 | }; 1472 | } 1473 | 1474 | // 9.1.2 utf-8 encoder 1475 | /** 1476 | * @constructor 1477 | * @implements {Encoder} 1478 | * @param {{fatal: boolean}} options 1479 | */ 1480 | function UTF8Encoder(options) { 1481 | var fatal = options.fatal; 1482 | /** 1483 | * @param {Stream} stream Input stream. 1484 | * @param {number} code_point Next code point read from the stream. 1485 | * @return {(number|!Array.)} Byte(s) to emit. 1486 | */ 1487 | this.handler = function(stream, code_point) { 1488 | // 1. If code point is end-of-stream, return finished. 1489 | if (code_point === end_of_stream) 1490 | return finished; 1491 | 1492 | // 2. If code point is an ASCII code point, return a byte whose 1493 | // value is code point. 1494 | if (isASCIICodePoint(code_point)) 1495 | return code_point; 1496 | 1497 | // 3. Set count and offset based on the range code point is in: 1498 | var count, offset; 1499 | // U+0080 to U+07FF, inclusive: 1500 | if (inRange(code_point, 0x0080, 0x07FF)) { 1501 | // 1 and 0xC0 1502 | count = 1; 1503 | offset = 0xC0; 1504 | } 1505 | // U+0800 to U+FFFF, inclusive: 1506 | else if (inRange(code_point, 0x0800, 0xFFFF)) { 1507 | // 2 and 0xE0 1508 | count = 2; 1509 | offset = 0xE0; 1510 | } 1511 | // U+10000 to U+10FFFF, inclusive: 1512 | else if (inRange(code_point, 0x10000, 0x10FFFF)) { 1513 | // 3 and 0xF0 1514 | count = 3; 1515 | offset = 0xF0; 1516 | } 1517 | 1518 | // 4. Let bytes be a byte sequence whose first byte is (code 1519 | // point >> (6 × count)) + offset. 1520 | var bytes = [(code_point >> (6 * count)) + offset]; 1521 | 1522 | // 5. Run these substeps while count is greater than 0: 1523 | while (count > 0) { 1524 | 1525 | // 1. Set temp to code point >> (6 × (count − 1)). 1526 | var temp = code_point >> (6 * (count - 1)); 1527 | 1528 | // 2. Append to bytes 0x80 | (temp & 0x3F). 1529 | bytes.push(0x80 | (temp & 0x3F)); 1530 | 1531 | // 3. Decrease count by one. 1532 | count -= 1; 1533 | } 1534 | 1535 | // 6. 
Return bytes bytes, in order. 1536 | return bytes; 1537 | }; 1538 | } 1539 | 1540 | /** @param {{fatal: boolean}} options */ 1541 | encoders['UTF-8'] = function(options) { 1542 | return new UTF8Encoder(options); 1543 | }; 1544 | /** @param {{fatal: boolean}} options */ 1545 | decoders['UTF-8'] = function(options) { 1546 | return new UTF8Decoder(options); 1547 | }; 1548 | 1549 | // 1550 | // 10. Legacy single-byte encodings 1551 | // 1552 | 1553 | // 10.1 single-byte decoder 1554 | /** 1555 | * @constructor 1556 | * @implements {Decoder} 1557 | * @param {!Array.} index The encoding index. 1558 | * @param {{fatal: boolean}} options 1559 | */ 1560 | function SingleByteDecoder(index, options) { 1561 | var fatal = options.fatal; 1562 | /** 1563 | * @param {Stream} stream The stream of bytes being decoded. 1564 | * @param {number} bite The next byte read from the stream. 1565 | * @return {?(number|!Array.)} The next code point(s) 1566 | * decoded, or null if not enough data exists in the input 1567 | * stream to decode a complete code point. 1568 | */ 1569 | this.handler = function(stream, bite) { 1570 | // 1. If byte is end-of-stream, return finished. 1571 | if (bite === end_of_stream) 1572 | return finished; 1573 | 1574 | // 2. If byte is an ASCII byte, return a code point whose value 1575 | // is byte. 1576 | if (isASCIIByte(bite)) 1577 | return bite; 1578 | 1579 | // 3. Let code point be the index code point for byte − 0x80 in 1580 | // index single-byte. 1581 | var code_point = index[bite - 0x80]; 1582 | 1583 | // 4. If code point is null, return error. 1584 | if (code_point === null) 1585 | return decoderError(fatal); 1586 | 1587 | // 5. Return a code point whose value is code point. 1588 | return code_point; 1589 | }; 1590 | } 1591 | 1592 | // 10.2 single-byte encoder 1593 | /** 1594 | * @constructor 1595 | * @implements {Encoder} 1596 | * @param {!Array.} index The encoding index. 
1597 | * @param {{fatal: boolean}} options 1598 | */ 1599 | function SingleByteEncoder(index, options) { 1600 | var fatal = options.fatal; 1601 | /** 1602 | * @param {Stream} stream Input stream. 1603 | * @param {number} code_point Next code point read from the stream. 1604 | * @return {(number|!Array.)} Byte(s) to emit. 1605 | */ 1606 | this.handler = function(stream, code_point) { 1607 | // 1. If code point is end-of-stream, return finished. 1608 | if (code_point === end_of_stream) 1609 | return finished; 1610 | 1611 | // 2. If code point is an ASCII code point, return a byte whose 1612 | // value is code point. 1613 | if (isASCIICodePoint(code_point)) 1614 | return code_point; 1615 | 1616 | // 3. Let pointer be the index pointer for code point in index 1617 | // single-byte. 1618 | var pointer = indexPointerFor(code_point, index); 1619 | 1620 | // 4. If pointer is null, return error with code point. 1621 | if (pointer === null) 1622 | encoderError(code_point); 1623 | 1624 | // 5. Return a byte whose value is pointer + 0x80. 1625 | return pointer + 0x80; 1626 | }; 1627 | } 1628 | 1629 | (function() { 1630 | if (!('encoding-indexes' in global)) 1631 | return; 1632 | encodings.forEach(function(category) { 1633 | if (category.heading !== 'Legacy single-byte encodings') 1634 | return; 1635 | category.encodings.forEach(function(encoding) { 1636 | var name = encoding.name; 1637 | var idx = index(name.toLowerCase()); 1638 | /** @param {{fatal: boolean}} options */ 1639 | decoders[name] = function(options) { 1640 | return new SingleByteDecoder(idx, options); 1641 | }; 1642 | /** @param {{fatal: boolean}} options */ 1643 | encoders[name] = function(options) { 1644 | return new SingleByteEncoder(idx, options); 1645 | }; 1646 | }); 1647 | }); 1648 | }()); 1649 | 1650 | // 1651 | // 11. Legacy multi-byte Chinese (simplified) encodings 1652 | // 1653 | 1654 | // 11.1 gbk 1655 | 1656 | // 11.1.1 gbk decoder 1657 | // gbk's decoder is gb18030's decoder. 
/** @param {{fatal: boolean}} options */
decoders['GBK'] = function(options) {
  return new GB18030Decoder(options);
};

// 11.1.2 gbk encoder
// gbk's encoder is gb18030's encoder with its gbk flag set.
/** @param {{fatal: boolean}} options */
encoders['GBK'] = function(options) {
  return new GB18030Encoder(options, true);
};

// 11.2 gb18030

// 11.2.1 gb18030 decoder
/**
 * Stateful gb18030 decoder. Up to three pending bytes of a two- or
 * four-byte sequence are remembered between handler calls.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function GB18030Decoder(options) {
  var fatal = options.fatal;
  // gb18030's decoder has an associated gb18030 first, gb18030
  // second, and gb18030 third (all initially 0x00).
  var /** @type {number} */ gb18030_first = 0x00,
      /** @type {number} */ gb18030_second = 0x00,
      /** @type {number} */ gb18030_third = 0x00;
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and gb18030 first, gb18030
    // second, and gb18030 third are 0x00, return finished.
    if (bite === end_of_stream && gb18030_first === 0x00 &&
        gb18030_second === 0x00 && gb18030_third === 0x00) {
      return finished;
    }
    // 2. If byte is end-of-stream, and gb18030 first, gb18030
    // second, or gb18030 third is not 0x00, set gb18030 first,
    // gb18030 second, and gb18030 third to 0x00, and return error.
    if (bite === end_of_stream &&
        (gb18030_first !== 0x00 || gb18030_second !== 0x00 ||
         gb18030_third !== 0x00)) {
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      // Return the error here. Without the return, control fell
      // through the remaining steps with the end-of-stream token and
      // only produced an error at step 9, via a second decoderError
      // call.
      return decoderError(fatal);
    }
    var code_point;
    // 3. If gb18030 third is not 0x00, run these substeps:
    if (gb18030_third !== 0x00) {
      // 1. Let code point be null.
      code_point = null;
      // 2. If byte is in the range 0x30 to 0x39, inclusive, set
      // code point to the index gb18030 ranges code point for
      // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
      // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
      if (inRange(bite, 0x30, 0x39)) {
        code_point = indexGB18030RangesCodePointFor(
            (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 +
             gb18030_third - 0x81) * 10 + bite - 0x30);
      }

      // 3. Let buffer be a byte sequence consisting of gb18030
      // second, gb18030 third, and byte, in order.
      var buffer = [gb18030_second, gb18030_third, bite];

      // 4. Set gb18030 first, gb18030 second, and gb18030 third to
      // 0x00.
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;

      // 5. If code point is null, prepend buffer to stream and
      // return error.
      if (code_point === null) {
        stream.prepend(buffer);
        return decoderError(fatal);
      }

      // 6. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If gb18030 second is not 0x00, run these substeps:
    if (gb18030_second !== 0x00) {

      // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
      // gb18030 third to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        gb18030_third = bite;
        return null;
      }

      // 2. Prepend gb18030 second followed by byte to stream, set
      // gb18030 first and gb18030 second to 0x00, and return error.
      stream.prepend([gb18030_second, bite]);
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      return decoderError(fatal);
    }

    // 5. If gb18030 first is not 0x00, run these substeps:
    if (gb18030_first !== 0x00) {

      // 1. If byte is in the range 0x30 to 0x39, inclusive, set
      // gb18030 second to byte and return continue.
      if (inRange(bite, 0x30, 0x39)) {
        gb18030_second = bite;
        return null;
      }

      // 2. Let lead be gb18030 first, let pointer be null, and set
      // gb18030 first to 0x00.
      var lead = gb18030_first;
      var pointer = null;
      gb18030_first = 0x00;

      // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
      // otherwise.
      var offset = bite < 0x7F ? 0x40 : 0x41;

      // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
      // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
      // (byte − offset).
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
        pointer = (lead - 0x81) * 190 + (bite - offset);

      // 5. Let code point be null if pointer is null and the index
      // code point for pointer in index gb18030 otherwise.
      code_point = pointer === null ? null :
          indexCodePointFor(pointer, index('gb18030'));

      // 6. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 7. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 8. Return a code point whose value is code point.
      return code_point;
    }

    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 7. If byte is 0x80, return code point U+20AC.
    if (bite === 0x80)
      return 0x20AC;

    // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
    // gb18030 first to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      gb18030_first = bite;
      return null;
    }

    // 9. Return error.
    return decoderError(fatal);
  };
}

// 11.2.2 gb18030 encoder
/**
 * gb18030 encoder; also serves as the gbk encoder when gbk_flag is set.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 * @param {boolean=} gbk_flag
 */
function GB18030Encoder(options, gbk_flag) {
  // Read for parity with the other encoders; the handler below does not
  // consult it.
  var fatal = options.fatal;
  // gb18030's encoder has an associated gbk flag (initially unset).
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. If code point is U+E5E5, return error with code point.
    if (code_point === 0xE5E5)
      return encoderError(code_point);

    // 4. If the gbk flag is set and code point is U+20AC, return
    // byte 0x80.
    if (gbk_flag && code_point === 0x20AC)
      return 0x80;

    // 5. Let pointer be the index pointer for code point in index
    // gb18030.
    var pointer = indexPointerFor(code_point, index('gb18030'));

    // 6. If pointer is not null, run these substeps:
    if (pointer !== null) {

      // 1. Let lead be floor(pointer / 190) + 0x81.
      var lead = floor(pointer / 190) + 0x81;

      // 2. Let trail be pointer % 190.
      var trail = pointer % 190;

      // 3. Let offset be 0x40 if trail is less than 0x3F and 0x41 otherwise.
      var offset = trail < 0x3F ? 0x40 : 0x41;

      // 4. Return two bytes whose values are lead and trail + offset.
      return [lead, trail + offset];
    }

    // 7. If gbk flag is set, return error with code point.
    if (gbk_flag)
      return encoderError(code_point);

    // 8. Set pointer to the index gb18030 ranges pointer for code
    // point.
    pointer = indexGB18030RangesPointerFor(code_point);

    // 9. Let byte1 be floor(pointer / 10 / 126 / 10).
    var byte1 = floor(pointer / 10 / 126 / 10);

    // 10. Set pointer to pointer − byte1 × 10 × 126 × 10.
    pointer = pointer - byte1 * 10 * 126 * 10;

    // 11. Let byte2 be floor(pointer / 10 / 126).
    var byte2 = floor(pointer / 10 / 126);

    // 12. Set pointer to pointer − byte2 × 10 × 126.
    pointer = pointer - byte2 * 10 * 126;

    // 13. Let byte3 be floor(pointer / 10).
    var byte3 = floor(pointer / 10);

    // 14. Let byte4 be pointer − byte3 × 10.
    var byte4 = pointer - byte3 * 10;

    // 15. Return four bytes whose values are byte1 + 0x81, byte2 +
    // 0x30, byte3 + 0x81, byte4 + 0x30.
    return [byte1 + 0x81,
            byte2 + 0x30,
            byte3 + 0x81,
            byte4 + 0x30];
  };
}

/** @param {{fatal: boolean}} options */
encoders['gb18030'] = function(options) {
  return new GB18030Encoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['gb18030'] = function(options) {
  return new GB18030Decoder(options);
};


//
// 12.
// Legacy multi-byte Chinese (traditional) encodings
//

// 12.1 Big5

// 12.1.1 Big5 decoder
/**
 * Stateful Big5 decoder: a lead byte is remembered between handler
 * calls until its trail byte arrives.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function Big5Decoder(options) {
  var fatal = options.fatal;
  // Pending Big5 lead byte; 0x00 means no lead byte is waiting.
  var /** @type {number} */ pendingLead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // Steps 1-2. End-of-stream: finished when no lead byte is
    // pending; otherwise drop the pending lead and return error.
    if (bite === end_of_stream) {
      if (pendingLead === 0x00)
        return finished;
      pendingLead = 0x00;
      return decoderError(fatal);
    }

    // Steps 4-6. No lead byte pending: the byte is ASCII, a new lead
    // byte, or an error.
    if (pendingLead === 0x00) {
      if (isASCIIByte(bite))
        return bite;
      if (inRange(bite, 0x81, 0xFE)) {
        pendingLead = bite;
        return null;
      }
      return decoderError(fatal);
    }

    // Step 3. A lead byte is pending: consume it and treat the
    // current byte as its trail byte.
    var leadByte = pendingLead;
    pendingLead = 0x00;

    // 3.1-3.2. The pointer is only defined for trail bytes in
    // 0x40-0x7E or 0xA1-0xFE; the offset is 0x40 below 0x7F and 0x62
    // otherwise.
    var pointer = null;
    if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
      var trailOffset = bite < 0x7F ? 0x40 : 0x62;
      pointer = (leadByte - 0x81) * 157 + (bite - trailOffset);
    }

    // 3.3. Four pointers decode to a pair of code points:
    // Pointer | Code points
    // --------+--------------
    // 1133    | U+00CA U+0304
    // 1135    | U+00CA U+030C
    // 1164    | U+00EA U+0304
    // 1166    | U+00EA U+030C
    if (pointer === 1133)
      return [0x00CA, 0x0304];
    if (pointer === 1135)
      return [0x00CA, 0x030C];
    if (pointer === 1164)
      return [0x00EA, 0x0304];
    if (pointer === 1166)
      return [0x00EA, 0x030C];

    // 3.4 and 3.7. Look the pointer up in index Big5 and return the
    // code point when there is one.
    if (pointer !== null) {
      var decoded = indexCodePointFor(pointer, index('big5'));
      if (decoded !== null)
        return decoded;
    }

    // 3.5-3.6. No code point: an ASCII byte is put back on the stream
    // before the error is returned.
    if (isASCIIByte(bite))
      stream.prepend(bite);
    return decoderError(fatal);
  };
}

// 12.1.2 Big5 encoder
/**
 * Big5 encoder: emits ASCII unchanged and every other encodable code
 * point as a lead/trail byte pair.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function Big5Encoder(options) {
  var fatal = options.fatal;
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. End-of-stream means finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. ASCII code points are emitted as-is.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3-4. Without an index Big5 pointer the code point cannot be
    // encoded.
    var big5Pointer = indexBig5PointerFor(code_point);
    if (big5Pointer === null)
      return encoderError(code_point);

    // 5-6. Lead is floor(pointer / 157) + 0x81; leads below 0xA1 are
    // an error.
    var leadByte = floor(big5Pointer / 157) + 0x81;
    if (leadByte < 0xA1)
      return encoderError(code_point);

    // 7-8. Trail is pointer % 157, offset by 0x40 when below 0x3F and
    // by 0x62 otherwise.
    var trailIndex = big5Pointer % 157;
    var trailByte = trailIndex + (trailIndex < 0x3F ? 0x40 : 0x62);

    // Return two bytes whose values are lead and trail + offset.
    return [leadByte, trailByte];
  };
}

/** @param {{fatal: boolean}} options */
encoders['Big5'] = function(options) {
  return new Big5Encoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['Big5'] = function(options) {
  return new Big5Decoder(options);
};


//
// 13.
// Legacy multi-byte Japanese encodings
//

// 13.1 euc-jp

// 13.1.1 euc-jp decoder
/**
 * Byte-at-a-time euc-jp decoder. Between handler calls it remembers a
 * pending lead byte and a flag selecting the jis0212 index for the next
 * two-byte lookup.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function EUCJPDecoder(options) {
  var fatal = options.fatal;

  // euc-jp's decoder has an associated euc-jp jis0212 flag
  // (initially unset) and euc-jp lead (initially 0x00).
  var /** @type {boolean} */ eucjp_jis0212_flag = false,
      /** @type {number} */ eucjp_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set
    // euc-jp lead to 0x00, and return error.
    if (bite === end_of_stream && eucjp_lead !== 0x00) {
      eucjp_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and euc-jp lead is 0x00, return
    // finished.
    if (bite === end_of_stream && eucjp_lead === 0x00)
      return finished;

    // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
    // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
    // point whose value is 0xFF61 − 0xA1 + byte.
    if (eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
      eucjp_lead = 0x00;
      return 0xFF61 - 0xA1 + bite;
    }

    // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
    // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
    // to byte, and return continue.
    if (eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
      eucjp_jis0212_flag = true;
      eucjp_lead = bite;
      return null;
    }

    // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set
    // euc-jp lead to 0x00, and run these substeps:
    if (eucjp_lead !== 0x00) {
      var lead = eucjp_lead;
      eucjp_lead = 0x00;

      // 1. Let code point be null.
      var code_point = null;

      // 2. If lead and byte are both in the range 0xA1 to 0xFE,
      // inclusive, set code point to the index code point for (lead
      // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
      // jis0212 flag is unset and in index jis0212 otherwise.
      if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
        code_point = indexCodePointFor(
            (lead - 0xA1) * 94 + (bite - 0xA1),
            index(!eucjp_jis0212_flag ? 'jis0208' : 'jis0212'));
      }

      // 3. Unset the euc-jp jis0212 flag.
      eucjp_jis0212_flag = false;

      // 4. If byte is not in the range 0xA1 to 0xFE, inclusive,
      // prepend byte to stream.
      if (!inRange(bite, 0xA1, 0xFE))
        stream.prepend(bite);

      // 5. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 6. Return a code point whose value is code point.
      return code_point;
    }

    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
    // inclusive, set euc-jp lead to byte and return continue.
    if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
      eucjp_lead = bite;
      return null;
    }

    // 8. Return error.
    return decoderError(fatal);
  };
}

// 13.1.2 euc-jp encoder
/**
 * Stateless euc-jp encoder. Nothing is read from options; the parameter
 * is kept so the constructor signature matches the other encoders.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function EUCJPEncoder(options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. If code point is U+00A5, return byte 0x5C.
    if (code_point === 0x00A5)
      return 0x5C;

    // 4. If code point is U+203E, return byte 0x7E.
    if (code_point === 0x203E)
      return 0x7E;

    // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
    // return two bytes whose values are 0x8E and code point −
    // 0xFF61 + 0xA1.
    if (inRange(code_point, 0xFF61, 0xFF9F))
      return [0x8E, code_point - 0xFF61 + 0xA1];

    // 6. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212)
      code_point = 0xFF0D;

    // 7. Let pointer be the index pointer for code point in index
    // jis0208.
    var pointer = indexPointerFor(code_point, index('jis0208'));

    // 8. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 9. Let lead be floor(pointer / 94) + 0xA1.
    var lead = floor(pointer / 94) + 0xA1;

    // 10. Let trail be pointer % 94 + 0xA1.
    var trail = pointer % 94 + 0xA1;

    // 11. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}

/** @param {{fatal: boolean}} options */
encoders['EUC-JP'] = function(options) {
  return new EUCJPEncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['EUC-JP'] = function(options) {
  return new EUCJPDecoder(options);
};

// 13.2 iso-2022-jp

// 13.2.1 iso-2022-jp decoder
/**
 * Byte-at-a-time iso-2022-jp decoder, written as a state machine keyed on
 * the current decoder state. Escape sequences switch the state; the
 * "output state" records the last state selected by a valid escape so
 * that a malformed escape can fall back to it.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function ISO2022JPDecoder(options) {
  var fatal = options.fatal;
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    Katakana: 2,
    LeadByte: 3,
    TrailByte: 4,
    EscapeStart: 5,
    Escape: 6
  };
  // iso-2022-jp's decoder has an associated iso-2022-jp decoder
  // state (initially ASCII), iso-2022-jp decoder output state
  // (initially ASCII), iso-2022-jp lead (initially 0x00), and
  // iso-2022-jp output flag (initially unset).
  var /** @type {number} */ iso2022jp_decoder_state = states.ASCII,
      /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII,
      /** @type {number} */ iso2022jp_lead = 0x00,
      /** @type {boolean} */ iso2022jp_output_flag = false;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // switching on iso-2022-jp decoder state:
    switch (iso2022jp_decoder_state) {
      default:
      case states.ASCII:
        // ASCII
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E &&
            bite !== 0x0F && bite !== 0x1B) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Roman:
        // Roman
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x5C
        if (bite === 0x5C) {
          // Unset the iso-2022-jp output flag and return code point
          // U+00A5.
          iso2022jp_output_flag = false;
          return 0x00A5;
        }

        // 0x7E
        if (bite === 0x7E) {
          // Unset the iso-2022-jp output flag and return code point
          // U+203E.
          iso2022jp_output_flag = false;
          return 0x203E;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F &&
            bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Katakana:
        // Katakana
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x5F
        if (inRange(bite, 0x21, 0x5F)) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is 0xFF61 − 0x21 + byte.
          iso2022jp_output_flag = false;
          return 0xFF61 - 0x21 + bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.LeadByte:
        // Lead byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // Unset the iso-2022-jp output flag, set iso-2022-jp lead
          // to byte, iso-2022-jp decoder state to trail byte, and
          // return continue.
          iso2022jp_output_flag = false;
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.TrailByte;
          return null;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.TrailByte:
        // Trail byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // error: the escape interrupts a two-byte sequence, so the
          // pending lead byte is lost.
          iso2022jp_decoder_state = states.EscapeStart;
          return decoderError(fatal);
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // 1. Set the iso-2022-jp decoder state to lead byte.
          iso2022jp_decoder_state = states.LeadByte;

          // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
          var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;

          // 3. Let code point be the index code point for pointer in
          // index jis0208.
          var code_point = indexCodePointFor(pointer, index('jis0208'));

          // 4. If code point is null, return error.
          if (code_point === null)
            return decoderError(fatal);

          // 5. Return a code point whose value is code point.
          return code_point;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Set the iso-2022-jp decoder state to lead byte, prepend
          // byte to stream, and return error.
          iso2022jp_decoder_state = states.LeadByte;
          stream.prepend(bite);
          return decoderError(fatal);
        }

        // Otherwise
        // Set iso-2022-jp decoder state to lead byte and return
        // error.
        iso2022jp_decoder_state = states.LeadByte;
        return decoderError(fatal);

      case states.EscapeStart:
        // Escape start

        // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
        // byte, iso-2022-jp decoder state to escape, and return
        // continue.
        if (bite === 0x24 || bite === 0x28) {
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.Escape;
          return null;
        }

        // 2. Prepend byte to stream.
        stream.prepend(bite);

        // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state, and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);

      case states.Escape:
        // Escape

        // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
        // 0x00.
        var lead = iso2022jp_lead;
        iso2022jp_lead = 0x00;

        // 2. Let state be null.
        var state = null;

        // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
        if (lead === 0x28 && bite === 0x42)
          state = states.ASCII;

        // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
        if (lead === 0x28 && bite === 0x4A)
          state = states.Roman;

        // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
        if (lead === 0x28 && bite === 0x49)
          state = states.Katakana;

        // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
        // state to lead byte.
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
          state = states.LeadByte;

        // 7. If state is non-null, run these substeps:
        if (state !== null) {
          // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
          // output state to state. Both must be updated: the output
          // state is what the error paths fall back to.
          iso2022jp_decoder_state = iso2022jp_decoder_output_state = state;

          // 2. Let output flag be the iso-2022-jp output flag.
          var output_flag = iso2022jp_output_flag;

          // 3. Set the iso-2022-jp output flag.
          iso2022jp_output_flag = true;

          // 4. Return continue, if output flag is unset, and error
          // otherwise.
          return !output_flag ? null : decoderError(fatal);
        }

        // 8. Prepend lead and byte to stream.
        stream.prepend([lead, bite]);

        // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);
    }
  };
}

// 13.2.2 iso-2022-jp encoder
/**
 * iso-2022-jp encoder. Tracks which character set the output is
 * currently in and emits a three-byte escape sequence whenever the next
 * code point needs a different one. Nothing is read from options.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function ISO2022JPEncoder(options) {
  // iso-2022-jp's encoder has an associated iso-2022-jp encoder
  // state which is one of ASCII, Roman, and jis0208 (initially
  // ASCII).
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    jis0208: 2
  };
  var /** @type {number} */ iso2022jp_state = states.ASCII;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream and iso-2022-jp encoder
    // state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (code_point === end_of_stream &&
        iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1B, 0x28, 0x42];
    }

    // 2. If code point is end-of-stream and iso-2022-jp encoder
    // state is ASCII, return finished.
    if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
      return finished;

    // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
    // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
    if ((iso2022jp_state === states.ASCII ||
         iso2022jp_state === states.Roman) &&
        (code_point === 0x000E || code_point === 0x000F ||
         code_point === 0x001B)) {
      return encoderError(0xFFFD);
    }

    // 4. If iso-2022-jp encoder state is ASCII and code point is an
    // ASCII code point, return a byte whose value is code point.
    if (iso2022jp_state === states.ASCII &&
        isASCIICodePoint(code_point))
      return code_point;

    // 5. If iso-2022-jp encoder state is Roman and code point is an
    // ASCII code point, excluding U+005C and U+007E, or is U+00A5
    // or U+203E, run these substeps:
    if (iso2022jp_state === states.Roman &&
        ((isASCIICodePoint(code_point) &&
          code_point !== 0x005C && code_point !== 0x007E) ||
         (code_point === 0x00A5 || code_point === 0x203E))) {

      // 1. If code point is an ASCII code point, return a byte
      // whose value is code point.
      if (isASCIICodePoint(code_point))
        return code_point;

      // 2. If code point is U+00A5, return byte 0x5C.
      if (code_point === 0x00A5)
        return 0x5C;

      // 3. If code point is U+203E, return byte 0x7E.
      if (code_point === 0x203E)
        return 0x7E;
    }

    // 6. If code point is an ASCII code point, and iso-2022-jp
    // encoder state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (isASCIICodePoint(code_point) &&
        iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1B, 0x28, 0x42];
    }

    // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
    // encoder state is not Roman, prepend code point to stream, set
    // iso-2022-jp encoder state to Roman, and return three bytes
    // 0x1B 0x28 0x4A.
    if ((code_point === 0x00A5 || code_point === 0x203E) &&
        iso2022jp_state !== states.Roman) {
      stream.prepend(code_point);
      iso2022jp_state = states.Roman;
      return [0x1B, 0x28, 0x4A];
    }

    // 8. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212)
      code_point = 0xFF0D;

    // 9. Let pointer be the index pointer for code point in index
    // jis0208.
    var pointer = indexPointerFor(code_point, index('jis0208'));

    // 10. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 11. If iso-2022-jp encoder state is not jis0208, prepend code
    // point to stream, set iso-2022-jp encoder state to jis0208,
    // and return three bytes 0x1B 0x24 0x42.
    if (iso2022jp_state !== states.jis0208) {
      stream.prepend(code_point);
      iso2022jp_state = states.jis0208;
      return [0x1B, 0x24, 0x42];
    }

    // 12. Let lead be floor(pointer / 94) + 0x21.
    var lead = floor(pointer / 94) + 0x21;

    // 13. Let trail be pointer % 94 + 0x21.
    var trail = pointer % 94 + 0x21;

    // 14. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}

/** @param {{fatal: boolean}} options */
encoders['ISO-2022-JP'] = function(options) {
  return new ISO2022JPEncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['ISO-2022-JP'] = function(options) {
  return new ISO2022JPDecoder(options);
};

// 13.3 Shift_JIS

// 13.3.1 Shift_JIS decoder
/**
 * Byte-at-a-time Shift_JIS decoder. A pending lead byte is remembered
 * between handler calls.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function ShiftJISDecoder(options) {
  var fatal = options.fatal;
  // Shift_JIS's decoder has an associated Shift_JIS lead (initially
  // 0x00).
  var /** @type {number} */ Shift_JIS_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and Shift_JIS lead is not 0x00,
    // set Shift_JIS lead to 0x00 and return error.
    if (bite === end_of_stream && Shift_JIS_lead !== 0x00) {
      Shift_JIS_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and Shift_JIS lead is 0x00,
    // return finished.
    if (bite === end_of_stream && Shift_JIS_lead === 0x00)
      return finished;

    // 3. If Shift_JIS lead is not 0x00, let lead be Shift_JIS lead,
    // let pointer be null, set Shift_JIS lead to 0x00, and then run
    // these substeps:
    if (Shift_JIS_lead !== 0x00) {
      var lead = Shift_JIS_lead;
      var pointer = null;
      Shift_JIS_lead = 0x00;

      // 1. Let offset be 0x40, if byte is less than 0x7F, and 0x41
      // otherwise.
      var offset = (bite < 0x7F) ? 0x40 : 0x41;

      // 2. Let lead offset be 0x81, if lead is less than 0xA0, and
      // 0xC1 otherwise.
      var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;

      // 3. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
      // to 0xFC, inclusive, set pointer to (lead − lead offset) ×
      // 188 + byte − offset.
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC))
        pointer = (lead - lead_offset) * 188 + bite - offset;

      // 4. If pointer is in the range 8836 to 10715, inclusive,
      // return a code point whose value is 0xE000 − 8836 + pointer.
      if (inRange(pointer, 8836, 10715))
        return 0xE000 - 8836 + pointer;

      // 5. Let code point be null, if pointer is null, and the
      // index code point for pointer in index jis0208 otherwise.
      var code_point = (pointer === null) ? null :
          indexCodePointFor(pointer, index('jis0208'));

      // 6. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 7. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 8. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If byte is an ASCII byte or 0x80, return a code point
    // whose value is byte.
    if (isASCIIByte(bite) || bite === 0x80)
      return bite;

    // 5. If byte is in the range 0xA1 to 0xDF, inclusive, return a
    // code point whose value is 0xFF61 − 0xA1 + byte.
    if (inRange(bite, 0xA1, 0xDF))
      return 0xFF61 - 0xA1 + bite;

    // 6. If byte is in the range 0x81 to 0x9F, inclusive, or 0xE0
    // to 0xFC, inclusive, set Shift_JIS lead to byte and return
    // continue.
    if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
      Shift_JIS_lead = bite;
      return null;
    }

    // 7. Return error.
    return decoderError(fatal);
  };
}

// 13.3.2 Shift_JIS encoder
/**
 * Stateless Shift_JIS encoder. Nothing is read from options; the
 * parameter is kept so the constructor signature matches the other
 * encoders.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function ShiftJISEncoder(options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point or U+0080, return a
    // byte whose value is code point.
    if (isASCIICodePoint(code_point) || code_point === 0x0080)
      return code_point;

    // 3. If code point is U+00A5, return byte 0x5C.
    if (code_point === 0x00A5)
      return 0x5C;

    // 4. If code point is U+203E, return byte 0x7E.
    if (code_point === 0x203E)
      return 0x7E;

    // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
    // return a byte whose value is code point − 0xFF61 + 0xA1.
    if (inRange(code_point, 0xFF61, 0xFF9F))
      return code_point - 0xFF61 + 0xA1;

    // 6. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212)
      code_point = 0xFF0D;

    // 7. Let pointer be the index Shift_JIS pointer for code point.
    var pointer = indexShiftJISPointerFor(code_point);

    // 8. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 9. Let lead be floor(pointer / 188).
    var lead = floor(pointer / 188);

    // 10. Let lead offset be 0x81, if lead is less than 0x1F, and
    // 0xC1 otherwise.
    var lead_offset = (lead < 0x1F) ? 0x81 : 0xC1;

    // 11. Let trail be pointer % 188.
    var trail = pointer % 188;

    // 12. Let offset be 0x40, if trail is less than 0x3F, and 0x41
    // otherwise.
    var offset = (trail < 0x3F) ? 0x40 : 0x41;

    // 13. Return two bytes whose values are lead + lead offset and
    // trail + offset.
    return [lead + lead_offset, trail + offset];
  };
}

/** @param {{fatal: boolean}} options */
encoders['Shift_JIS'] = function(options) {
  return new ShiftJISEncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['Shift_JIS'] = function(options) {
  return new ShiftJISDecoder(options);
};

//
// 14. Legacy multi-byte Korean encodings
//

// 14.1 euc-kr

// 14.1.1 euc-kr decoder
/**
 * Byte-at-a-time euc-kr decoder. A pending lead byte is remembered
 * between handler calls.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function EUCKRDecoder(options) {
  var fatal = options.fatal;

  // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
  var /** @type {number} */ euckr_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
    // euc-kr lead to 0x00 and return error.
    if (bite === end_of_stream && euckr_lead !== 0x00) {
      euckr_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
    // finished.
    if (bite === end_of_stream && euckr_lead === 0x00)
      return finished;

    // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
    // pointer be null, set euc-kr lead to 0x00, and then run these
    // substeps:
    if (euckr_lead !== 0x00) {
      var lead = euckr_lead;
      var pointer = null;
      euckr_lead = 0x00;

      // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
      // pointer to (lead − 0x81) × 190 + (byte − 0x41).
      if (inRange(bite, 0x41, 0xFE))
        pointer = (lead - 0x81) * 190 + (bite - 0x41);

      // 2. Let code point be null, if pointer is null, and the
      // index code point for pointer in index euc-kr otherwise.
      var code_point = (pointer === null)
          ? null : indexCodePointFor(pointer, index('euc-kr'));

      // 3. If code point is null and byte is an ASCII byte, prepend
      // byte to stream. The test is on code point, not pointer: an
      // ASCII trail byte whose pointer has no index entry must also
      // be handed back so it is decoded on its own.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 4. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 5. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
    // euc-kr lead to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      euckr_lead = bite;
      return null;
    }

    // 6. Return error.
    return decoderError(fatal);
  };
}

// 14.1.2 euc-kr encoder
/**
 * Stateless euc-kr encoder. Nothing is read from options; the parameter
 * is kept so the constructor signature matches the other encoders.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function EUCKREncoder(options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Let pointer be the index pointer for code point in index
    // euc-kr.
    var pointer = indexPointerFor(code_point, index('euc-kr'));

    // 4. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 5. Let lead be floor(pointer / 190) + 0x81.
    var lead = floor(pointer / 190) + 0x81;

    // 6. Let trail be pointer % 190 + 0x41.
    var trail = (pointer % 190) + 0x41;

    // 7. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}

/** @param {{fatal: boolean}} options */
encoders['EUC-KR'] = function(options) {
  return new EUCKREncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['EUC-KR'] = function(options) {
  return new EUCKRDecoder(options);
};


//
// 15. Legacy miscellaneous encodings
//

// 15.1 replacement

// Not needed - API throws RangeError

// 15.2 Common infrastructure for utf-16be and utf-16le

/**
 * Splits a 16-bit code unit into its two bytes, ordered by endianness.
 * @param {number} code_unit
 * @param {boolean} utf16be
 * @return {!Array.<number>} bytes
 */
function convertCodeUnitToBytes(code_unit, utf16be) {
  // 1. Let byte1 be code unit >> 8.
  var byte1 = code_unit >> 8;

  // 2. Let byte2 be code unit & 0x00FF.
  var byte2 = code_unit & 0x00FF;

  // 3. Then return the bytes in order:
  // utf-16be flag is set: byte1, then byte2.
  if (utf16be)
    return [byte1, byte2];
  // utf-16be flag is unset: byte2, then byte1.
  return [byte2, byte1];
}

// 15.2.1 shared utf-16 decoder 3067 | /** 3068 | * @constructor 3069 | * @implements {Decoder} 3070 | * @param {boolean} utf16_be True if big-endian, false if little-endian. 3071 | * @param {{fatal: boolean}} options 3072 | */ 3073 | function UTF16Decoder(utf16_be, options) { 3074 | var fatal = options.fatal; 3075 | var /** @type {?number} */ utf16_lead_byte = null, 3076 | /** @type {?number} */ utf16_lead_surrogate = null; 3077 | /** 3078 | * @param {Stream} stream The stream of bytes being decoded. 3079 | * @param {number} bite The next byte read from the stream. 3080 | * @return {?(number|!Array.)} The next code point(s) 3081 | * decoded, or null if not enough data exists in the input 3082 | * stream to decode a complete code point.
3083 | */ 3084 | this.handler = function(stream, bite) { 3085 | // 1. If byte is end-of-stream and either utf-16 lead byte or 3086 | // utf-16 lead surrogate is not null, set utf-16 lead byte and 3087 | // utf-16 lead surrogate to null, and return error. NOTE(review): the branch below returns the error without clearing either field. 3088 | if (bite === end_of_stream && (utf16_lead_byte !== null || 3089 | utf16_lead_surrogate !== null)) { 3090 | return decoderError(fatal); 3091 | } 3092 | 3093 | // 2. If byte is end-of-stream and utf-16 lead byte and utf-16 3094 | // lead surrogate are null, return finished. 3095 | if (bite === end_of_stream && utf16_lead_byte === null && 3096 | utf16_lead_surrogate === null) { 3097 | return finished; 3098 | } 3099 | 3100 | // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte 3101 | // and return continue. 3102 | if (utf16_lead_byte === null) { 3103 | utf16_lead_byte = bite; 3104 | return null; 3105 | } 3106 | 3107 | // 4. Let code unit be the result of: 3108 | var code_unit; 3109 | if (utf16_be) { 3110 | // utf-16be decoder flag is set 3111 | // (utf-16 lead byte << 8) + byte. 3112 | code_unit = (utf16_lead_byte << 8) + bite; 3113 | } else { 3114 | // utf-16be decoder flag is unset 3115 | // (byte << 8) + utf-16 lead byte. 3116 | code_unit = (bite << 8) + utf16_lead_byte; 3117 | } 3118 | // Then set utf-16 lead byte to null. 3119 | utf16_lead_byte = null; 3120 | 3121 | // 5. If utf-16 lead surrogate is not null, let lead surrogate 3122 | // be utf-16 lead surrogate, set utf-16 lead surrogate to null, 3123 | // and then run these substeps: 3124 | if (utf16_lead_surrogate !== null) { 3125 | var lead_surrogate = utf16_lead_surrogate; 3126 | utf16_lead_surrogate = null; 3127 | 3128 | // 1. If code unit is in the range U+DC00 to U+DFFF, 3129 | // inclusive, return a code point whose value is 0x10000 + 3130 | // ((lead surrogate − 0xD800) << 10) + (code unit − 0xDC00).
3131 | if (inRange(code_unit, 0xDC00, 0xDFFF)) { 3132 | return 0x10000 + (lead_surrogate - 0xD800) * 0x400 + 3133 | (code_unit - 0xDC00); 3134 | } 3135 | 3136 | // 2. Prepend the sequence resulting of converting code unit 3137 | // to bytes using utf-16be decoder flag to stream and return 3138 | // error. 3139 | stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be)); 3140 | return decoderError(fatal); 3141 | } 3142 | 3143 | // 6. If code unit is in the range U+D800 to U+DBFF, inclusive, 3144 | // set utf-16 lead surrogate to code unit and return continue. 3145 | if (inRange(code_unit, 0xD800, 0xDBFF)) { 3146 | utf16_lead_surrogate = code_unit; 3147 | return null; 3148 | } 3149 | 3150 | // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive, 3151 | // return error. 3152 | if (inRange(code_unit, 0xDC00, 0xDFFF)) 3153 | return decoderError(fatal); 3154 | 3155 | // 8. Return code point code unit. 3156 | return code_unit; 3157 | }; 3158 | } 3159 | 3160 | // 15.2.2 shared utf-16 encoder 3161 | /** 3162 | * @constructor 3163 | * @implements {Encoder} 3164 | * @param {boolean} utf16_be True if big-endian, false if little-endian. 3165 | * @param {{fatal: boolean}} options 3166 | */ 3167 | function UTF16Encoder(utf16_be, options) { 3168 | var fatal = options.fatal; 3169 | /** 3170 | * @param {Stream} stream Input stream. 3171 | * @param {number} code_point Next code point read from the stream. 3172 | * @return {(number|!Array.)} Byte(s) to emit. 3173 | */ 3174 | this.handler = function(stream, code_point) { 3175 | // 1. If code point is end-of-stream, return finished. 3176 | if (code_point === end_of_stream) 3177 | return finished; 3178 | 3179 | // 2. If code point is in the range U+0000 to U+FFFF, inclusive, 3180 | // return the sequence resulting of converting code point to 3181 | // bytes using utf-16be encoder flag. 3182 | if (inRange(code_point, 0x0000, 0xFFFF)) 3183 | return convertCodeUnitToBytes(code_point, utf16_be); 3184 | 3185 | // 3. 
Let lead be ((code point − 0x10000) >> 10) + 0xD800, 3186 | // converted to bytes using utf-16be encoder flag. 3187 | var lead = convertCodeUnitToBytes( 3188 | ((code_point - 0x10000) >> 10) + 0xD800, utf16_be); 3189 | 3190 | // 4. Let trail be ((code point − 0x10000) & 0x3FF) + 0xDC00, 3191 | // converted to bytes using utf-16be encoder flag. 3192 | var trail = convertCodeUnitToBytes( 3193 | ((code_point - 0x10000) & 0x3FF) + 0xDC00, utf16_be); 3194 | 3195 | // 5. Return a byte sequence of lead followed by trail. 3196 | return lead.concat(trail); 3197 | }; 3198 | } 3199 | 3200 | // 15.3 utf-16be 3201 | // 15.3.2 utf-16be encoder 3202 | /** @param {{fatal: boolean}} options */ 3203 | encoders['UTF-16BE'] = function(options) { 3204 | return new UTF16Encoder(true, options); 3205 | }; 3206 | // 15.3.1 utf-16be decoder 3207 | /** @param {{fatal: boolean}} options */ 3208 | decoders['UTF-16BE'] = function(options) { 3209 | return new UTF16Decoder(true, options); 3210 | }; 3211 | 3212 | // 15.4 utf-16le 3213 | // 15.4.2 utf-16le encoder 3214 | /** @param {{fatal: boolean}} options */ 3215 | encoders['UTF-16LE'] = function(options) { 3216 | return new UTF16Encoder(false, options); 3217 | }; 3218 | // 15.4.1 utf-16le decoder 3219 | /** @param {{fatal: boolean}} options */ 3220 | decoders['UTF-16LE'] = function(options) { 3221 | return new UTF16Decoder(false, options); 3222 | }; 3223 | 3224 | // 15.5 x-user-defined 3225 | 3226 | // 15.5.1 x-user-defined decoder 3227 | /** 3228 | * @constructor 3229 | * @implements {Decoder} 3230 | * @param {{fatal: boolean}} options 3231 | */ 3232 | function XUserDefinedDecoder(options) { 3233 | var fatal = options.fatal; 3234 | /** 3235 | * @param {Stream} stream The stream of bytes being decoded. 3236 | * @param {number} bite The next byte read from the stream. 3237 | * @return {?(number|!Array.<number>)} The next code point(s) 3238 | * decoded, or null if not enough data exists in the input 3239 | * stream to decode a complete code point.
3240 | */ 3241 | this.handler = function(stream, bite) { 3242 | // 1. If byte is end-of-stream, return finished. 3243 | if (bite === end_of_stream) 3244 | return finished; 3245 | 3246 | // 2. If byte is an ASCII byte, return a code point whose value 3247 | // is byte. 3248 | if (isASCIIByte(bite)) 3249 | return bite; 3250 | 3251 | // 3. Return a code point whose value is 0xF780 + byte − 0x80. 3252 | return 0xF780 + bite - 0x80; 3253 | }; 3254 | } 3255 | 3256 | // 15.5.2 x-user-defined encoder 3257 | /** 3258 | * @constructor 3259 | * @implements {Encoder} 3260 | * @param {{fatal: boolean}} options 3261 | */ 3262 | function XUserDefinedEncoder(options) { 3263 | var fatal = options.fatal; 3264 | /** 3265 | * @param {Stream} stream Input stream. 3266 | * @param {number} code_point Next code point read from the stream. 3267 | * @return {(number|!Array.)} Byte(s) to emit. 3268 | */ 3269 | this.handler = function(stream, code_point) { 3270 | // 1.If code point is end-of-stream, return finished. 3271 | if (code_point === end_of_stream) 3272 | return finished; 3273 | 3274 | // 2. If code point is an ASCII code point, return a byte whose 3275 | // value is code point. 3276 | if (isASCIICodePoint(code_point)) 3277 | return code_point; 3278 | 3279 | // 3. If code point is in the range U+F780 to U+F7FF, inclusive, 3280 | // return a byte whose value is code point − 0xF780 + 0x80. 3281 | if (inRange(code_point, 0xF780, 0xF7FF)) 3282 | return code_point - 0xF780 + 0x80; 3283 | 3284 | // 4. Return error with code point. 
3285 | return encoderError(code_point); 3286 | }; 3287 | } 3288 | 3289 | /** @param {{fatal: boolean}} options */ 3290 | encoders['x-user-defined'] = function(options) { 3291 | return new XUserDefinedEncoder(options); 3292 | }; 3293 | /** @param {{fatal: boolean}} options */ 3294 | decoders['x-user-defined'] = function(options) { 3295 | return new XUserDefinedDecoder(options); 3296 | }; 3297 | 3298 | if (!global['TextEncoder']) 3299 | global['TextEncoder'] = TextEncoder; 3300 | if (!global['TextDecoder']) 3301 | global['TextDecoder'] = TextDecoder; 3302 | 3303 | if (typeof module !== "undefined" && module.exports) { 3304 | module.exports = { 3305 | TextEncoder: global['TextEncoder'], 3306 | TextDecoder: global['TextDecoder'], 3307 | EncodingIndexes: global["encoding-indexes"] 3308 | }; 3309 | } 3310 | 3311 | // For strict environments where `this` inside the global scope 3312 | // is `undefined`, take a pure object instead 3313 | }(this || {})); -------------------------------------------------------------------------------- /package/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /package/README.md: -------------------------------------------------------------------------------- 1 | # [Pintorita - Pintora plugin for typst ](https://github.com/taylorh140/typst-pintora) 2 | 3 | [Pintora](https://pintorajs.vercel.app/) 4 | 5 | Typst package for drawing the following from markup: 6 | - Sequence Diagram 7 | - Entity Relationship Diagram 8 | - Component Diagram 9 | - Activity Diagram 10 | - Mind Map Experiment 11 | - Gantt Diagram Experiment 12 | - DOT Diagram Experiment 13 | 14 | ![](pintorita.svg) 15 | 16 | 17 | ````typ 18 | #import "@preview/pintorita:0.1.4" 19 | 20 | #set page(height: auto, width: auto, fill: black, margin: 2em) 21 | #set text(fill: white) 22 | 23 | #show raw.where(lang: "pintora"): it => pintorita.render(it.text) 24 | 25 | = pintora 26 | 27 | Typst just got a load of diagrams. 
28 | 29 | ```pintora 30 | mindmap 31 | @param layoutDirection TB 32 | + UML Diagrams 33 | ++ Behavior Diagrams 34 | +++ Sequence Diagram 35 | +++ State Diagram 36 | +++ Activity Diagram 37 | ++ Structural Diagrams 38 | +++ Class Diagram 39 | +++ Component Diagram 40 | ``` 41 | 42 | ``` 43 | mindmap 44 | @param layoutDirection TB 45 | + UML Diagrams 46 | ++ Behavior Diagrams 47 | +++ Sequence Diagram 48 | +++ State Diagram 49 | +++ Activity Diagram 50 | ++ Structural Diagrams 51 | +++ Class Diagram 52 | +++ Component Diagram 53 | ``` 54 | 55 | ```` 56 | 57 | 58 | ## Documentation 59 | 60 | ### `render` 61 | 62 | Render a pintora string to an image 63 | 64 | #### Arguments 65 | 66 | * `src`: `str` - pintora source string 67 | * `factor`: scale factor applied to the output svg; `factor: 0.5` scales images down by half, so scale can be consistent across renders. Cannot be combined with `width`. 68 | * `style`: `str` diagram style, `default` or `dark` or `larkLight` or `larkDark`; default is `larkLight` 69 | * `font`: `str` font family, default is `Arial` 70 | * All other arguments are passed to `image` so you can customize the image size 71 | 72 | #### Returns 73 | 74 | The image, of type `content` 75 | 76 | ### `render-svg` 77 | 78 | Render a pintora string to svg source text 79 | 80 | #### Arguments 81 | 82 | * `src`: `str` - pintora source string 83 | * `style`: `str` diagram style, `default` or `dark` or `larkLight` or `larkDark`; default is `larkLight` 84 | * `font`: `str` font family, default is `Arial` 85 | * No other arguments are accepted; use `render` to customize the image size 86 | 87 | #### Returns 88 | 89 | The svg source, of type `str` 90 | 91 | ## History 92 | 93 | * 0.1.0 - Initial Release 94 | * 0.1.1 - Updated to Jogs 0.2.3 and pintora 0.7.3 95 | * 0.1.2 - Fixed strange offset of text rows in class diagram, added `render-svg` function and more customization options 96 | * 0.1.3 - Bug Fixes for argument handling. 97 | * 0.1.4 - Updated to Jogs 0.2.4 and pintora 0.7.5.
98 | -------------------------------------------------------------------------------- /package/lib.typ: -------------------------------------------------------------------------------- 1 | #import "./pintorita.typ": render, render-svg 2 | 3 | 4 | -------------------------------------------------------------------------------- /package/pintorita.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /package/pintorita.typ: -------------------------------------------------------------------------------- 1 | #import "@preview/jogs:0.2.4": compile-js, call-js-function 2 | 3 | #let pintora-src = read("./pintora.js") 4 | #let pintora-bytecode = compile-js(pintora-src) 5 | 6 | // Helper function that computes the width the rendered image should be given. 7 | // With no scale factor the caller's width is passed through unchanged; with a factor, 8 | // the width declared in the svg string is read and multiplied by it (result in pt). 9 | #let getNewWidth(svg-output, factor, width) = { 10 | if (factor == none) { 11 | return width 12 | } 13 | 14 | if (width != auto) { 15 | panic("invalid arguments. factor and width cannot both be set.") 16 | } 17 | 18 | //This method depends on the consistency of the generated svg 19 | //since it uses simple regexes to get the pre-rendered width 20 | let svg-width = svg-output.find(regex("width=\"(\d+)")).find(regex("\d+")) 21 | return int(svg-width) * factor * 1pt 22 | } 23 | 24 | // Renders an image from the source pintora string; factor and width cannot both be set.
25 | #let render( 26 | src, 27 | factor: none, 28 | style: "larkLight", 29 | font: "Arial", 30 | width: auto, 31 | ..args, 32 | ) = { 33 | let named-args = args.named() 34 | 35 | let svg-output = call-js-function(pintora-bytecode, "PintoraRender", src, style, font) 36 | 37 | let newWidth = getNewWidth(svg-output, factor, width) 38 | 39 | image( 40 | bytes(svg-output), 41 | width: newWidth, 42 | ..args, 43 | ) 44 | } 45 | 46 | // Returns the svg text produced from the pintora source using the requested style and font; no image is built. 47 | #let render-svg( 48 | src, 49 | style: "larkLight", 50 | font: "Arial", 51 | ) = { 52 | // style: ["default", "larkLight", "larkDark", "dark"] 53 | let svg-output = call-js-function(pintora-bytecode, "PintoraRender", src, style, font) 54 | 55 | svg-output 56 | } 57 | -------------------------------------------------------------------------------- /package/typst.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pintorita" 3 | version = "0.1.4" 4 | entrypoint = "lib.typ" 5 | authors = ["Min Chen (hikerpig)","Taylorh140"] 6 | license = "MIT" 7 | description = "Package to draw Sequence Diagrams, Entity Relationship Diagrams, Component Diagrams, Activity Diagrams, Mind Maps, Gantt Diagrams, and DOT Diagrams based on Pintora which is heavily influenced by mermaid.js and plantuml." 8 | categories = ["visualization"] 9 | 10 | repository = "https://github.com/taylorh140/typst-pintora" 11 | keywords = [ 12 | "js", 13 | "javascript", 14 | "pintora", 15 | "gantt", 16 | "sequence", 17 | ] 18 | exclude = [ 19 | "pintorita.svg", 20 | ] 21 | --------------------------------------------------------------------------------