├── test ├── web-platform-tests │ └── .gitkeep ├── web-platform.js └── api.js ├── .gitattributes ├── .gitignore ├── .editorconfig ├── eslint.config.mjs ├── lib ├── utils.js └── parser.js ├── .github └── workflows │ └── build.yml ├── LICENSE.txt ├── scripts └── get-latest-platform-tests.js ├── package.json └── README.md /test/web-platform-tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # lint requires lf line endings 2 | *.js text eol=lf 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /npm-debug.log 3 | 4 | /coverage/ 5 | /test/web-platform-tests/* 6 | !/test/web-platform-tests/.gitkeep 7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | charset = utf-8 8 | indent_style = space 9 | indent_size = 2 10 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import domenicConfig from "@domenic/eslint-config"; 2 | import globals from "globals"; 3 | 4 | export default [ 5 | { 6 | files: ["**/*.js"], 7 | languageOptions: { 8 | sourceType: "commonjs", 9 | globals: globals.node 10 | } 11 | }, 12 | ...domenicConfig, 13 | { 14 | files: ["scripts/**.js"], 15 | rules: { 16 | "no-process-env": "off", 17 | "no-process-exit": "off", 18 | "no-console": "off" 19 | } 20 | } 21 | ]; 22 | 
-------------------------------------------------------------------------------- /lib/utils.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | exports.stripLeadingAndTrailingASCIIWhitespace = string => { 4 | return string.replace(/^[ \t\n\f\r]+/u, "").replace(/[ \t\n\f\r]+$/u, ""); 5 | }; 6 | 7 | exports.isomorphicDecode = input => { 8 | return Array.from(input, byte => String.fromCodePoint(byte)).join(""); 9 | }; 10 | 11 | exports.forgivingBase64Decode = data => { 12 | let asString; 13 | try { 14 | asString = atob(data); 15 | } catch { 16 | return null; 17 | } 18 | 19 | return Uint8Array.from(asString, c => c.codePointAt(0)); 20 | }; 21 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | pull_request: 4 | branches: [main] 5 | push: 6 | branches: [main] 7 | jobs: 8 | build: 9 | name: Lint and tests 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | node-version: 15 | - 20 16 | - 22 17 | - latest 18 | steps: 19 | - uses: actions/checkout@v5 20 | - uses: actions/setup-node@v5 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | - run: npm ci 24 | - run: npm run lint 25 | - run: npm test 26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright © Domenic Denicola 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject 
"use strict";

// Allow skipping the download entirely (e.g. when offline) by setting NO_UPDATE.
if (process.env.NO_UPDATE) {
  process.exit(0);
}

const path = require("node:path");
const fs = require("node:fs/promises");

// Pin to specific version, reflecting the spec version in the readme.
//
// To get the latest commit:
// 1. Go to https://github.com/web-platform-tests/wpt/tree/master/fetch/data-urls
// 2. Press "y" on your keyboard to get a permalink
// 3. Copy the commit hash
const commitHash = "d9d78543960a04ea8ad8f1aa3c7536b6a9a87d9a";

const urlPrefix = `https://raw.githubusercontent.com/w3c/web-platform-tests/${commitHash}` +
                  `/fetch/data-urls/resources/`;

const files = ["base64.json", "data-urls.json"];

/**
 * Downloads every pinned web-platform-tests resource in parallel into test/web-platform-tests/.
 *
 * @returns {Promise<void>} Resolves once all files have been written.
 * @throws {Error} If any download responds with a non-2xx HTTP status.
 */
async function main() {
  await Promise.all(files.map(async file => {
    const url = urlPrefix + file;
    const targetFile = path.resolve(__dirname, "..", "test", "web-platform-tests", file);

    const res = await fetch(url);

    // Without this check an HTTP error page would be saved as the JSON fixture, and the test run would
    // only fail later with an unrelated JSON parse error.
    if (!res.ok) {
      throw new Error(`Failed to download ${url}: ${res.status} ${res.statusText}`);
    }

    await fs.writeFile(targetFile, res.body);
  }));
}

main().catch(e => {
  console.error(e.stack);
  process.exit(1);
});
-------------------------------------------------------------------------------- /test/web-platform.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const { describe, test } = require("node:test"); 3 | const assert = require("node:assert"); 4 | const base64TestCases = require("./web-platform-tests/base64.json"); 5 | const dataURLsTestCases = require("./web-platform-tests/data-urls.json"); 6 | const parse = require("../lib/parser.js"); 7 | 8 | describe("base64.json", () => { 9 | for (const [input, expectedBodyBytes] of base64TestCases) { 10 | const dataURL = `data:;base64,${input}`; 11 | test(dataURL, () => { 12 | const result = parse(dataURL); 13 | 14 | if (expectedBodyBytes === null) { 15 | assert.equal(result, null); 16 | } else { 17 | assert.equal(result.mimeType.type, "text"); 18 | assert.equal(result.mimeType.subtype, "plain"); 19 | assert.equal(result.mimeType.parameters.size, 1); 20 | assert.equal(result.mimeType.parameters.get("charset"), "US-ASCII"); 21 | 22 | assert.equal(result.body.constructor, Uint8Array); 23 | assert.deepEqual(result.body, Uint8Array.from(expectedBodyBytes)); 24 | } 25 | }); 26 | } 27 | }); 28 | 29 | describe("data-urls.json", () => { 30 | for (const [dataURL, expectedMIMEType, expectedBodyBytes] of dataURLsTestCases) { 31 | test(dataURL, () => { 32 | const result = parse(dataURL); 33 | 34 | if (expectedMIMEType === null) { 35 | assert.equal(result, null); 36 | } else { 37 | assert.equal(result.mimeType.toString(), expectedMIMEType); 38 | 39 | assert.equal(result.body.constructor, Uint8Array); 40 | assert.deepEqual(result.body, Uint8Array.from(expectedBodyBytes)); 41 | } 42 | }); 43 | } 44 | }); 45 | -------------------------------------------------------------------------------- /lib/parser.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const MIMEType = require("whatwg-mimetype"); 3 | const { parseURL, serializeURL, 
percentDecodeString } = require("whatwg-url"); 4 | const { stripLeadingAndTrailingASCIIWhitespace, isomorphicDecode, forgivingBase64Decode } = require("./utils.js"); 5 | 6 | module.exports = stringInput => { 7 | const urlRecord = parseURL(stringInput); 8 | 9 | if (urlRecord === null) { 10 | return null; 11 | } 12 | 13 | return module.exports.fromURLRecord(urlRecord); 14 | }; 15 | 16 | module.exports.fromURLRecord = urlRecord => { 17 | if (urlRecord.scheme !== "data") { 18 | return null; 19 | } 20 | 21 | const input = serializeURL(urlRecord, true).substring("data:".length); 22 | 23 | let position = 0; 24 | 25 | let mimeType = ""; 26 | while (position < input.length && input[position] !== ",") { 27 | mimeType += input[position]; 28 | ++position; 29 | } 30 | mimeType = stripLeadingAndTrailingASCIIWhitespace(mimeType); 31 | 32 | if (position === input.length) { 33 | return null; 34 | } 35 | 36 | ++position; 37 | 38 | const encodedBody = input.substring(position); 39 | 40 | let body = percentDecodeString(encodedBody); 41 | 42 | // Can't use /i regexp flag because it isn't restricted to ASCII. 
43 | const mimeTypeBase64MatchResult = /(.*); *[Bb][Aa][Ss][Ee]64$/u.exec(mimeType); 44 | if (mimeTypeBase64MatchResult) { 45 | const stringBody = isomorphicDecode(body); 46 | body = forgivingBase64Decode(stringBody); 47 | 48 | if (body === null) { 49 | return null; 50 | } 51 | mimeType = mimeTypeBase64MatchResult[1]; 52 | } 53 | 54 | if (mimeType.startsWith(";")) { 55 | mimeType = `text/plain${mimeType}`; 56 | } 57 | 58 | let mimeTypeRecord; 59 | try { 60 | mimeTypeRecord = new MIMEType(mimeType); 61 | } catch { 62 | mimeTypeRecord = new MIMEType("text/plain;charset=US-ASCII"); 63 | } 64 | 65 | return { 66 | mimeType: mimeTypeRecord, 67 | body 68 | }; 69 | }; 70 | -------------------------------------------------------------------------------- /test/api.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | const { describe, it } = require("node:test"); 3 | const assert = require("node:assert"); 4 | const parseDataURL = require(".."); 5 | /* 6 | eslint 7 | array-bracket-newline: ["error", "consistent"] 8 | array-element-newline: "off" 9 | */ 10 | 11 | describe("Smoke tests via README examples", () => { 12 | it("should parse no-type as expected", () => { 13 | const textExample = parseDataURL("data:,Hello%2C%20World!"); 14 | assert.equal(textExample.mimeType.toString(), "text/plain;charset=US-ASCII"); 15 | assert.equal(textExample.body.constructor, Uint8Array); 16 | assert.deepEqual(textExample.body, new Uint8Array([72, 101, 108, 108, 111, 44, 32, 87, 111, 114, 108, 100, 33])); 17 | }); 18 | 19 | it("should parse text/html as expected", () => { 20 | const htmlExample = parseDataURL("data:text/html,%3Ch1%3EHello%2C%20World!%3C%2Fh1%3E"); 21 | assert.equal(htmlExample.mimeType.toString(), "text/html"); 22 | assert.equal(htmlExample.body.constructor, Uint8Array); 23 | assert.deepEqual(htmlExample.body, new Uint8Array([ 24 | 60, 104, 49, 62, 72, 101, 108, 108, 111, 44, 32, 87, 111, 114, 108, 100, 33, 60, 47, 104, 49, 
62 25 | ])); 26 | }); 27 | 28 | it("should parse img/png base64 as expected", () => { 29 | const pngExample = parseDataURL("" + 30 | "ANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" + 31 | "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" + 32 | "5ErkJggg=="); 33 | assert.equal(pngExample.mimeType.toString(), "image/png"); 34 | assert.equal(pngExample.body.constructor, Uint8Array); 35 | assert.deepEqual(pngExample.body, new Uint8Array([ 36 | 137, 80, 78, 71, 13, 10, 26, 10, 0, 0, 0, 13, 37 | 73, 72, 68, 82, 0, 0, 0, 5, 0, 0, 0, 5, 38 | 8, 6, 0, 0, 0, 141, 111, 38, 229, 0, 0, 0, 39 | 28, 73, 68, 65, 84, 8, 215, 99, 248, 255, 255, 63, 40 | 195, 127, 6, 32, 5, 195, 32, 18, 132, 208, 49, 241, 41 | 130, 88, 205, 4, 0, 14, 245, 53, 203, 209, 142, 14, 42 | 31, 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 43 | 130 44 | ])); 45 | }); 46 | }); 47 | 48 | describe("Additional coverage", () => { 49 | it("should return null for non-data: URLs", () => { 50 | assert.equal(parseDataURL("https://example.com/"), null); 51 | }); 52 | }); 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parse `data:` URLs 2 | 3 | This package helps you parse `data:` URLs [according to the WHATWG Fetch Standard](https://fetch.spec.whatwg.org/#data-urls): 4 | 5 | ```js 6 | const parseDataURL = require("data-urls"); 7 | 8 | const textExample = parseDataURL("data:,Hello%2C%20World!"); 9 | console.log(textExample.mimeType.toString()); // "text/plain;charset=US-ASCII" 10 | console.log(textExample.body); // Uint8Array(13) [ 72, 101, 108, 108, 111, 44, … ] 11 | 12 | const htmlExample = parseDataURL("data:text/html,%3Ch1%3EHello%2C%20World!%3C%2Fh1%3E"); 13 | console.log(htmlExample.mimeType.toString()); // "text/html" 14 | console.log(htmlExample.body); // Uint8Array(22) [ 60, 104, 49, 62, 72, 101, … ] 15 | 16 | const pngExample = parseDataURL("" + 17 | 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" + 18 | "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" + 19 | "5ErkJggg=="); 20 | console.log(pngExample.mimeType.toString()); // "image/png" 21 | console.log(pngExample.body); // Uint8Array(85) [ 137, 80, 78, 71, 13, 10, … ] 22 | ``` 23 | 24 | ## API 25 | 26 | This package's main module's default export is a function that accepts a string and returns a `{ mimeType, body }` object, or `null` if the input cannot be parsed as a `data:` URL. 27 | 28 | - The `mimeType` property is an instance of [whatwg-mimetype](https://www.npmjs.com/package/whatwg-mimetype)'s `MIMEType` class. 29 | - The `body` property is a `Uint8Array` instance. 30 | 31 | As shown in the examples above, you can easily get a stringified version of the MIME type using its `toString()` method. Read on for more on getting the stringified version of the body. 32 | 33 | ### Decoding the body 34 | 35 | To decode the body bytes of a parsed data URL, you'll need to use the `charset` parameter of the MIME type, if any. This contains an encoding [label](https://encoding.spec.whatwg.org/#label); there are [various possible labels](https://encoding.spec.whatwg.org/#names-and-labels) for a given encoding. We suggest using the [whatwg-encoding](https://www.npmjs.com/package/whatwg-encoding) package as follows: 36 | 37 | ```js 38 | const parseDataURL = require("data-urls"); 39 | const { labelToName, decode } = require("whatwg-encoding"); 40 | 41 | const dataURL = parseDataURL(arbitraryString); 42 | 43 | // If there's no charset parameter, let's just hope it's UTF-8; that seems like a good guess. 
44 | const encodingName = labelToName(dataURL.mimeType.parameters.get("charset") || "utf-8"); 45 | const bodyDecoded = decode(dataURL.body, encodingName); 46 | ``` 47 | 48 | This is especially important since the default, if no parseable MIME type is given, is "US-ASCII", [aka windows-1252](https://encoding.spec.whatwg.org/#names-and-labels), not UTF-8 like you might assume. So for example given an `arbitraryString` of `"data:,Héllo!"`, the above code snippet will correctly produce a `bodyDecoded` of `"HÃ©llo!"` by using the windows-1252 decoder, whereas if you used a UTF-8 decoder you'd get back `"Héllo!"`. 49 | 50 | ### Advanced functionality: parsing from a URL record 51 | 52 | If you are using the [whatwg-url](https://github.com/jsdom/whatwg-url) package, you may already have a "URL record" object on hand, as produced by that package's `parseURL` export. In that case, you can use this package's `fromURLRecord` export to save a bit of work: 53 | 54 | ```js 55 | const { parseURL } = require("whatwg-url"); 56 | const dataURLFromURLRecord = require("data-urls").fromURLRecord; 57 | 58 | const urlRecord = parseURL("data:,Hello%2C%20World!"); 59 | const dataURL = dataURLFromURLRecord(urlRecord); 60 | ``` 61 | 62 | In practice, we expect this functionality only to be used by consumers like [jsdom](https://www.npmjs.com/package/jsdom), which are using these packages at a very low level. 63 | --------------------------------------------------------------------------------