├── .gitignore ├── rollup.config.mjs ├── .eslintrc.yml ├── index.mjs ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── test ├── cjs.js ├── format.mjs ├── parse.mjs ├── encode.mjs ├── decode.mjs └── fixtures │ └── url.mjs ├── CHANGELOG.md ├── lib ├── format.mjs ├── encode.mjs ├── decode.mjs └── parse.mjs ├── package.json ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | coverage/ 3 | build/ 4 | -------------------------------------------------------------------------------- /rollup.config.mjs: -------------------------------------------------------------------------------- 1 | export default [ 2 | { 3 | input: 'index.mjs', 4 | output: { 5 | file: 'build/index.cjs.js', 6 | format: 'cjs' 7 | } 8 | } 9 | ] 10 | -------------------------------------------------------------------------------- /.eslintrc.yml: -------------------------------------------------------------------------------- 1 | extends: standard 2 | 3 | ignorePatterns: 4 | - build/ 5 | 6 | overrides: 7 | - 8 | files: [ 'index.mjs', 'lib/**/*.mjs' ] 9 | - 10 | files: [ 'test/**/*.mjs' ] 11 | env: 12 | mocha: true -------------------------------------------------------------------------------- /index.mjs: -------------------------------------------------------------------------------- 1 | import decode from './lib/decode.mjs' 2 | import encode from './lib/encode.mjs' 3 | import format from './lib/format.mjs' 4 | import parse from './lib/parse.mjs' 5 | 6 | export { 7 | decode, 8 | encode, 9 | format, 10 | parse 11 | } 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: daily 7 | 8 | - package-ecosystem: npm 9 | directory: / 10 | schedule: 11 | interval: daily 12 | allow: 13 | - dependency-type: production 14 | -------------------------------------------------------------------------------- /test/cjs.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | /* eslint-env mocha */ 3 | 4 | const mdurl = require('../') 5 | const assert = require('assert') 6 | 7 | describe('CJS', () => { 8 | it('require', () => { 9 | assert.ok(mdurl.parse) 10 | assert.ok(mdurl.format) 11 | assert.ok(mdurl.encode) 12 | assert.ok(mdurl.decode) 13 | }) 14 | }) 15 | -------------------------------------------------------------------------------- /test/format.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert' 2 | import { parse, format } from '../index.mjs' 3 | import fixtures from './fixtures/url.mjs' 4 | 5 | describe('format', () => { 6 | Object.keys(fixtures).forEach(url => { 7 | it(url, () => { 8 | const parsed = parse(url) 9 | assert.strictEqual(format(parsed), url) 10 | }) 11 | }) 12 | }) 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2.0.0 / 2023-12-01 2 | ------------------ 3 | 4 | - Rewrite to ESM. 5 | 6 | 7 | 1.0.1 / 2015-09-15 8 | ------------------ 9 | 10 | - Fixed closure compiler compatibility (#1). 11 | 12 | 13 | 1.0.0 / 2015-03-04 14 | ------------------ 15 | 16 | - Added `.decode()`, `.parse()`, `.format()`. 17 | 18 | 19 | 0.0.1 / 2015-03-02 20 | ------------------ 21 | 22 | - First release. 23 | -------------------------------------------------------------------------------- /test/parse.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert' 2 | import { parse } from '../index.mjs' 3 | import fixtures from './fixtures/url.mjs' 4 | 5 | describe('parse', () => { 6 | Object.keys(fixtures).forEach(function (url) { 7 | it(url, () => { 8 | const parsed = parse(url) 9 | 10 | Object.keys(parsed).forEach(function (x) { 11 | if (parsed[x] === null) { delete parsed[x] } 12 | }) 13 | 14 | assert.deepEqual(parsed, fixtures[url]) 15 | }) 16 | }) 17 | }) 18 | -------------------------------------------------------------------------------- /lib/format.mjs: -------------------------------------------------------------------------------- 1 | export default function format (url) { 2 | let result = '' 3 | 4 | result += url.protocol || '' 5 | result += url.slashes ? '//' : '' 6 | result += url.auth ? url.auth + '@' : '' 7 | 8 | if (url.hostname && url.hostname.indexOf(':') !== -1) { 9 | // ipv6 address 10 | result += '[' + url.hostname + ']' 11 | } else { 12 | result += url.hostname || '' 13 | } 14 | 15 | result += url.port ? ':' + url.port : '' 16 | result += url.pathname || '' 17 | result += url.search || '' 18 | result += url.hash || '' 19 | 20 | return result 21 | }; 22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '0 0 * * 3' 8 | 9 | jobs: 10 | test: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [ '18' ] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Use Node.js ${{ matrix.node-version }} 22 | uses: actions/setup-node@v4 23 | with: 24 | node-version: ${{ matrix.node-version }} 25 | 26 | - run: npm install 27 | 28 | - name: Test 29 | run: npm test 30 | 31 | - name: Upload coverage report to coveralls.io 32 | uses: coverallsapp/github-action@master 33 | with: 34 | github-token: ${{ secrets.GITHUB_TOKEN }} 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mdurl", 3 | "version": "2.0.0", 4 | "description": "URL utilities for markdown-it", 5 | "repository": "markdown-it/mdurl", 6 | "license": "MIT", 7 | "main": "build/index.cjs.js", 8 | "module": "index.mjs", 9 | "exports": { 10 | ".": { 11 | "require": "./build/index.cjs.js", 12 | "import": "./index.mjs" 13 | }, 14 | "./*": { 15 | "require": "./*", 16 | "import": "./*" 17 | } 18 | }, 19 | "scripts": { 20 | "lint": "eslint .", 21 | "build": "rollup -c", 22 | "test": "npm run lint && npm run build && c8 --exclude build --exclude test -r text -r html -r lcov mocha", 23 | "prepublishOnly": "npm run lint && npm run build" 24 | }, 25 | "files": [ 26 | "index.mjs", 27 | "lib/", 28 | "build/" 29 | ], 30 | "devDependencies": { 31 | "c8": "^8.0.1", 32 | "eslint": "^8.54.0", 33 | "eslint-config-standard": "^17.1.0", 34 | "mocha": "^10.2.0", 35 | "rollup": "^4.6.1" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /test/encode.mjs: -------------------------------------------------------------------------------- 1 | import { strictEqual as equals } from 'node:assert' 2 | import { encode } from '../index.mjs' 3 | 4 | describe('encode', () => { 5 | it('should encode percent', () => { 6 | equals(encode('%%%'), '%25%25%25') 7 | }) 8 | 9 | it('should encode control chars', () => { 10 | equals(encode('\r\n'), '%0D%0A') 11 | }) 12 | 13 | it('should not encode parts of an url', () => { 14 | equals(encode('?#'), '?#') 15 | }) 16 | 17 | it('should not encode []^ - commonmark tests', () => { 18 | equals(encode('[]^'), '%5B%5D%5E') 19 | }) 20 | 21 | it('should encode spaces', () => { 22 | equals(encode('my url'), 'my%20url') 23 | }) 24 | 25 | it('should encode unicode', () => { 26 | equals(encode('φου'), '%CF%86%CE%BF%CF%85') 27 | }) 28 | 29 | it('should encode % if it doesn\'t start a valid escape seq', () => { 30 | equals(encode('%FG'), '%25FG') 31 | }) 32 | 33 | it('should preserve non-utf8 encoded characters', () => { 34 | equals(encode('%00%FF'), '%00%FF') 35 | }) 36 | 37 | it('should encode characters on the cache borders', () => { 38 | // protects against off-by-one in cache implementation 39 | equals(encode('\x00\x7F\x80'), '%00%7F%C2%80') 40 | }) 41 | 42 | describe('arguments', () => { 43 | it('encode(string, unescapedSet)', () => { 44 | equals(encode('!@#$', '@$'), '%21@%23$') 45 | }) 46 | 47 | it('encode(string, keepEscaped=true)', () => { 48 | equals(encode('%20%2G', true), '%20%252G') 49 | }) 50 | 51 | it('encode(string, keepEscaped=false)', () => { 52 | equals(encode('%20%2G', false), '%2520%252G') 53 | }) 54 | 55 | it('encode(string, unescapedSet, keepEscaped)', () => { 56 | equals(encode('!@%25', '@', false), '%21@%2525') 57 | }) 58 | }) 59 | 60 | describe('surrogates', () => { 61 | it('bad surrogates (high)', () => { 62 | equals(encode('\uD800foo'), '%EF%BF%BDfoo') 63 | equals(encode('foo\uD800'), 'foo%EF%BF%BD') 64 | }) 65 | 66 | it('bad surrogates (low)', () => { 67 | equals(encode('\uDD00foo'), '%EF%BF%BDfoo') 68 | equals(encode('foo\uDD00'), 'foo%EF%BF%BD') 69 | }) 70 | 71 | it('valid one', () => { 72 | equals(encode('\uD800\uDD00'), '%F0%90%84%80') 73 | }) 74 | }) 75 | }) 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Vitaly Puzrin, Alex Kocharin. 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- 25 | 26 | .parse() is based on Joyent's node.js `url` code: 27 | 28 | Copyright Joyent, Inc. and other Node contributors. All rights reserved. 29 | Permission is hereby granted, free of charge, to any person obtaining a copy 30 | of this software and associated documentation files (the "Software"), to 31 | deal in the Software without restriction, including without limitation the 32 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 33 | sell copies of the Software, and to permit persons to whom the Software is 34 | furnished to do so, subject to the following conditions: 35 | 36 | The above copyright notice and this permission notice shall be included in 37 | all copies or substantial portions of the Software. 38 | 39 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 40 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 41 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 42 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 43 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 44 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 45 | IN THE SOFTWARE. 46 | -------------------------------------------------------------------------------- /lib/encode.mjs: -------------------------------------------------------------------------------- 1 | const encodeCache = {} 2 | 3 | // Create a lookup array where anything but characters in `chars` string 4 | // and alphanumeric chars is percent-encoded. 5 | // 6 | function getEncodeCache (exclude) { 7 | let cache = encodeCache[exclude] 8 | if (cache) { return cache } 9 | 10 | cache = encodeCache[exclude] = [] 11 | 12 | for (let i = 0; i < 128; i++) { 13 | const ch = String.fromCharCode(i) 14 | 15 | if (/^[0-9a-z]$/i.test(ch)) { 16 | // always allow unencoded alphanumeric characters 17 | cache.push(ch) 18 | } else { 19 | cache.push('%' + ('0' + i.toString(16).toUpperCase()).slice(-2)) 20 | } 21 | } 22 | 23 | for (let i = 0; i < exclude.length; i++) { 24 | cache[exclude.charCodeAt(i)] = exclude[i] 25 | } 26 | 27 | return cache 28 | } 29 | 30 | // Encode unsafe characters with percent-encoding, skipping already 31 | // encoded sequences. 32 | // 33 | // - string - string to encode 34 | // - exclude - list of characters to ignore (in addition to a-zA-Z0-9) 35 | // - keepEscaped - don't encode '%' in a correct escape sequence (default: true) 36 | // 37 | function encode (string, exclude, keepEscaped) { 38 | if (typeof exclude !== 'string') { 39 | // encode(string, keepEscaped) 40 | keepEscaped = exclude 41 | exclude = encode.defaultChars 42 | } 43 | 44 | if (typeof keepEscaped === 'undefined') { 45 | keepEscaped = true 46 | } 47 | 48 | const cache = getEncodeCache(exclude) 49 | let result = '' 50 | 51 | for (let i = 0, l = string.length; i < l; i++) { 52 | const code = string.charCodeAt(i) 53 | 54 | if (keepEscaped && code === 0x25 /* % */ && i + 2 < l) { 55 | if (/^[0-9a-f]{2}$/i.test(string.slice(i + 1, i + 3))) { 56 | result += string.slice(i, i + 3) 57 | i += 2 58 | continue 59 | } 60 | } 61 | 62 | if (code < 128) { 63 | result += cache[code] 64 | continue 65 | } 66 | 67 | if (code >= 0xD800 && code <= 0xDFFF) { 68 | if (code >= 0xD800 && code <= 0xDBFF && i + 1 < l) { 69 | const nextCode = string.charCodeAt(i + 1) 70 | if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) { 71 | result += encodeURIComponent(string[i] + string[i + 1]) 72 | i++ 73 | continue 74 | } 75 | } 76 | result += '%EF%BF%BD' 77 | continue 78 | } 79 | 80 | result += encodeURIComponent(string[i]) 81 | } 82 | 83 | return result 84 | } 85 | 86 | encode.defaultChars = ";/?:@&=+$,-_.!~*'()#" 87 | encode.componentChars = "-_.!~*'()" 88 | 89 | export default encode 90 | -------------------------------------------------------------------------------- /lib/decode.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-bitwise */ 2 | 3 | const decodeCache = {} 4 | 5 | function getDecodeCache (exclude) { 6 | let cache = decodeCache[exclude] 7 | if (cache) { return cache } 8 | 9 | cache = decodeCache[exclude] = [] 10 | 11 | for (let i = 0; i < 128; i++) { 12 | const ch = String.fromCharCode(i) 13 | cache.push(ch) 14 | } 15 | 16 | for (let i = 0; i < exclude.length; i++) { 17 | const ch = exclude.charCodeAt(i) 18 | cache[ch] = '%' + ('0' + ch.toString(16).toUpperCase()).slice(-2) 19 | } 20 | 21 | return cache 22 | } 23 | 24 | // Decode percent-encoded string. 25 | // 26 | function decode (string, exclude) { 27 | if (typeof exclude !== 'string') { 28 | exclude = decode.defaultChars 29 | } 30 | 31 | const cache = getDecodeCache(exclude) 32 | 33 | return string.replace(/(%[a-f0-9]{2})+/gi, function (seq) { 34 | let result = '' 35 | 36 | for (let i = 0, l = seq.length; i < l; i += 3) { 37 | const b1 = parseInt(seq.slice(i + 1, i + 3), 16) 38 | 39 | if (b1 < 0x80) { 40 | result += cache[b1] 41 | continue 42 | } 43 | 44 | if ((b1 & 0xE0) === 0xC0 && (i + 3 < l)) { 45 | // 110xxxxx 10xxxxxx 46 | const b2 = parseInt(seq.slice(i + 4, i + 6), 16) 47 | 48 | if ((b2 & 0xC0) === 0x80) { 49 | const chr = ((b1 << 6) & 0x7C0) | (b2 & 0x3F) 50 | 51 | if (chr < 0x80) { 52 | result += '\ufffd\ufffd' 53 | } else { 54 | result += String.fromCharCode(chr) 55 | } 56 | 57 | i += 3 58 | continue 59 | } 60 | } 61 | 62 | if ((b1 & 0xF0) === 0xE0 && (i + 6 < l)) { 63 | // 1110xxxx 10xxxxxx 10xxxxxx 64 | const b2 = parseInt(seq.slice(i + 4, i + 6), 16) 65 | const b3 = parseInt(seq.slice(i + 7, i + 9), 16) 66 | 67 | if ((b2 & 0xC0) === 0x80 && (b3 & 0xC0) === 0x80) { 68 | const chr = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F) 69 | 70 | if (chr < 0x800 || (chr >= 0xD800 && chr <= 0xDFFF)) { 71 | result += '\ufffd\ufffd\ufffd' 72 | } else { 73 | result += String.fromCharCode(chr) 74 | } 75 | 76 | i += 6 77 | continue 78 | } 79 | } 80 | 81 | if ((b1 & 0xF8) === 0xF0 && (i + 9 < l)) { 82 | // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 83 | const b2 = parseInt(seq.slice(i + 4, i + 6), 16) 84 | const b3 = parseInt(seq.slice(i + 7, i + 9), 16) 85 | const b4 = parseInt(seq.slice(i + 10, i + 12), 16) 86 | 87 | if ((b2 & 0xC0) === 0x80 && (b3 & 0xC0) === 0x80 && (b4 & 0xC0) === 0x80) { 88 | let chr = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F) 89 | 90 | if (chr < 0x10000 || chr > 0x10FFFF) { 91 | result += '\ufffd\ufffd\ufffd\ufffd' 92 | } else { 93 | chr -= 0x10000 94 | result += String.fromCharCode(0xD800 + (chr >> 10), 0xDC00 + (chr & 0x3FF)) 95 | } 96 | 97 | i += 9 98 | continue 99 | } 100 | } 101 | 102 | result += '\ufffd' 103 | } 104 | 105 | return result 106 | }) 107 | } 108 | 109 | decode.defaultChars = ';/?:@&=+$,#' 110 | decode.componentChars = '' 111 | 112 | export default decode 113 | -------------------------------------------------------------------------------- /test/decode.mjs: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert' 2 | import { decode } from '../index.mjs' 3 | 4 | function encodeBinary (str) { 5 | let result = '' 6 | 7 | str = str.replace(/\s+/g, '') 8 | while (str.length) { 9 | result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result 10 | str = str.slice(0, -8) 11 | } 12 | 13 | return result 14 | } 15 | 16 | const samples = { 17 | '00000000': true, 18 | '01010101': true, 19 | '01111111': true, 20 | 21 | // invalid as 1st byte 22 | 10000000: true, 23 | 10111111: true, 24 | 25 | // invalid sequences, 2nd byte should be >= 0x80 26 | '11000111 01010101': false, 27 | '11100011 01010101': false, 28 | '11110001 01010101': false, 29 | 30 | // invalid sequences, 2nd byte should be < 0xc0 31 | '11000111 11000000': false, 32 | '11100011 11000000': false, 33 | '11110001 11000000': false, 34 | 35 | // invalid 3rd byte 36 | '11100011 10010101 01010101': false, 37 | '11110001 10010101 01010101': false, 38 | 39 | // invalid 4th byte 40 | '11110001 10010101 10010101 01010101': false, 41 | 42 | // valid sequences 43 | '11000111 10101010': true, 44 | '11100011 10101010 10101010': true, 45 | '11110001 10101010 10101010 10101010': true, 46 | 47 | // minimal chars with given length 48 | '11000010 10000000': true, 49 | '11100000 10100000 10000000': true, 50 | 51 | // impossible sequences 52 | '11000001 10111111': false, 53 | '11100000 10011111 10111111': false, 54 | '11000001 10000000': false, 55 | '11100000 10010000 10000000': false, 56 | 57 | // maximum chars with given length 58 | '11011111 10111111': true, 59 | '11101111 10111111 10111111': true, 60 | 61 | '11110000 10010000 10000000 10000000': true, 62 | '11110000 10010000 10001111 10001111': true, 63 | '11110100 10001111 10110000 10000000': true, 64 | '11110100 10001111 10111111 10111111': true, 65 | 66 | // too low 67 | '11110000 10001111 10111111 10111111': false, 68 | 69 | // too high 70 | '11110100 10010000 10000000 10000000': false, 71 | '11110100 10011111 10111111 10111111': false, 72 | 73 | // surrogate range 74 | '11101101 10011111 10111111': true, 75 | '11101101 10100000 10000000': false, 76 | '11101101 10111111 10111111': false, 77 | '11101110 10000000 10000000': true 78 | } 79 | 80 | describe('decode', () => { 81 | it('should decode %xx', () => { 82 | assert.equal(decode('x%20xx%20%2520'), 'x xx %20') 83 | }) 84 | 85 | it('should not decode invalid sequences', () => { 86 | assert.equal(decode('%2g%z1%%'), '%2g%z1%%') 87 | }) 88 | 89 | it('should not decode reservedSet', () => { 90 | assert.equal(decode('%20%25%20', '%'), ' %25 ') 91 | assert.equal(decode('%20%25%20', ' '), '%20%%20') 92 | assert.equal(decode('%20%25%20', ' %'), '%20%25%20') 93 | }) 94 | 95 | describe('utf8', () => { 96 | Object.keys(samples).forEach(function (k) { 97 | it(k, () => { 98 | let res1, er 99 | 100 | const str = encodeBinary(k) 101 | 102 | try { 103 | res1 = decodeURIComponent(str) 104 | } catch (e) { 105 | er = e 106 | } 107 | 108 | const res2 = decode(str) 109 | 110 | if (er) { 111 | assert.notEqual(res2.indexOf('\ufffd'), -1) 112 | } else { 113 | assert.equal(res1, res2) 114 | assert.equal(res2.indexOf('\ufffd'), -1) 115 | } 116 | }) 117 | }) 118 | }) 119 | }) 120 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mdurl 2 | 3 | [![CI](https://github.com/markdown-it/mdurl/actions/workflows/ci.yml/badge.svg)](https://github.com/markdown-it/mdurl/actions/workflows/ci.yml) 4 | [![NPM version](https://img.shields.io/npm/v/mdurl.svg?style=flat)](https://www.npmjs.org/package/mdurl) 5 | 6 | > URL utilities for [markdown-it](https://github.com/markdown-it/markdown-it) parser. 7 | 8 | 9 | ## API 10 | 11 | ### .encode(str [, exclude, keepEncoded]) -> String 12 | 13 | Percent-encode a string, avoiding double encoding. Don't touch `/a-zA-Z0-9/` + 14 | excluded chars + `/%[a-fA-F0-9]{2}/` (if not disabled). Broken surrorates are 15 | replaced with `U+FFFD`. 16 | 17 | Params: 18 | 19 | - __str__ - input string. 20 | - __exclude__ - optional, `;/?:@&=+$,-_.!~*'()#`. Additional chars to keep intact 21 | (except `/a-zA-Z0-9/`). 22 | - __keepEncoded__ - optional, `true`. By default it skips already encoded sequences 23 | (`/%[a-fA-F0-9]{2}/`). If set to `false`, `%` will be encoded. 24 | 25 | 26 | ### encode.defaultChars, encode.componentChars 27 | 28 | You can use these constants as second argument to `encode` function. 29 | 30 | - `encode.defaultChars` is the same exclude set as in the standard `encodeURI()` function 31 | - `encode.componentChars` is the same exclude set as in the `encodeURIComponent()` function 32 | 33 | For example, `encode('something', encode.componentChars, true)` is roughly the equivalent of 34 | the `encodeURIComponent()` function (except `encode()` doesn't throw). 35 | 36 | 37 | ### .decode(str [, exclude]) -> String 38 | 39 | Decode percent-encoded string. Invalid percent-encoded sequences (e.g. `%2G`) 40 | are left as is. Invalid UTF-8 characters are replaced with `U+FFFD`. 41 | 42 | 43 | Params: 44 | 45 | - __str__ - input string. 46 | - __exclude__ - set of characters to leave encoded, optional, `;/?:@&=+$,#`. 47 | 48 | 49 | ### decode.defaultChars, decode.componentChars 50 | 51 | You can use these constants as second argument to `decode` function. 52 | 53 | - `decode.defaultChars` is the same exclude set as in the standard `decodeURI()` function 54 | - `decode.componentChars` is the same exclude set as in the `decodeURIComponent()` function 55 | 56 | For example, `decode('something', decode.defaultChars)` has the same behavior as 57 | `decodeURI('something')` on a correctly encoded input. 58 | 59 | 60 | ### .parse(url, slashesDenoteHost) -> urlObs 61 | 62 | Parse url string. Similar to node's [url.parse](http://nodejs.org/api/url.html#url_url_parse_urlstr_parsequerystring_slashesdenotehost), but without any 63 | normalizations and query string parse. 64 | 65 | - __url__ - input url (string) 66 | - __slashesDenoteHost__ - if url starts with `//`, expect a hostname after it. Optional, `false`. 67 | 68 | Result (hash): 69 | 70 | - protocol 71 | - slashes 72 | - auth 73 | - port 74 | - hostname 75 | - hash 76 | - search 77 | - pathname 78 | 79 | Difference with node's `url`: 80 | 81 | 1. No leading slash in paths, e.g. in `url.parse('http://foo?bar')` pathname is 82 | ``, not `/` 83 | 2. Backslashes are not replaced with slashes, so `http:\\example.org\` is 84 | treated like a relative path 85 | 3. Trailing colon is treated like a part of the path, i.e. in 86 | `http://example.org:foo` pathname is `:foo` 87 | 4. Nothing is URL-encoded in the resulting object, (in joyent/node some chars 88 | in auth and paths are encoded) 89 | 5. `url.parse()` does not have `parseQueryString` argument 90 | 6. Removed extraneous result properties: `host`, `path`, `query`, etc., 91 | which can be constructed using other parts of the url. 92 | 93 | 94 | ### .format(urlObject) 95 | 96 | Format an object previously obtained with `.parse()` function. Similar to node's 97 | [url.format](http://nodejs.org/api/url.html#url_url_format_urlobj). 98 | 99 | 100 | ## License 101 | 102 | [MIT](https://github.com/markdown-it/mdurl/blob/master/LICENSE) 103 | -------------------------------------------------------------------------------- /lib/parse.mjs: -------------------------------------------------------------------------------- 1 | // Copyright Joyent, Inc. and other Node contributors. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the 5 | // "Software"), to deal in the Software without restriction, including 6 | // without limitation the rights to use, copy, modify, merge, publish, 7 | // distribute, sublicense, and/or sell copies of the Software, and to permit 8 | // persons to whom the Software is furnished to do so, subject to the 9 | // following conditions: 10 | // 11 | // The above copyright notice and this permission notice shall be included 12 | // in all copies or substantial portions of the Software. 13 | // 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 | // USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | // 23 | // Changes from joyent/node: 24 | // 25 | // 1. No leading slash in paths, 26 | // e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/` 27 | // 28 | // 2. Backslashes are not replaced with slashes, 29 | // so `http:\\example.org\` is treated like a relative path 30 | // 31 | // 3. Trailing colon is treated like a part of the path, 32 | // i.e. in `http://example.org:foo` pathname is `:foo` 33 | // 34 | // 4. Nothing is URL-encoded in the resulting object, 35 | // (in joyent/node some chars in auth and paths are encoded) 36 | // 37 | // 5. `url.parse()` does not have `parseQueryString` argument 38 | // 39 | // 6. Removed extraneous result properties: `host`, `path`, `query`, etc., 40 | // which can be constructed using other parts of the url. 41 | // 42 | 43 | function Url () { 44 | this.protocol = null 45 | this.slashes = null 46 | this.auth = null 47 | this.port = null 48 | this.hostname = null 49 | this.hash = null 50 | this.search = null 51 | this.pathname = null 52 | } 53 | 54 | // Reference: RFC 3986, RFC 1808, RFC 2396 55 | 56 | // define these here so at least they only have to be 57 | // compiled once on the first module load. 58 | const protocolPattern = /^([a-z0-9.+-]+:)/i 59 | const portPattern = /:[0-9]*$/ 60 | 61 | // Special case for a simple path URL 62 | /* eslint-disable-next-line no-useless-escape */ 63 | const simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/ 64 | 65 | // RFC 2396: characters reserved for delimiting URLs. 66 | // We actually just auto-escape these. 67 | const delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'] 68 | 69 | // RFC 2396: characters not allowed for various reasons. 70 | const unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims) 71 | 72 | // Allowed by RFCs, but cause of XSS attacks. Always escape these. 73 | const autoEscape = ['\''].concat(unwise) 74 | // Characters that are never ever allowed in a hostname. 75 | // Note that any invalid chars are also handled, but these 76 | // are the ones that are *expected* to be seen, so we fast-path 77 | // them. 78 | const nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape) 79 | const hostEndingChars = ['/', '?', '#'] 80 | const hostnameMaxLen = 255 81 | const hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/ 82 | const hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/ 83 | // protocols that can allow "unsafe" and "unwise" chars. 84 | // protocols that never have a hostname. 85 | const hostlessProtocol = { 86 | javascript: true, 87 | 'javascript:': true 88 | } 89 | // protocols that always contain a // bit. 90 | const slashedProtocol = { 91 | http: true, 92 | https: true, 93 | ftp: true, 94 | gopher: true, 95 | file: true, 96 | 'http:': true, 97 | 'https:': true, 98 | 'ftp:': true, 99 | 'gopher:': true, 100 | 'file:': true 101 | } 102 | 103 | function urlParse (url, slashesDenoteHost) { 104 | if (url && url instanceof Url) return url 105 | 106 | const u = new Url() 107 | u.parse(url, slashesDenoteHost) 108 | return u 109 | } 110 | 111 | Url.prototype.parse = function (url, slashesDenoteHost) { 112 | let lowerProto, hec, slashes 113 | let rest = url 114 | 115 | // trim before proceeding. 116 | // This is to support parse stuff like " http://foo.com \n" 117 | rest = rest.trim() 118 | 119 | if (!slashesDenoteHost && url.split('#').length === 1) { 120 | // Try fast path regexp 121 | const simplePath = simplePathPattern.exec(rest) 122 | if (simplePath) { 123 | this.pathname = simplePath[1] 124 | if (simplePath[2]) { 125 | this.search = simplePath[2] 126 | } 127 | return this 128 | } 129 | } 130 | 131 | let proto = protocolPattern.exec(rest) 132 | if (proto) { 133 | proto = proto[0] 134 | lowerProto = proto.toLowerCase() 135 | this.protocol = proto 136 | rest = rest.substr(proto.length) 137 | } 138 | 139 | // figure out if it's got a host 140 | // user@server is *always* interpreted as a hostname, and url 141 | // resolution will treat //foo/bar as host=foo,path=bar because that's 142 | // how the browser resolves relative URLs. 143 | /* eslint-disable-next-line no-useless-escape */ 144 | if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) { 145 | slashes = rest.substr(0, 2) === '//' 146 | if (slashes && !(proto && hostlessProtocol[proto])) { 147 | rest = rest.substr(2) 148 | this.slashes = true 149 | } 150 | } 151 | 152 | if (!hostlessProtocol[proto] && 153 | (slashes || (proto && !slashedProtocol[proto]))) { 154 | // there's a hostname. 155 | // the first instance of /, ?, ;, or # ends the host. 156 | // 157 | // If there is an @ in the hostname, then non-host chars *are* allowed 158 | // to the left of the last @ sign, unless some host-ending character 159 | // comes *before* the @-sign. 160 | // URLs are obnoxious. 161 | // 162 | // ex: 163 | // http://a@b@c/ => user:a@b host:c 164 | // http://a@b?@c => user:a host:c path:/?@c 165 | 166 | // v0.12 TODO(isaacs): This is not quite how Chrome does things. 167 | // Review our test case against browsers more comprehensively. 168 | 169 | // find the first instance of any hostEndingChars 170 | let hostEnd = -1 171 | for (let i = 0; i < hostEndingChars.length; i++) { 172 | hec = rest.indexOf(hostEndingChars[i]) 173 | if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) { 174 | hostEnd = hec 175 | } 176 | } 177 | 178 | // at this point, either we have an explicit point where the 179 | // auth portion cannot go past, or the last @ char is the decider. 180 | let auth, atSign 181 | if (hostEnd === -1) { 182 | // atSign can be anywhere. 183 | atSign = rest.lastIndexOf('@') 184 | } else { 185 | // atSign must be in auth portion. 186 | // http://a@b/c@d => host:b auth:a path:/c@d 187 | atSign = rest.lastIndexOf('@', hostEnd) 188 | } 189 | 190 | // Now we have a portion which is definitely the auth. 191 | // Pull that off. 192 | if (atSign !== -1) { 193 | auth = rest.slice(0, atSign) 194 | rest = rest.slice(atSign + 1) 195 | this.auth = auth 196 | } 197 | 198 | // the host is the remaining to the left of the first non-host char 199 | hostEnd = -1 200 | for (let i = 0; i < nonHostChars.length; i++) { 201 | hec = rest.indexOf(nonHostChars[i]) 202 | if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) { 203 | hostEnd = hec 204 | } 205 | } 206 | // if we still have not hit it, then the entire thing is a host. 207 | if (hostEnd === -1) { 208 | hostEnd = rest.length 209 | } 210 | 211 | if (rest[hostEnd - 1] === ':') { hostEnd-- } 212 | const host = rest.slice(0, hostEnd) 213 | rest = rest.slice(hostEnd) 214 | 215 | // pull out port. 216 | this.parseHost(host) 217 | 218 | // we've indicated that there is a hostname, 219 | // so even if it's empty, it has to be present. 220 | this.hostname = this.hostname || '' 221 | 222 | // if hostname begins with [ and ends with ] 223 | // assume that it's an IPv6 address. 224 | const ipv6Hostname = this.hostname[0] === '[' && 225 | this.hostname[this.hostname.length - 1] === ']' 226 | 227 | // validate a little. 228 | if (!ipv6Hostname) { 229 | const hostparts = this.hostname.split(/\./) 230 | for (let i = 0, l = hostparts.length; i < l; i++) { 231 | const part = hostparts[i] 232 | if (!part) { continue } 233 | if (!part.match(hostnamePartPattern)) { 234 | let newpart = '' 235 | for (let j = 0, k = part.length; j < k; j++) { 236 | if (part.charCodeAt(j) > 127) { 237 | // we replace non-ASCII char with a temporary placeholder 238 | // we need this to make sure size of hostname is not 239 | // broken by replacing non-ASCII by nothing 240 | newpart += 'x' 241 | } else { 242 | newpart += part[j] 243 | } 244 | } 245 | // we test again with ASCII char only 246 | if (!newpart.match(hostnamePartPattern)) { 247 | const validParts = hostparts.slice(0, i) 248 | const notHost = hostparts.slice(i + 1) 249 | const bit = part.match(hostnamePartStart) 250 | if (bit) { 251 | validParts.push(bit[1]) 252 | notHost.unshift(bit[2]) 253 | } 254 | if (notHost.length) { 255 | rest = notHost.join('.') + rest 256 | } 257 | this.hostname = validParts.join('.') 258 | break 259 | } 260 | } 261 | } 262 | } 263 | 264 | if (this.hostname.length > hostnameMaxLen) { 265 | this.hostname = '' 266 | } 267 | 268 | // strip [ and ] from the hostname 269 | // the host field still retains them, though 270 | if (ipv6Hostname) { 271 | this.hostname = this.hostname.substr(1, this.hostname.length - 2) 272 | } 273 | } 274 | 275 | // chop off from the tail first. 276 | const hash = rest.indexOf('#') 277 | if (hash !== -1) { 278 | // got a fragment string. 279 | this.hash = rest.substr(hash) 280 | rest = rest.slice(0, hash) 281 | } 282 | const qm = rest.indexOf('?') 283 | if (qm !== -1) { 284 | this.search = rest.substr(qm) 285 | rest = rest.slice(0, qm) 286 | } 287 | if (rest) { this.pathname = rest } 288 | if (slashedProtocol[lowerProto] && 289 | this.hostname && !this.pathname) { 290 | this.pathname = '' 291 | } 292 | 293 | return this 294 | } 295 | 296 | Url.prototype.parseHost = function (host) { 297 | let port = portPattern.exec(host) 298 | if (port) { 299 | port = port[0] 300 | if (port !== ':') { 301 | this.port = port.substr(1) 302 | } 303 | host = host.substr(0, host.length - port.length) 304 | } 305 | if (host) { this.hostname = host } 306 | } 307 | 308 | export default urlParse 309 | -------------------------------------------------------------------------------- /test/fixtures/url.mjs: -------------------------------------------------------------------------------- 1 | // Copyright Joyent, Inc. and other Node contributors. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a 4 | // copy of this software and associated documentation files (the 5 | // "Software"), to deal in the Software without restriction, including 6 | // without limitation the rights to use, copy, modify, merge, publish, 7 | // distribute, sublicense, and/or sell copies of the Software, and to permit 8 | // persons to whom the Software is furnished to do so, subject to the 9 | // following conditions: 10 | // 11 | // The above copyright notice and this permission notice shall be included 12 | // in all copies or substantial portions of the Software. 13 | // 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 | // USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 'use strict' 23 | 24 | /* eslint-disable no-script-url */ 25 | 26 | // URLs to parse, and expected data 27 | // { url : parsed } 28 | export default { 29 | '//some_path': { 30 | pathname: '//some_path' 31 | }, 32 | 33 | 'HTTP://www.example.com/': { 34 | protocol: 'HTTP:', 35 | slashes: true, 36 | hostname: 'www.example.com', 37 | pathname: '/' 38 | }, 39 | 40 | 'HTTP://www.example.com': { 41 | protocol: 'HTTP:', 42 | slashes: true, 43 | hostname: 'www.example.com', 44 | pathname: '' 45 | }, 46 | 47 | 'http://www.ExAmPlE.com/': { 48 | protocol: 'http:', 49 | slashes: true, 50 | hostname: 'www.ExAmPlE.com', 51 | pathname: '/' 52 | }, 53 | 54 | 'http://user:pw@www.ExAmPlE.com/': { 55 | protocol: 'http:', 56 | slashes: true, 57 | auth: 'user:pw', 58 | hostname: 'www.ExAmPlE.com', 59 | pathname: '/' 60 | }, 61 | 62 | 'http://USER:PW@www.ExAmPlE.com/': { 63 | protocol: 'http:', 64 | slashes: true, 65 | auth: 'USER:PW', 66 | hostname: 'www.ExAmPlE.com', 67 | pathname: '/' 68 | }, 69 | 70 | 'http://user@www.example.com/': { 71 | protocol: 'http:', 72 | slashes: true, 73 | auth: 'user', 74 | hostname: 'www.example.com', 75 | pathname: '/' 76 | }, 77 | 78 | 'http://user%3Apw@www.example.com/': { 79 | protocol: 'http:', 80 | slashes: true, 81 | auth: 'user%3Apw', 82 | hostname: 'www.example.com', 83 | pathname: '/' 84 | }, 85 | 86 | 'http://x.com/path?that\'s#all, folks': { 87 | protocol: 'http:', 88 | hostname: 'x.com', 89 | slashes: true, 90 | search: '?that\'s', 91 | pathname: '/path', 92 | hash: '#all, folks' 93 | }, 94 | 95 | 'HTTP://X.COM/Y': { 96 | protocol: 'HTTP:', 97 | slashes: true, 98 | hostname: 'X.COM', 99 | pathname: '/Y' 100 | }, 101 | 102 | // + not an invalid host character 103 | // per https://url.spec.whatwg.org/#host-parsing 104 | 'http://x.y.com+a/b/c': { 105 | protocol: 'http:', 106 | slashes: true, 107 | hostname: 'x.y.com+a', 108 | pathname: '/b/c' 109 | }, 110 | 111 | // an unexpected invalid char in the hostname. 112 | 'HtTp://x.y.cOm;a/b/c?d=e#f gi': { 113 | protocol: 'HtTp:', 114 | slashes: true, 115 | hostname: 'x.y.cOm', 116 | pathname: ';a/b/c', 117 | search: '?d=e', 118 | hash: '#f gi' 119 | }, 120 | 121 | // make sure that we don't accidentally lcast the path parts. 122 | 'HtTp://x.y.cOm;A/b/c?d=e#f gi': { 123 | protocol: 'HtTp:', 124 | slashes: true, 125 | hostname: 'x.y.cOm', 126 | pathname: ';A/b/c', 127 | search: '?d=e', 128 | hash: '#f gi' 129 | }, 130 | 131 | 'http://x...y...#p': { 132 | protocol: 'http:', 133 | slashes: true, 134 | hostname: 'x...y...', 135 | hash: '#p', 136 | pathname: '' 137 | }, 138 | 139 | 'http://x/p/"quoted"': { 140 | protocol: 'http:', 141 | slashes: true, 142 | hostname: 'x', 143 | pathname: '/p/"quoted"' 144 | }, 145 | 146 | ' Is a URL!': { 147 | pathname: ' Is a URL!' 148 | }, 149 | 150 | 'http://www.narwhaljs.org/blog/categories?id=news': { 151 | protocol: 'http:', 152 | slashes: true, 153 | hostname: 'www.narwhaljs.org', 154 | search: '?id=news', 155 | pathname: '/blog/categories' 156 | }, 157 | 158 | 'http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=': { 159 | protocol: 'http:', 160 | slashes: true, 161 | hostname: 'mt0.google.com', 162 | pathname: '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=' 163 | }, 164 | 165 | 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=': { 166 | protocol: 'http:', 167 | slashes: true, 168 | hostname: 'mt0.google.com', 169 | search: '???&hl=en&src=api&x=2&y=2&z=3&s=', 170 | pathname: '/vt/lyrs=m@114' 171 | }, 172 | 173 | 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=': 174 | { 175 | protocol: 'http:', 176 | slashes: true, 177 | auth: 'user:pass', 178 | hostname: 'mt0.google.com', 179 | search: '???&hl=en&src=api&x=2&y=2&z=3&s=', 180 | pathname: '/vt/lyrs=m@114' 181 | }, 182 | 183 | 'file:///etc/passwd': { 184 | slashes: true, 185 | protocol: 'file:', 186 | pathname: '/etc/passwd', 187 | hostname: '' 188 | }, 189 | 190 | 'file://localhost/etc/passwd': { 191 | protocol: 'file:', 192 | slashes: true, 193 | pathname: '/etc/passwd', 194 | hostname: 'localhost' 195 | }, 196 | 197 | 'file://foo/etc/passwd': { 198 | protocol: 'file:', 199 | slashes: true, 200 | pathname: '/etc/passwd', 201 | hostname: 'foo' 202 | }, 203 | 204 | 'file:///etc/node/': { 205 | slashes: true, 206 | protocol: 'file:', 207 | pathname: '/etc/node/', 208 | hostname: '' 209 | }, 210 | 211 | 'file://localhost/etc/node/': { 212 | protocol: 'file:', 213 | slashes: true, 214 | pathname: '/etc/node/', 215 | hostname: 'localhost' 216 | }, 217 | 218 | 'file://foo/etc/node/': { 219 | protocol: 'file:', 220 | slashes: true, 221 | pathname: '/etc/node/', 222 | hostname: 'foo' 223 | }, 224 | 225 | 'http:/baz/../foo/bar': { 226 | protocol: 'http:', 227 | pathname: '/baz/../foo/bar' 228 | }, 229 | 230 | 'http://user:pass@example.com:8000/foo/bar?baz=quux#frag': { 231 | protocol: 'http:', 232 | slashes: true, 233 | auth: 'user:pass', 234 | port: '8000', 235 | hostname: 'example.com', 236 | hash: '#frag', 237 | search: '?baz=quux', 238 | pathname: '/foo/bar' 239 | }, 240 | 241 | '//user:pass@example.com:8000/foo/bar?baz=quux#frag': { 242 | slashes: true, 243 | auth: 'user:pass', 244 | port: '8000', 245 | hostname: 'example.com', 246 | hash: '#frag', 247 | search: '?baz=quux', 248 | pathname: '/foo/bar' 249 | }, 250 | 251 | '/foo/bar?baz=quux#frag': { 252 | hash: '#frag', 253 | search: '?baz=quux', 254 | pathname: '/foo/bar' 255 | }, 256 | 257 | 'http:/foo/bar?baz=quux#frag': { 258 | protocol: 'http:', 259 | hash: '#frag', 260 | search: '?baz=quux', 261 | pathname: '/foo/bar' 262 | }, 263 | 264 | 'mailto:foo@bar.com?subject=hello': { 265 | protocol: 'mailto:', 266 | auth: 'foo', 267 | hostname: 'bar.com', 268 | search: '?subject=hello' 269 | }, 270 | 271 | 'javascript:alert(\'hello\');': { 272 | protocol: 'javascript:', 273 | pathname: 'alert(\'hello\');' 274 | }, 275 | 276 | 'xmpp:isaacschlueter@jabber.org': { 277 | protocol: 'xmpp:', 278 | auth: 'isaacschlueter', 279 | hostname: 'jabber.org' 280 | }, 281 | 282 | 'http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar': { 283 | protocol: 'http:', 284 | slashes: true, 285 | auth: 'atpass:foo%40bar', 286 | hostname: '127.0.0.1', 287 | port: '8080', 288 | pathname: '/path', 289 | search: '?search=foo', 290 | hash: '#bar' 291 | }, 292 | 293 | 'svn+ssh://foo/bar': { 294 | hostname: 'foo', 295 | protocol: 'svn+ssh:', 296 | pathname: '/bar', 297 | slashes: true 298 | }, 299 | 300 | 'dash-test://foo/bar': { 301 | hostname: 'foo', 302 | protocol: 'dash-test:', 303 | pathname: '/bar', 304 | slashes: true 305 | }, 306 | 307 | 'dash-test:foo/bar': { 308 | hostname: 'foo', 309 | protocol: 'dash-test:', 310 | pathname: '/bar' 311 | }, 312 | 313 | 'dot.test://foo/bar': { 314 | hostname: 'foo', 315 | protocol: 'dot.test:', 316 | pathname: '/bar', 317 | slashes: true 318 | }, 319 | 320 | 'dot.test:foo/bar': { 321 | hostname: 'foo', 322 | protocol: 'dot.test:', 323 | pathname: '/bar' 324 | }, 325 | 326 | // IDNA tests 327 | 'http://www.日本語.com/': { 328 | protocol: 'http:', 329 | slashes: true, 330 | hostname: 'www.日本語.com', 331 | pathname: '/' 332 | }, 333 | 334 | 'http://example.Bücher.com/': { 335 | protocol: 'http:', 336 | slashes: true, 337 | hostname: 'example.Bücher.com', 338 | pathname: '/' 339 | }, 340 | 341 | 'http://www.Äffchen.com/': { 342 | protocol: 'http:', 343 | slashes: true, 344 | hostname: 'www.Äffchen.com', 345 | pathname: '/' 346 | }, 347 | 348 | 'http://www.Äffchen.cOm;A/b/c?d=e#f gi': { 349 | protocol: 'http:', 350 | slashes: true, 351 | hostname: 'www.Äffchen.cOm', 352 | pathname: ';A/b/c', 353 | search: '?d=e', 354 | hash: '#f gi' 355 | }, 356 | 357 | 'http://SÉLIER.COM/': { 358 | protocol: 'http:', 359 | slashes: true, 360 | hostname: 'SÉLIER.COM', 361 | pathname: '/' 362 | }, 363 | 364 | 'http://ليهمابتكلموشعربي؟.ي؟/': { 365 | protocol: 'http:', 366 | slashes: true, 367 | hostname: 'ليهمابتكلموشعربي؟.ي؟', 368 | pathname: '/' 369 | }, 370 | 371 | 'http://➡.ws/➡': { 372 | protocol: 'http:', 373 | slashes: true, 374 | hostname: '➡.ws', 375 | pathname: '/➡' 376 | }, 377 | 378 | 'http://bucket_name.s3.amazonaws.com/image.jpg': { 379 | protocol: 'http:', 380 | slashes: true, 381 | hostname: 'bucket_name.s3.amazonaws.com', 382 | pathname: '/image.jpg' 383 | }, 384 | 385 | 'git+http://github.com/joyent/node.git': { 386 | protocol: 'git+http:', 387 | slashes: true, 388 | hostname: 'github.com', 389 | pathname: '/joyent/node.git' 390 | }, 391 | 392 | // if local1@domain1 is uses as a relative URL it may 393 | // be parse into auth@hostname, but here there is no 394 | // way to make it work in url.parse, I add the test to be explicit 395 | 'local1@domain1': { 396 | pathname: 'local1@domain1' 397 | }, 398 | 399 | // While this may seem counter-intuitive, a browser will parse 400 | // as a path. 401 | 'www.example.com': { 402 | pathname: 'www.example.com' 403 | }, 404 | 405 | // ipv6 support 406 | '[fe80::1]': { 407 | pathname: '[fe80::1]' 408 | }, 409 | 410 | 'coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]': { 411 | protocol: 'coap:', 412 | slashes: true, 413 | hostname: 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210' 414 | }, 415 | 416 | 'coap://[1080:0:0:0:8:800:200C:417A]:61616/': { 417 | protocol: 'coap:', 418 | slashes: true, 419 | port: '61616', 420 | hostname: '1080:0:0:0:8:800:200C:417A', 421 | pathname: '/' 422 | }, 423 | 424 | 'http://user:password@[3ffe:2a00:100:7031::1]:8080': { 425 | protocol: 'http:', 426 | slashes: true, 427 | auth: 'user:password', 428 | port: '8080', 429 | hostname: '3ffe:2a00:100:7031::1', 430 | pathname: '' 431 | }, 432 | 433 | 'coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature': { 434 | protocol: 'coap:', 435 | slashes: true, 436 | auth: 'u:p', 437 | port: '61616', 438 | hostname: '::192.9.5.5', 439 | search: '?n=Temperature', 440 | pathname: '/.well-known/r' 441 | }, 442 | 443 | // empty port 444 | 'http://example.com:': { 445 | protocol: 'http:', 446 | slashes: true, 447 | hostname: 'example.com', 448 | pathname: ':' 449 | }, 450 | 451 | 'http://example.com:/a/b.html': { 452 | protocol: 'http:', 453 | slashes: true, 454 | hostname: 'example.com', 455 | pathname: ':/a/b.html' 456 | }, 457 | 458 | 'http://example.com:?a=b': { 459 | protocol: 'http:', 460 | slashes: true, 461 | hostname: 'example.com', 462 | search: '?a=b', 463 | pathname: ':' 464 | }, 465 | 466 | 'http://example.com:#abc': { 467 | protocol: 'http:', 468 | slashes: true, 469 | hostname: 'example.com', 470 | hash: '#abc', 471 | pathname: ':' 472 | }, 473 | 474 | 'http://[fe80::1]:/a/b?a=b#abc': { 475 | protocol: 'http:', 476 | slashes: true, 477 | hostname: 'fe80::1', 478 | search: '?a=b', 479 | hash: '#abc', 480 | pathname: ':/a/b' 481 | }, 482 | 483 | 'http://-lovemonsterz.tumblr.com/rss': { 484 | protocol: 'http:', 485 | slashes: true, 486 | hostname: '-lovemonsterz.tumblr.com', 487 | pathname: '/rss' 488 | }, 489 | 490 | 'http://-lovemonsterz.tumblr.com:80/rss': { 491 | protocol: 'http:', 492 | slashes: true, 493 | port: '80', 494 | hostname: '-lovemonsterz.tumblr.com', 495 | pathname: '/rss' 496 | }, 497 | 498 | 'http://user:pass@-lovemonsterz.tumblr.com/rss': { 499 | protocol: 'http:', 500 | slashes: true, 501 | auth: 'user:pass', 502 | hostname: '-lovemonsterz.tumblr.com', 503 | pathname: '/rss' 504 | }, 505 | 506 | 'http://user:pass@-lovemonsterz.tumblr.com:80/rss': { 507 | protocol: 'http:', 508 | slashes: true, 509 | auth: 'user:pass', 510 | port: '80', 511 | hostname: '-lovemonsterz.tumblr.com', 512 | pathname: '/rss' 513 | }, 514 | 515 | 'http://_jabber._tcp.google.com/test': { 516 | protocol: 'http:', 517 | slashes: true, 518 | hostname: '_jabber._tcp.google.com', 519 | pathname: '/test' 520 | }, 521 | 522 | 'http://user:pass@_jabber._tcp.google.com/test': { 523 | protocol: 'http:', 524 | slashes: true, 525 | auth: 'user:pass', 526 | hostname: '_jabber._tcp.google.com', 527 | pathname: '/test' 528 | }, 529 | 530 | 'http://_jabber._tcp.google.com:80/test': { 531 | protocol: 'http:', 532 | slashes: true, 533 | port: '80', 534 | hostname: '_jabber._tcp.google.com', 535 | pathname: '/test' 536 | }, 537 | 538 | 'http://user:pass@_jabber._tcp.google.com:80/test': { 539 | protocol: 'http:', 540 | slashes: true, 541 | auth: 'user:pass', 542 | port: '80', 543 | hostname: '_jabber._tcp.google.com', 544 | pathname: '/test' 545 | }, 546 | 547 | 'http://x:1/\' <>"`/{}|\\^~`/': { 548 | protocol: 'http:', 549 | slashes: true, 550 | port: '1', 551 | hostname: 'x', 552 | pathname: '/\' <>"`/{}|\\^~`/' 553 | }, 554 | 555 | 'http://a@b@c/': { 556 | protocol: 'http:', 557 | slashes: true, 558 | auth: 'a@b', 559 | hostname: 'c', 560 | pathname: '/' 561 | }, 562 | 563 | 'http://a@b?@c': { 564 | protocol: 'http:', 565 | slashes: true, 566 | auth: 'a', 567 | hostname: 'b', 568 | pathname: '', 569 | search: '?@c' 570 | }, 571 | 572 | 'http://a\r" \t\n<\'b:b@c\r\nd/e?f': { 573 | protocol: 'http:', 574 | slashes: true, 575 | auth: 'a\r" \t\n<\'b:b', 576 | hostname: 'c', 577 | search: '?f', 578 | pathname: '\r\nd/e' 579 | }, 580 | 581 | // git urls used by npm 582 | 'git+ssh://git@github.com:npm/npm': { 583 | protocol: 'git+ssh:', 584 | slashes: true, 585 | auth: 'git', 586 | hostname: 'github.com', 587 | pathname: ':npm/npm' 588 | }, 589 | 590 | 'http://example.com?foo=bar#frag': { 591 | protocol: 'http:', 592 | slashes: true, 593 | hostname: 'example.com', 594 | hash: '#frag', 595 | search: '?foo=bar', 596 | pathname: '' 597 | }, 598 | 599 | 'http://example.com?foo=@bar#frag': { 600 | protocol: 'http:', 601 | slashes: true, 602 | hostname: 'example.com', 603 | hash: '#frag', 604 | search: '?foo=@bar', 605 | pathname: '' 606 | }, 607 | 608 | 'http://example.com?foo=/bar/#frag': { 609 | protocol: 'http:', 610 | slashes: true, 611 | hostname: 'example.com', 612 | hash: '#frag', 613 | search: '?foo=/bar/', 614 | pathname: '' 615 | }, 616 | 617 | 'http://example.com?foo=?bar/#frag': { 618 | protocol: 'http:', 619 | slashes: true, 620 | hostname: 'example.com', 621 | hash: '#frag', 622 | search: '?foo=?bar/', 623 | pathname: '' 624 | }, 625 | 626 | 'http://example.com#frag=?bar/#frag': { 627 | protocol: 'http:', 628 | slashes: true, 629 | hostname: 'example.com', 630 | hash: '#frag=?bar/#frag', 631 | pathname: '' 632 | }, 633 | 634 | 'http://google.com" onload="alert(42)/': { 635 | hostname: 'google.com', 636 | protocol: 'http:', 637 | slashes: true, 638 | pathname: '" onload="alert(42)/' 639 | }, 640 | 641 | 'http://a.com/a/b/c?s#h': { 642 | protocol: 'http:', 643 | slashes: true, 644 | pathname: '/a/b/c', 645 | hostname: 'a.com', 646 | hash: '#h', 647 | search: '?s' 648 | }, 649 | 650 | 'http://atpass:foo%40bar@127.0.0.1/': { 651 | auth: 'atpass:foo%40bar', 652 | slashes: true, 653 | hostname: '127.0.0.1', 654 | protocol: 'http:', 655 | pathname: '/' 656 | }, 657 | 658 | 'http://atslash%2F%40:%2F%40@foo/': { 659 | auth: 'atslash%2F%40:%2F%40', 660 | hostname: 'foo', 661 | protocol: 'http:', 662 | pathname: '/', 663 | slashes: true 664 | }, 665 | 666 | // ipv6 support 667 | 'coap:u:p@[::1]:61616/.well-known/r?n=Temperature': { 668 | protocol: 'coap:', 669 | auth: 'u:p', 670 | hostname: '::1', 671 | port: '61616', 672 | pathname: '/.well-known/r', 673 | search: '?n=Temperature' 674 | }, 675 | 676 | 'coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton': { 677 | hostname: 'fedc:ba98:7654:3210:fedc:ba98:7654:3210', 678 | port: '61616', 679 | protocol: 'coap:', 680 | pathname: '/s/stopButton' 681 | }, 682 | 683 | // encode context-specific delimiters in path and query, but do not touch 684 | // other non-delimiter chars like `%`. 685 | // 686 | 687 | // `?` and `#` in path and search 688 | 'http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag': { 689 | protocol: 'http:', 690 | hostname: 'ex.com', 691 | hash: '#frag', 692 | search: '?abc=the%231?&foo=bar', 693 | pathname: '/foo%3F100%m%23r', 694 | slashes: true 695 | }, 696 | 697 | // `?` and `#` in search only 698 | 'http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag': { 699 | protocol: 'http:', 700 | hostname: 'ex.com', 701 | hash: '#frag', 702 | search: '?abc=the%231?&foo=bar', 703 | pathname: '/fooA100%mBr', 704 | slashes: true 705 | } 706 | } 707 | --------------------------------------------------------------------------------