├── .gitignore
├── rollup.config.mjs
├── .eslintrc.yml
├── index.mjs
├── .github
    ├── dependabot.yml
    └── workflows
    │   └── ci.yml
├── test
    ├── cjs.js
    ├── format.mjs
    ├── parse.mjs
    ├── encode.mjs
    ├── decode.mjs
    └── fixtures
    │   └── url.mjs
├── CHANGELOG.md
├── lib
    ├── format.mjs
    ├── encode.mjs
    ├── decode.mjs
    └── parse.mjs
├── package.json
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | coverage/
3 | build/
4 | 


--------------------------------------------------------------------------------
/rollup.config.mjs:
--------------------------------------------------------------------------------
 1 | export default [
 2 |   {
 3 |     input: 'index.mjs',
 4 |     output: {
 5 |       file: 'build/index.cjs.js',
 6 |       format: 'cjs'
 7 |     }
 8 |   }
 9 | ]
10 | 


--------------------------------------------------------------------------------
/.eslintrc.yml:
--------------------------------------------------------------------------------
 1 | extends: standard
 2 | 
 3 | ignorePatterns:
 4 |   - build/
 5 | 
 6 | overrides:
 7 |   -
 8 |     files: [ 'index.mjs', 'lib/**/*.mjs' ]
 9 |   -
10 |     files: [ 'test/**/*.mjs' ]
11 |     env:
12 |       mocha: true


--------------------------------------------------------------------------------
/index.mjs:
--------------------------------------------------------------------------------
 1 | import decode from './lib/decode.mjs'
 2 | import encode from './lib/encode.mjs'
 3 | import format from './lib/format.mjs'
 4 | import parse from './lib/parse.mjs'
 5 | 
 6 | export {
 7 |   decode,
 8 |   encode,
 9 |   format,
10 |   parse
11 | }
12 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | updates:
 3 |   - package-ecosystem: github-actions
 4 |     directory: /
 5 |     schedule:
 6 |       interval: daily
 7 | 
 8 |   - package-ecosystem: npm
 9 |     directory: /
10 |     schedule:
11 |       interval: daily
12 |     allow:
13 |       - dependency-type: production
14 | 


--------------------------------------------------------------------------------
/test/cjs.js:
--------------------------------------------------------------------------------
 1 | 'use strict'
 2 | /* eslint-env mocha */
 3 | 
 4 | const mdurl = require('../')
 5 | const assert = require('assert')
 6 | 
 7 | describe('CJS', () => {
 8 |   it('require', () => {
 9 |     assert.ok(mdurl.parse)
10 |     assert.ok(mdurl.format)
11 |     assert.ok(mdurl.encode)
12 |     assert.ok(mdurl.decode)
13 |   })
14 | })
15 | 


--------------------------------------------------------------------------------
/test/format.mjs:
--------------------------------------------------------------------------------
 1 | import assert from 'node:assert'
 2 | import { parse, format } from '../index.mjs'
 3 | import fixtures from './fixtures/url.mjs'
 4 | 
 5 | describe('format', () => {
 6 |   Object.keys(fixtures).forEach(url => {
 7 |     it(url, () => {
 8 |       const parsed = parse(url)
 9 |       assert.strictEqual(format(parsed), url)
10 |     })
11 |   })
12 | })
13 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | 2.0.0 / 2023-12-01
 2 | ------------------
 3 | 
 4 | - Rewrite to ESM.
 5 | 
 6 | 
 7 | 1.0.1 / 2015-09-15
 8 | ------------------
 9 | 
10 | - Fixed closure compiler compatibility (#1).
11 | 
12 | 
13 | 1.0.0 / 2015-03-04
14 | ------------------
15 | 
16 | - Added `.decode()`, `.parse()`, `.format()`.
17 | 
18 | 
19 | 0.0.1 / 2015-03-02
20 | ------------------
21 | 
22 | - First release.
23 | 


--------------------------------------------------------------------------------
/test/parse.mjs:
--------------------------------------------------------------------------------
 1 | import assert from 'node:assert'
 2 | import { parse } from '../index.mjs'
 3 | import fixtures from './fixtures/url.mjs'
 4 | 
 5 | describe('parse', () => {
 6 |   Object.keys(fixtures).forEach(function (url) {
 7 |     it(url, () => {
 8 |       const parsed = parse(url)
 9 | 
10 |       Object.keys(parsed).forEach(function (x) {
11 |         if (parsed[x] === null) { delete parsed[x] }
12 |       })
13 | 
14 |       assert.deepEqual(parsed, fixtures[url])
15 |     })
16 |   })
17 | })
18 | 


--------------------------------------------------------------------------------
/lib/format.mjs:
--------------------------------------------------------------------------------
 1 | export default function format (url) {
 2 |   let result = ''
 3 | 
 4 |   result += url.protocol || ''
 5 |   result += url.slashes ? '//' : ''
 6 |   result += url.auth ? url.auth + '@' : ''
 7 | 
 8 |   if (url.hostname && url.hostname.indexOf(':') !== -1) {
 9 |     // ipv6 address
10 |     result += '[' + url.hostname + ']'
11 |   } else {
12 |     result += url.hostname || ''
13 |   }
14 | 
15 |   result += url.port ? ':' + url.port : ''
16 |   result += url.pathname || ''
17 |   result += url.search || ''
18 |   result += url.hash || ''
19 | 
20 |   return result
21 | };
22 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 |   schedule:
 7 |     - cron: '0 0 * * 3'
 8 | 
 9 | jobs:
10 |   test:
11 | 
12 |     runs-on: ubuntu-latest
13 | 
14 |     strategy:
15 |       matrix:
16 |         node-version: [ '18' ]
17 | 
18 |     steps:
19 |     - uses: actions/checkout@v4
20 | 
21 |     - name: Use Node.js ${{ matrix.node-version }}
22 |       uses: actions/setup-node@v4
23 |       with:
24 |         node-version: ${{ matrix.node-version }}
25 | 
26 |     - run: npm install
27 | 
28 |     - name: Test
29 |       run: npm test
30 | 
31 |     - name: Upload coverage report to coveralls.io
32 |       uses: coverallsapp/github-action@master
33 |       with:
34 |         github-token: ${{ secrets.GITHUB_TOKEN }}
35 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "mdurl",
 3 |   "version": "2.0.0",
 4 |   "description": "URL utilities for markdown-it",
 5 |   "repository": "markdown-it/mdurl",
 6 |   "license": "MIT",
 7 |   "main": "build/index.cjs.js",
 8 |   "module": "index.mjs",
 9 |   "exports": {
10 |     ".": {
11 |       "require": "./build/index.cjs.js",
12 |       "import": "./index.mjs"
13 |     },
14 |     "./*": {
15 |       "require": "./*",
16 |       "import": "./*"
17 |     }
18 |   },
19 |   "scripts": {
20 |     "lint": "eslint .",
21 |     "build": "rollup -c",
22 |     "test": "npm run lint && npm run build && c8 --exclude build --exclude test -r text -r html -r lcov mocha",
23 |     "prepublishOnly": "npm run lint && npm run build"
24 |   },
25 |   "files": [
26 |     "index.mjs",
27 |     "lib/",
28 |     "build/"
29 |   ],
30 |   "devDependencies": {
31 |     "c8": "^8.0.1",
32 |     "eslint": "^8.54.0",
33 |     "eslint-config-standard": "^17.1.0",
34 |     "mocha": "^10.2.0",
35 |     "rollup": "^4.6.1"
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/test/encode.mjs:
--------------------------------------------------------------------------------
 1 | import { strictEqual as equals } from 'node:assert'
 2 | import { encode } from '../index.mjs'
 3 | 
 4 | describe('encode', () => {
 5 |   it('should encode percent', () => {
 6 |     equals(encode('%%%'), '%25%25%25')
 7 |   })
 8 | 
 9 |   it('should encode control chars', () => {
10 |     equals(encode('\r\n'), '%0D%0A')
11 |   })
12 | 
13 |   it('should not encode parts of an url', () => {
14 |     equals(encode('?#'), '?#')
15 |   })
16 | 
17 |   it('should not encode []^ - commonmark tests', () => {
18 |     equals(encode('[]^'), '%5B%5D%5E')
19 |   })
20 | 
21 |   it('should encode spaces', () => {
22 |     equals(encode('my url'), 'my%20url')
23 |   })
24 | 
25 |   it('should encode unicode', () => {
26 |     equals(encode('φου'), '%CF%86%CE%BF%CF%85')
27 |   })
28 | 
29 |   it('should encode % if it doesn\'t start a valid escape seq', () => {
30 |     equals(encode('%FG'), '%25FG')
31 |   })
32 | 
33 |   it('should preserve non-utf8 encoded characters', () => {
34 |     equals(encode('%00%FF'), '%00%FF')
35 |   })
36 | 
37 |   it('should encode characters on the cache borders', () => {
38 |     // protects against off-by-one in cache implementation
39 |     equals(encode('\x00\x7F\x80'), '%00%7F%C2%80')
40 |   })
41 | 
42 |   describe('arguments', () => {
43 |     it('encode(string, unescapedSet)', () => {
44 |       equals(encode('!@#$', '@$'), '%21@%23$')
45 |     })
46 | 
47 |     it('encode(string, keepEscaped=true)', () => {
48 |       equals(encode('%20%2G', true), '%20%252G')
49 |     })
50 | 
51 |     it('encode(string, keepEscaped=false)', () => {
52 |       equals(encode('%20%2G', false), '%2520%252G')
53 |     })
54 | 
55 |     it('encode(string, unescapedSet, keepEscaped)', () => {
56 |       equals(encode('!@%25', '@', false), '%21@%2525')
57 |     })
58 |   })
59 | 
60 |   describe('surrogates', () => {
61 |     it('bad surrogates (high)', () => {
62 |       equals(encode('\uD800foo'), '%EF%BF%BDfoo')
63 |       equals(encode('foo\uD800'), 'foo%EF%BF%BD')
64 |     })
65 | 
66 |     it('bad surrogates (low)', () => {
67 |       equals(encode('\uDD00foo'), '%EF%BF%BDfoo')
68 |       equals(encode('foo\uDD00'), 'foo%EF%BF%BD')
69 |     })
70 | 
71 |     it('valid one', () => {
72 |       equals(encode('\uD800\uDD00'), '%F0%90%84%80')
73 |     })
74 |   })
75 | })
76 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Vitaly Puzrin, Alex Kocharin.
 2 | 
 3 | Permission is hereby granted, free of charge, to any person
 4 | obtaining a copy of this software and associated documentation
 5 | files (the "Software"), to deal in the Software without
 6 | restriction, including without limitation the rights to use,
 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell
 8 | copies of the Software, and to permit persons to whom the
 9 | Software is furnished to do so, subject to the following
10 | conditions:
11 | 
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | --------------------------------------------------------------------------------
25 | 
26 | .parse() is based on Joyent's node.js `url` code:
27 | 
28 | Copyright Joyent, Inc. and other Node contributors. All rights reserved.
29 | Permission is hereby granted, free of charge, to any person obtaining a copy
30 | of this software and associated documentation files (the "Software"), to
31 | deal in the Software without restriction, including without limitation the
32 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
33 | sell copies of the Software, and to permit persons to whom the Software is
34 | furnished to do so, subject to the following conditions:
35 | 
36 | The above copyright notice and this permission notice shall be included in
37 | all copies or substantial portions of the Software.
38 | 
39 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
40 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
41 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
42 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
43 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
44 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
45 | IN THE SOFTWARE.
46 | 


--------------------------------------------------------------------------------
/lib/encode.mjs:
--------------------------------------------------------------------------------
 1 | const encodeCache = {}
 2 | 
 3 | // Create a lookup array where anything but characters in `chars` string
 4 | // and alphanumeric chars is percent-encoded.
 5 | //
 6 | function getEncodeCache (exclude) {
 7 |   let cache = encodeCache[exclude]
 8 |   if (cache) { return cache }
 9 | 
10 |   cache = encodeCache[exclude] = []
11 | 
12 |   for (let i = 0; i < 128; i++) {
13 |     const ch = String.fromCharCode(i)
14 | 
15 |     if (/^[0-9a-z]$/i.test(ch)) {
16 |       // always allow unencoded alphanumeric characters
17 |       cache.push(ch)
18 |     } else {
19 |       cache.push('%' + ('0' + i.toString(16).toUpperCase()).slice(-2))
20 |     }
21 |   }
22 | 
23 |   for (let i = 0; i < exclude.length; i++) {
24 |     cache[exclude.charCodeAt(i)] = exclude[i]
25 |   }
26 | 
27 |   return cache
28 | }
29 | 
30 | // Encode unsafe characters with percent-encoding, skipping already
31 | // encoded sequences.
32 | //
33 | //  - string       - string to encode
34 | //  - exclude      - list of characters to ignore (in addition to a-zA-Z0-9)
35 | //  - keepEscaped  - don't encode '%' in a correct escape sequence (default: true)
36 | //
37 | function encode (string, exclude, keepEscaped) {
38 |   if (typeof exclude !== 'string') {
39 |     // encode(string, keepEscaped)
40 |     keepEscaped = exclude
41 |     exclude = encode.defaultChars
42 |   }
43 | 
44 |   if (typeof keepEscaped === 'undefined') {
45 |     keepEscaped = true
46 |   }
47 | 
48 |   const cache = getEncodeCache(exclude)
49 |   let result = ''
50 | 
51 |   for (let i = 0, l = string.length; i < l; i++) {
52 |     const code = string.charCodeAt(i)
53 | 
54 |     if (keepEscaped && code === 0x25 /* % */ && i + 2 < l) {
55 |       if (/^[0-9a-f]{2}$/i.test(string.slice(i + 1, i + 3))) {
56 |         result += string.slice(i, i + 3)
57 |         i += 2
58 |         continue
59 |       }
60 |     }
61 | 
62 |     if (code < 128) {
63 |       result += cache[code]
64 |       continue
65 |     }
66 | 
67 |     if (code >= 0xD800 && code <= 0xDFFF) {
68 |       if (code >= 0xD800 && code <= 0xDBFF && i + 1 < l) {
69 |         const nextCode = string.charCodeAt(i + 1)
70 |         if (nextCode >= 0xDC00 && nextCode <= 0xDFFF) {
71 |           result += encodeURIComponent(string[i] + string[i + 1])
72 |           i++
73 |           continue
74 |         }
75 |       }
76 |       result += '%EF%BF%BD'
77 |       continue
78 |     }
79 | 
80 |     result += encodeURIComponent(string[i])
81 |   }
82 | 
83 |   return result
84 | }
85 | 
86 | encode.defaultChars = ";/?:@&=+$,-_.!~*'()#"
87 | encode.componentChars = "-_.!~*'()"
88 | 
89 | export default encode
90 | 


--------------------------------------------------------------------------------
/lib/decode.mjs:
--------------------------------------------------------------------------------
  1 | /* eslint-disable no-bitwise */
  2 | 
  3 | const decodeCache = {}
  4 | 
  5 | function getDecodeCache (exclude) {
  6 |   let cache = decodeCache[exclude]
  7 |   if (cache) { return cache }
  8 | 
  9 |   cache = decodeCache[exclude] = []
 10 | 
 11 |   for (let i = 0; i < 128; i++) {
 12 |     const ch = String.fromCharCode(i)
 13 |     cache.push(ch)
 14 |   }
 15 | 
 16 |   for (let i = 0; i < exclude.length; i++) {
 17 |     const ch = exclude.charCodeAt(i)
 18 |     cache[ch] = '%' + ('0' + ch.toString(16).toUpperCase()).slice(-2)
 19 |   }
 20 | 
 21 |   return cache
 22 | }
 23 | 
 24 | // Decode percent-encoded string.
 25 | //
 26 | function decode (string, exclude) {
 27 |   if (typeof exclude !== 'string') {
 28 |     exclude = decode.defaultChars
 29 |   }
 30 | 
 31 |   const cache = getDecodeCache(exclude)
 32 | 
 33 |   return string.replace(/(%[a-f0-9]{2})+/gi, function (seq) {
 34 |     let result = ''
 35 | 
 36 |     for (let i = 0, l = seq.length; i < l; i += 3) {
 37 |       const b1 = parseInt(seq.slice(i + 1, i + 3), 16)
 38 | 
 39 |       if (b1 < 0x80) {
 40 |         result += cache[b1]
 41 |         continue
 42 |       }
 43 | 
 44 |       if ((b1 & 0xE0) === 0xC0 && (i + 3 < l)) {
 45 |         // 110xxxxx 10xxxxxx
 46 |         const b2 = parseInt(seq.slice(i + 4, i + 6), 16)
 47 | 
 48 |         if ((b2 & 0xC0) === 0x80) {
 49 |           const chr = ((b1 << 6) & 0x7C0) | (b2 & 0x3F)
 50 | 
 51 |           if (chr < 0x80) {
 52 |             result += '\ufffd\ufffd'
 53 |           } else {
 54 |             result += String.fromCharCode(chr)
 55 |           }
 56 | 
 57 |           i += 3
 58 |           continue
 59 |         }
 60 |       }
 61 | 
 62 |       if ((b1 & 0xF0) === 0xE0 && (i + 6 < l)) {
 63 |         // 1110xxxx 10xxxxxx 10xxxxxx
 64 |         const b2 = parseInt(seq.slice(i + 4, i + 6), 16)
 65 |         const b3 = parseInt(seq.slice(i + 7, i + 9), 16)
 66 | 
 67 |         if ((b2 & 0xC0) === 0x80 && (b3 & 0xC0) === 0x80) {
 68 |           const chr = ((b1 << 12) & 0xF000) | ((b2 << 6) & 0xFC0) | (b3 & 0x3F)
 69 | 
 70 |           if (chr < 0x800 || (chr >= 0xD800 && chr <= 0xDFFF)) {
 71 |             result += '\ufffd\ufffd\ufffd'
 72 |           } else {
 73 |             result += String.fromCharCode(chr)
 74 |           }
 75 | 
 76 |           i += 6
 77 |           continue
 78 |         }
 79 |       }
 80 | 
 81 |       if ((b1 & 0xF8) === 0xF0 && (i + 9 < l)) {
 82 |         // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx
 83 |         const b2 = parseInt(seq.slice(i + 4, i + 6), 16)
 84 |         const b3 = parseInt(seq.slice(i + 7, i + 9), 16)
 85 |         const b4 = parseInt(seq.slice(i + 10, i + 12), 16)
 86 | 
 87 |         if ((b2 & 0xC0) === 0x80 && (b3 & 0xC0) === 0x80 && (b4 & 0xC0) === 0x80) {
 88 |           let chr = ((b1 << 18) & 0x1C0000) | ((b2 << 12) & 0x3F000) | ((b3 << 6) & 0xFC0) | (b4 & 0x3F)
 89 | 
 90 |           if (chr < 0x10000 || chr > 0x10FFFF) {
 91 |             result += '\ufffd\ufffd\ufffd\ufffd'
 92 |           } else {
 93 |             chr -= 0x10000
 94 |             result += String.fromCharCode(0xD800 + (chr >> 10), 0xDC00 + (chr & 0x3FF))
 95 |           }
 96 | 
 97 |           i += 9
 98 |           continue
 99 |         }
100 |       }
101 | 
102 |       result += '\ufffd'
103 |     }
104 | 
105 |     return result
106 |   })
107 | }
108 | 
109 | decode.defaultChars = ';/?:@&=+$,#'
110 | decode.componentChars = ''
111 | 
112 | export default decode
113 | 


--------------------------------------------------------------------------------
/test/decode.mjs:
--------------------------------------------------------------------------------
  1 | import assert from 'node:assert'
  2 | import { decode } from '../index.mjs'
  3 | 
  4 | function encodeBinary (str) {
  5 |   let result = ''
  6 | 
  7 |   str = str.replace(/\s+/g, '')
  8 |   while (str.length) {
  9 |     result = '%' + ('0' + parseInt(str.slice(-8), 2).toString(16)).slice(-2) + result
 10 |     str = str.slice(0, -8)
 11 |   }
 12 | 
 13 |   return result
 14 | }
 15 | 
 16 | const samples = {
 17 |   '00000000': true,
 18 |   '01010101': true,
 19 |   '01111111': true,
 20 | 
 21 |   // invalid as 1st byte
 22 |   10000000: true,
 23 |   10111111: true,
 24 | 
 25 |   // invalid sequences, 2nd byte should be >= 0x80
 26 |   '11000111 01010101': false,
 27 |   '11100011 01010101': false,
 28 |   '11110001 01010101': false,
 29 | 
 30 |   // invalid sequences, 2nd byte should be < 0xc0
 31 |   '11000111 11000000': false,
 32 |   '11100011 11000000': false,
 33 |   '11110001 11000000': false,
 34 | 
 35 |   // invalid 3rd byte
 36 |   '11100011 10010101 01010101': false,
 37 |   '11110001 10010101 01010101': false,
 38 | 
 39 |   // invalid 4th byte
 40 |   '11110001 10010101 10010101 01010101': false,
 41 | 
 42 |   // valid sequences
 43 |   '11000111 10101010': true,
 44 |   '11100011 10101010 10101010': true,
 45 |   '11110001 10101010 10101010 10101010': true,
 46 | 
 47 |   // minimal chars with given length
 48 |   '11000010 10000000': true,
 49 |   '11100000 10100000 10000000': true,
 50 | 
 51 |   // impossible sequences
 52 |   '11000001 10111111': false,
 53 |   '11100000 10011111 10111111': false,
 54 |   '11000001 10000000': false,
 55 |   '11100000 10010000 10000000': false,
 56 | 
 57 |   // maximum chars with given length
 58 |   '11011111 10111111': true,
 59 |   '11101111 10111111 10111111': true,
 60 | 
 61 |   '11110000 10010000 10000000 10000000': true,
 62 |   '11110000 10010000 10001111 10001111': true,
 63 |   '11110100 10001111 10110000 10000000': true,
 64 |   '11110100 10001111 10111111 10111111': true,
 65 | 
 66 |   // too low
 67 |   '11110000 10001111 10111111 10111111': false,
 68 | 
 69 |   // too high
 70 |   '11110100 10010000 10000000 10000000': false,
 71 |   '11110100 10011111 10111111 10111111': false,
 72 | 
 73 |   // surrogate range
 74 |   '11101101 10011111 10111111': true,
 75 |   '11101101 10100000 10000000': false,
 76 |   '11101101 10111111 10111111': false,
 77 |   '11101110 10000000 10000000': true
 78 | }
 79 | 
 80 | describe('decode', () => {
 81 |   it('should decode %xx', () => {
 82 |     assert.equal(decode('x%20xx%20%2520'), 'x xx %20')
 83 |   })
 84 | 
 85 |   it('should not decode invalid sequences', () => {
 86 |     assert.equal(decode('%2g%z1%%'), '%2g%z1%%')
 87 |   })
 88 | 
 89 |   it('should not decode reservedSet', () => {
 90 |     assert.equal(decode('%20%25%20', '%'), ' %25 ')
 91 |     assert.equal(decode('%20%25%20', ' '), '%20%%20')
 92 |     assert.equal(decode('%20%25%20', ' %'), '%20%25%20')
 93 |   })
 94 | 
 95 |   describe('utf8', () => {
 96 |     Object.keys(samples).forEach(function (k) {
 97 |       it(k, () => {
 98 |         let res1, er
 99 | 
100 |         const str = encodeBinary(k)
101 | 
102 |         try {
103 |           res1 = decodeURIComponent(str)
104 |         } catch (e) {
105 |           er = e
106 |         }
107 | 
108 |         const res2 = decode(str)
109 | 
110 |         if (er) {
111 |           assert.notEqual(res2.indexOf('\ufffd'), -1)
112 |         } else {
113 |           assert.equal(res1, res2)
114 |           assert.equal(res2.indexOf('\ufffd'), -1)
115 |         }
116 |       })
117 |     })
118 |   })
119 | })
120 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # mdurl
  2 | 
  3 | [![CI](https://github.com/markdown-it/mdurl/actions/workflows/ci.yml/badge.svg)](https://github.com/markdown-it/mdurl/actions/workflows/ci.yml)
  4 | [![NPM version](https://img.shields.io/npm/v/mdurl.svg?style=flat)](https://www.npmjs.org/package/mdurl)
  5 | 
  6 | > URL utilities for [markdown-it](https://github.com/markdown-it/markdown-it) parser.
  7 | 
  8 | 
  9 | ## API
 10 | 
 11 | ### .encode(str [, exclude, keepEncoded]) -> String
 12 | 
 13 | Percent-encode a string, avoiding double encoding. Don't touch `/a-zA-Z0-9/` +
 14 | excluded chars + `/%[a-fA-F0-9]{2}/` (if not disabled). Broken surrogates are
 15 | replaced with `U+FFFD`.
 16 | 
 17 | Params:
 18 | 
 19 | - __str__ - input string.
 20 | - __exclude__ - optional, `;/?:@&=+$,-_.!~*'()#`. Additional chars to keep intact
 21 |   (besides `/a-zA-Z0-9/`, which are always kept).
 22 | - __keepEncoded__ - optional, `true`. By default it skips already encoded sequences
 23 |   (`/%[a-fA-F0-9]{2}/`). If set to `false`, `%` will be encoded.
 24 | 
 25 | 
 26 | ### encode.defaultChars, encode.componentChars
 27 | 
 28 | You can use these constants as second argument to `encode` function.
 29 | 
 30 |  - `encode.defaultChars` is the same exclude set as in the standard `encodeURI()` function
 31 |  - `encode.componentChars` is the same exclude set as in the `encodeURIComponent()` function
 32 | 
 33 | For example, `encode('something', encode.componentChars, true)` is roughly the equivalent of
 34 | the `encodeURIComponent()` function (except `encode()` doesn't throw).
 35 | 
 36 | 
 37 | ### .decode(str [, exclude]) -> String
 38 | 
 39 | Decode percent-encoded string. Invalid percent-encoded sequences (e.g. `%2G`)
 40 | are left as is. Invalid UTF-8 characters are replaced with `U+FFFD`.
 41 | 
 42 | 
 43 | Params:
 44 | 
 45 | - __str__ - input string.
 46 | - __exclude__ - set of characters to leave encoded, optional, `;/?:@&=+$,#`.
 47 | 
 48 | 
 49 | ### decode.defaultChars, decode.componentChars
 50 | 
 51 | You can use these constants as second argument to `decode` function.
 52 | 
 53 |  - `decode.defaultChars` is the same exclude set as in the standard `decodeURI()` function
 54 |  - `decode.componentChars` is the same exclude set as in the `decodeURIComponent()` function
 55 | 
 56 | For example, `decode('something', decode.defaultChars)` has the same behavior as
 57 | `decodeURI('something')` on a correctly encoded input.
 58 | 
 59 | 
 60 | ### .parse(url, slashesDenoteHost) -> urlObj
 61 | 
 62 | Parse url string. Similar to node's [url.parse](http://nodejs.org/api/url.html#url_url_parse_urlstr_parsequerystring_slashesdenotehost), but without any
 63 | normalizations and query string parse.
 64 | 
 65 |  - __url__ - input url (string)
 66 |  - __slashesDenoteHost__ - if url starts with `//`, expect a hostname after it. Optional, `false`.
 67 | 
 68 | Result (hash):
 69 | 
 70 | - protocol
 71 | - slashes
 72 | - auth
 73 | - port
 74 | - hostname
 75 | - hash
 76 | - search
 77 | - pathname
 78 | 
 79 | Difference with node's `url`:
 80 | 
 81 | 1. No leading slash in paths, e.g. in `url.parse('http://foo?bar')` pathname is
 82 |    ``, not `/`
 83 | 2. Backslashes are not replaced with slashes, so `http:\\example.org\` is
 84 |    treated like a relative path
 85 | 3. Trailing colon is treated like a part of the path, i.e. in
 86 |    `http://example.org:foo` pathname is `:foo`
 87 | 4. Nothing is URL-encoded in the resulting object, (in joyent/node some chars
 88 |    in auth and paths are encoded)
 89 | 5. `url.parse()` does not have `parseQueryString` argument
 90 | 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
 91 |    which can be constructed using other parts of the url.
 92 | 
 93 | 
 94 | ### .format(urlObject)
 95 | 
 96 | Format an object previously obtained with `.parse()` function. Similar to node's
 97 | [url.format](http://nodejs.org/api/url.html#url_url_format_urlobj).
 98 | 
 99 | 
100 | ## License
101 | 
102 | [MIT](https://github.com/markdown-it/mdurl/blob/master/LICENSE)
103 | 


--------------------------------------------------------------------------------
/lib/parse.mjs:
--------------------------------------------------------------------------------
  1 | // Copyright Joyent, Inc. and other Node contributors.
  2 | //
  3 | // Permission is hereby granted, free of charge, to any person obtaining a
  4 | // copy of this software and associated documentation files (the
  5 | // "Software"), to deal in the Software without restriction, including
  6 | // without limitation the rights to use, copy, modify, merge, publish,
  7 | // distribute, sublicense, and/or sell copies of the Software, and to permit
  8 | // persons to whom the Software is furnished to do so, subject to the
  9 | // following conditions:
 10 | //
 11 | // The above copyright notice and this permission notice shall be included
 12 | // in all copies or substantial portions of the Software.
 13 | //
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 20 | // USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | 
 22 | //
 23 | // Changes from joyent/node:
 24 | //
 25 | // 1. No leading slash in paths,
 26 | //    e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
 27 | //
 28 | // 2. Backslashes are not replaced with slashes,
 29 | //    so `http:\\example.org\` is treated like a relative path
 30 | //
 31 | // 3. Trailing colon is treated like a part of the path,
 32 | //    i.e. in `http://example.org:foo` pathname is `:foo`
 33 | //
 34 | // 4. Nothing is URL-encoded in the resulting object,
 35 | //    (in joyent/node some chars in auth and paths are encoded)
 36 | //
 37 | // 5. `url.parse()` does not have `parseQueryString` argument
 38 | //
 39 | // 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
 40 | //    which can be constructed using other parts of the url.
 41 | //
 42 | 
 43 | function Url () {
 44 |   this.protocol = null
 45 |   this.slashes = null
 46 |   this.auth = null
 47 |   this.port = null
 48 |   this.hostname = null
 49 |   this.hash = null
 50 |   this.search = null
 51 |   this.pathname = null
 52 | }
 53 | 
 54 | // Reference: RFC 3986, RFC 1808, RFC 2396
 55 | 
 56 | // define these here so at least they only have to be
 57 | // compiled once on the first module load.
 58 | const protocolPattern = /^([a-z0-9.+-]+:)/i
 59 | const portPattern = /:[0-9]*$/
 60 | 
 61 | // Special case for a simple path URL
 62 | /* eslint-disable-next-line no-useless-escape */
 63 | const simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/
 64 | 
 65 | // RFC 2396: characters reserved for delimiting URLs.
 66 | // We actually just auto-escape these.
 67 | const delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t']
 68 | 
 69 | // RFC 2396: characters not allowed for various reasons.
 70 | const unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims)
 71 | 
 72 | // Allowed by RFCs, but cause of XSS attacks.  Always escape these.
 73 | const autoEscape = ['\''].concat(unwise)
 74 | // Characters that are never ever allowed in a hostname.
 75 | // Note that any invalid chars are also handled, but these
 76 | // are the ones that are *expected* to be seen, so we fast-path
 77 | // them.
 78 | const nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape)
 79 | const hostEndingChars = ['/', '?', '#']
 80 | const hostnameMaxLen = 255
 81 | const hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/
 82 | const hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/
 83 | // protocols that can allow "unsafe" and "unwise" chars.
 84 | // protocols that never have a hostname.
 85 | const hostlessProtocol = {
 86 |   javascript: true,
 87 |   'javascript:': true
 88 | }
 89 | // protocols that always contain a // bit.
 90 | const slashedProtocol = {
 91 |   http: true,
 92 |   https: true,
 93 |   ftp: true,
 94 |   gopher: true,
 95 |   file: true,
 96 |   'http:': true,
 97 |   'https:': true,
 98 |   'ftp:': true,
 99 |   'gopher:': true,
100 |   'file:': true
101 | }
102 | 
// Parse `url` into a `Url` object.
//
//  - url               - url string, or an existing `Url` instance
//  - slashesDenoteHost - forwarded unchanged to `Url.prototype.parse`
//
// An existing `Url` instance is returned as is, without re-parsing.
//
function urlParse (url, slashesDenoteHost) {
  if (url instanceof Url) {
    return url
  }

  const parsed = new Url()
  parsed.parse(url, slashesDenoteHost)
  return parsed
}
110 | 
/**
 * Find the earliest position in `str` at which any of `needles` occurs.
 *
 * @param {string} str - text to scan.
 * @param {string[]} needles - substrings (single characters here) to look for.
 * @returns {number} the smallest match index, or -1 when nothing matches.
 */
function firstIndexOfAny (str, needles) {
  let first = -1
  for (const needle of needles) {
    const idx = str.indexOf(needle)
    if (idx !== -1 && (first === -1 || idx < first)) {
      first = idx
    }
  }
  return first
}

/**
 * Parse `url` and store its components on this object.
 *
 * Depending on the input, the following properties are assigned:
 * `protocol` (original case, trailing colon included), `slashes`, `auth`,
 * `hostname` (IPv6 brackets removed), `port` (via `parseHost`), `pathname`,
 * `search` (leading "?" included) and `hash` (leading "#" included).
 * Nothing is decoded or lowercased.
 *
 * @param {string} url - text to parse; must be a string (it is trimmed
 *   first, so surrounding whitespace is ignored).
 * @param {boolean} [slashesDenoteHost] - when truthy, a leading "//" is
 *   read as introducing a host even when no scheme is present.
 * @returns {Url} this object.
 */
Url.prototype.parse = function (url, slashesDenoteHost) {
  let lowerProto, slashes

  // trim before proceeding.
  // This is to support parse stuff like "  http://foo.com  \n"
  let rest = url.trim()

  if (!slashesDenoteHost && !rest.includes('#')) {
    // Try fast path regexp
    const simplePath = simplePathPattern.exec(rest)
    if (simplePath) {
      this.pathname = simplePath[1]
      if (simplePath[2]) {
        this.search = simplePath[2]
      }
      return this
    }
  }

  let proto = protocolPattern.exec(rest)
  if (proto) {
    proto = proto[0]
    lowerProto = proto.toLowerCase()
    this.protocol = proto
    rest = rest.slice(proto.length)
  }

  // Schemes are case-insensitive and the lookup tables are keyed in lower
  // case, so every lookup goes through the lowercased scheme. Otherwise
  // "JAVASCRIPT:..." or "HTTP:/path" would be treated differently from
  // their lowercase spellings.
  const isHostless = Boolean(proto && hostlessProtocol[lowerProto])
  const isSlashed = Boolean(proto && slashedProtocol[lowerProto])

  // figure out if it's got a host
  // user@server is *always* interpreted as a hostname, and url
  // resolution will treat //foo/bar as host=foo,path=bar because that's
  // how the browser resolves relative URLs.
  if (slashesDenoteHost || proto || /^\/\/[^@/]+@[^@/]+/.test(rest)) {
    slashes = rest.startsWith('//')
    if (slashes && !isHostless) {
      rest = rest.slice(2)
      this.slashes = true
    }
  }

  if (!isHostless && (slashes || (proto && !isSlashed))) {
    // there's a hostname.
    // the first instance of /, ?, ;, or # ends the host.
    //
    // If there is an @ in the hostname, then non-host chars *are* allowed
    // to the left of the last @ sign, unless some host-ending character
    // comes *before* the @-sign.
    // URLs are obnoxious.
    //
    // ex:
    // http://a@b@c/ => user:a@b host:c
    // http://a@b?@c => user:a host:b path:/?@c

    // v0.12 TODO(isaacs): This is not quite how Chrome does things.
    // Review our test case against browsers more comprehensively.

    // at this point, either we have an explicit point where the
    // auth portion cannot go past, or the last @ char is the decider.
    const authLimit = firstIndexOfAny(rest, hostEndingChars)
    const atSign = authLimit === -1
      // atSign can be anywhere.
      ? rest.lastIndexOf('@')
      // atSign must be in auth portion.
      // http://a@b/c@d => host:b auth:a path:/c@d
      : rest.lastIndexOf('@', authLimit)

    // Now we have a portion which is definitely the auth.
    // Pull that off.
    if (atSign !== -1) {
      this.auth = rest.slice(0, atSign)
      rest = rest.slice(atSign + 1)
    }

    // the host is the remaining to the left of the first non-host char;
    // if there is none, then the entire thing is a host.
    let hostEnd = firstIndexOfAny(rest, nonHostChars)
    if (hostEnd === -1) {
      hostEnd = rest.length
    }

    // A ":" directly before the end of the host (empty port) is left in
    // `rest`, so it ends up at the start of the pathname.
    if (rest[hostEnd - 1] === ':') { hostEnd-- }
    const host = rest.slice(0, hostEnd)
    rest = rest.slice(hostEnd)

    // pull out port.
    this.parseHost(host)

    // we've indicated that there is a hostname,
    // so even if it's empty, it has to be present.
    this.hostname = this.hostname || ''

    // if hostname begins with [ and ends with ]
    // assume that it's an IPv6 address.
    const ipv6Hostname = this.hostname[0] === '[' &&
        this.hostname[this.hostname.length - 1] === ']'

    // validate a little.
    if (!ipv6Hostname) {
      const hostparts = this.hostname.split('.')
      for (let i = 0; i < hostparts.length; i++) {
        const part = hostparts[i]
        if (!part || hostnamePartPattern.test(part)) { continue }

        // Replace each non-ASCII char with a placeholder of the same
        // length and test again, so that such labels are accepted without
        // changing how long the label is.
        let asciiPart = ''
        for (let j = 0; j < part.length; j++) {
          asciiPart += part.charCodeAt(j) > 127 ? 'x' : part[j]
        }
        if (hostnamePartPattern.test(asciiPart)) { continue }

        // The label is invalid: the hostname ends at the last valid char
        // of this label, everything after it goes back in front of `rest`.
        const validParts = hostparts.slice(0, i)
        const notHost = hostparts.slice(i + 1)
        const bit = hostnamePartStart.exec(part)
        if (bit) {
          validParts.push(bit[1])
          notHost.unshift(bit[2])
        }
        if (notHost.length) {
          rest = notHost.join('.') + rest
        }
        this.hostname = validParts.join('.')
        break
      }
    }

    if (this.hostname.length > hostnameMaxLen) {
      this.hostname = ''
    }

    // strip [ and ] from the hostname
    if (ipv6Hostname) {
      this.hostname = this.hostname.slice(1, -1)
    }
  }

  // chop off from the tail first.
  const hash = rest.indexOf('#')
  if (hash !== -1) {
    // got a fragment string.
    this.hash = rest.slice(hash)
    rest = rest.slice(0, hash)
  }
  const qm = rest.indexOf('?')
  if (qm !== -1) {
    this.search = rest.slice(qm)
    rest = rest.slice(0, qm)
  }
  if (rest) { this.pathname = rest }
  // A slashed scheme with a host but no path gets an explicit empty path.
  if (isSlashed && this.hostname && !this.pathname) {
    this.pathname = ''
  }

  return this
}
295 | 
/**
 * Split a "host[:port]" string and store the pieces on this object.
 *
 * `port` is assigned only when digits follow the final colon; `hostname`
 * is assigned only when something is left once the port suffix is removed.
 *
 * @param {string} host - host text, possibly ending in ":" or ":<digits>".
 */
Url.prototype.parseHost = function (host) {
  const match = portPattern.exec(host)
  let name = host

  if (match) {
    const suffix = match[0]
    // A lone ":" is an empty port: drop it without recording a port.
    if (suffix !== ':') {
      this.port = suffix.slice(1)
    }
    // The pattern is anchored at the end, so the match start is where the
    // hostname stops.
    name = host.slice(0, match.index)
  }

  if (name) {
    this.hostname = name
  }
}
307 | 
308 | export default urlParse
309 | 


--------------------------------------------------------------------------------
/test/fixtures/url.mjs:
--------------------------------------------------------------------------------
  1 | // Copyright Joyent, Inc. and other Node contributors.
  2 | //
  3 | // Permission is hereby granted, free of charge, to any person obtaining a
  4 | // copy of this software and associated documentation files (the
  5 | // "Software"), to deal in the Software without restriction, including
  6 | // without limitation the rights to use, copy, modify, merge, publish,
  7 | // distribute, sublicense, and/or sell copies of the Software, and to permit
  8 | // persons to whom the Software is furnished to do so, subject to the
  9 | // following conditions:
 10 | //
 11 | // The above copyright notice and this permission notice shall be included
 12 | // in all copies or substantial portions of the Software.
 13 | //
 14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 15 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 16 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
 17 | // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 18 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 19 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 20 | // USE OR OTHER DEALINGS IN THE SOFTWARE.
 21 | 
 22 | 'use strict'
 23 | 
 24 | /* eslint-disable no-script-url */
 25 | 
 26 | // URLs to parse, and expected data
 27 | // { url : parsed }
 28 | export default {
 29 |   '//some_path': {
 30 |     pathname: '//some_path'
 31 |   },
 32 | 
 33 |   'HTTP://www.example.com/': {
 34 |     protocol: 'HTTP:',
 35 |     slashes: true,
 36 |     hostname: 'www.example.com',
 37 |     pathname: '/'
 38 |   },
 39 | 
 40 |   'HTTP://www.example.com': {
 41 |     protocol: 'HTTP:',
 42 |     slashes: true,
 43 |     hostname: 'www.example.com',
 44 |     pathname: ''
 45 |   },
 46 | 
 47 |   'http://www.ExAmPlE.com/': {
 48 |     protocol: 'http:',
 49 |     slashes: true,
 50 |     hostname: 'www.ExAmPlE.com',
 51 |     pathname: '/'
 52 |   },
 53 | 
 54 |   'http://user:pw@www.ExAmPlE.com/': {
 55 |     protocol: 'http:',
 56 |     slashes: true,
 57 |     auth: 'user:pw',
 58 |     hostname: 'www.ExAmPlE.com',
 59 |     pathname: '/'
 60 |   },
 61 | 
 62 |   'http://USER:PW@www.ExAmPlE.com/': {
 63 |     protocol: 'http:',
 64 |     slashes: true,
 65 |     auth: 'USER:PW',
 66 |     hostname: 'www.ExAmPlE.com',
 67 |     pathname: '/'
 68 |   },
 69 | 
 70 |   'http://user@www.example.com/': {
 71 |     protocol: 'http:',
 72 |     slashes: true,
 73 |     auth: 'user',
 74 |     hostname: 'www.example.com',
 75 |     pathname: '/'
 76 |   },
 77 | 
 78 |   'http://user%3Apw@www.example.com/': {
 79 |     protocol: 'http:',
 80 |     slashes: true,
 81 |     auth: 'user%3Apw',
 82 |     hostname: 'www.example.com',
 83 |     pathname: '/'
 84 |   },
 85 | 
 86 |   'http://x.com/path?that\'s#all, folks': {
 87 |     protocol: 'http:',
 88 |     hostname: 'x.com',
 89 |     slashes: true,
 90 |     search: '?that\'s',
 91 |     pathname: '/path',
 92 |     hash: '#all, folks'
 93 |   },
 94 | 
 95 |   'HTTP://X.COM/Y': {
 96 |     protocol: 'HTTP:',
 97 |     slashes: true,
 98 |     hostname: 'X.COM',
 99 |     pathname: '/Y'
100 |   },
101 | 
102 |   // + not an invalid host character
103 |   // per https://url.spec.whatwg.org/#host-parsing
104 |   'http://x.y.com+a/b/c': {
105 |     protocol: 'http:',
106 |     slashes: true,
107 |     hostname: 'x.y.com+a',
108 |     pathname: '/b/c'
109 |   },
110 | 
111 |   // an unexpected invalid char in the hostname.
112 |   'HtTp://x.y.cOm;a/b/c?d=e#f g<h>i': {
113 |     protocol: 'HtTp:',
114 |     slashes: true,
115 |     hostname: 'x.y.cOm',
116 |     pathname: ';a/b/c',
117 |     search: '?d=e',
118 |     hash: '#f g<h>i'
119 |   },
120 | 
  // make sure that we don't accidentally lowercase the path parts.
122 |   'HtTp://x.y.cOm;A/b/c?d=e#f g<h>i': {
123 |     protocol: 'HtTp:',
124 |     slashes: true,
125 |     hostname: 'x.y.cOm',
126 |     pathname: ';A/b/c',
127 |     search: '?d=e',
128 |     hash: '#f g<h>i'
129 |   },
130 | 
131 |   'http://x...y...#p': {
132 |     protocol: 'http:',
133 |     slashes: true,
134 |     hostname: 'x...y...',
135 |     hash: '#p',
136 |     pathname: ''
137 |   },
138 | 
139 |   'http://x/p/"quoted"': {
140 |     protocol: 'http:',
141 |     slashes: true,
142 |     hostname: 'x',
143 |     pathname: '/p/"quoted"'
144 |   },
145 | 
146 |   '<http://goo.corn/bread> Is a URL!': {
147 |     pathname: '<http://goo.corn/bread> Is a URL!'
148 |   },
149 | 
150 |   'http://www.narwhaljs.org/blog/categories?id=news': {
151 |     protocol: 'http:',
152 |     slashes: true,
153 |     hostname: 'www.narwhaljs.org',
154 |     search: '?id=news',
155 |     pathname: '/blog/categories'
156 |   },
157 | 
158 |   'http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=': {
159 |     protocol: 'http:',
160 |     slashes: true,
161 |     hostname: 'mt0.google.com',
162 |     pathname: '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s='
163 |   },
164 | 
165 |   'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=': {
166 |     protocol: 'http:',
167 |     slashes: true,
168 |     hostname: 'mt0.google.com',
169 |     search: '???&hl=en&src=api&x=2&y=2&z=3&s=',
170 |     pathname: '/vt/lyrs=m@114'
171 |   },
172 | 
173 |   'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=':
174 |       {
175 |         protocol: 'http:',
176 |         slashes: true,
177 |         auth: 'user:pass',
178 |         hostname: 'mt0.google.com',
179 |         search: '???&hl=en&src=api&x=2&y=2&z=3&s=',
180 |         pathname: '/vt/lyrs=m@114'
181 |       },
182 | 
183 |   'file:///etc/passwd': {
184 |     slashes: true,
185 |     protocol: 'file:',
186 |     pathname: '/etc/passwd',
187 |     hostname: ''
188 |   },
189 | 
190 |   'file://localhost/etc/passwd': {
191 |     protocol: 'file:',
192 |     slashes: true,
193 |     pathname: '/etc/passwd',
194 |     hostname: 'localhost'
195 |   },
196 | 
197 |   'file://foo/etc/passwd': {
198 |     protocol: 'file:',
199 |     slashes: true,
200 |     pathname: '/etc/passwd',
201 |     hostname: 'foo'
202 |   },
203 | 
204 |   'file:///etc/node/': {
205 |     slashes: true,
206 |     protocol: 'file:',
207 |     pathname: '/etc/node/',
208 |     hostname: ''
209 |   },
210 | 
211 |   'file://localhost/etc/node/': {
212 |     protocol: 'file:',
213 |     slashes: true,
214 |     pathname: '/etc/node/',
215 |     hostname: 'localhost'
216 |   },
217 | 
218 |   'file://foo/etc/node/': {
219 |     protocol: 'file:',
220 |     slashes: true,
221 |     pathname: '/etc/node/',
222 |     hostname: 'foo'
223 |   },
224 | 
225 |   'http:/baz/../foo/bar': {
226 |     protocol: 'http:',
227 |     pathname: '/baz/../foo/bar'
228 |   },
229 | 
230 |   'http://user:pass@example.com:8000/foo/bar?baz=quux#frag': {
231 |     protocol: 'http:',
232 |     slashes: true,
233 |     auth: 'user:pass',
234 |     port: '8000',
235 |     hostname: 'example.com',
236 |     hash: '#frag',
237 |     search: '?baz=quux',
238 |     pathname: '/foo/bar'
239 |   },
240 | 
241 |   '//user:pass@example.com:8000/foo/bar?baz=quux#frag': {
242 |     slashes: true,
243 |     auth: 'user:pass',
244 |     port: '8000',
245 |     hostname: 'example.com',
246 |     hash: '#frag',
247 |     search: '?baz=quux',
248 |     pathname: '/foo/bar'
249 |   },
250 | 
251 |   '/foo/bar?baz=quux#frag': {
252 |     hash: '#frag',
253 |     search: '?baz=quux',
254 |     pathname: '/foo/bar'
255 |   },
256 | 
257 |   'http:/foo/bar?baz=quux#frag': {
258 |     protocol: 'http:',
259 |     hash: '#frag',
260 |     search: '?baz=quux',
261 |     pathname: '/foo/bar'
262 |   },
263 | 
264 |   'mailto:foo@bar.com?subject=hello': {
265 |     protocol: 'mailto:',
266 |     auth: 'foo',
267 |     hostname: 'bar.com',
268 |     search: '?subject=hello'
269 |   },
270 | 
271 |   'javascript:alert(\'hello\');': {
272 |     protocol: 'javascript:',
273 |     pathname: 'alert(\'hello\');'
274 |   },
275 | 
276 |   'xmpp:isaacschlueter@jabber.org': {
277 |     protocol: 'xmpp:',
278 |     auth: 'isaacschlueter',
279 |     hostname: 'jabber.org'
280 |   },
281 | 
282 |   'http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar': {
283 |     protocol: 'http:',
284 |     slashes: true,
285 |     auth: 'atpass:foo%40bar',
286 |     hostname: '127.0.0.1',
287 |     port: '8080',
288 |     pathname: '/path',
289 |     search: '?search=foo',
290 |     hash: '#bar'
291 |   },
292 | 
293 |   'svn+ssh://foo/bar': {
294 |     hostname: 'foo',
295 |     protocol: 'svn+ssh:',
296 |     pathname: '/bar',
297 |     slashes: true
298 |   },
299 | 
300 |   'dash-test://foo/bar': {
301 |     hostname: 'foo',
302 |     protocol: 'dash-test:',
303 |     pathname: '/bar',
304 |     slashes: true
305 |   },
306 | 
307 |   'dash-test:foo/bar': {
308 |     hostname: 'foo',
309 |     protocol: 'dash-test:',
310 |     pathname: '/bar'
311 |   },
312 | 
313 |   'dot.test://foo/bar': {
314 |     hostname: 'foo',
315 |     protocol: 'dot.test:',
316 |     pathname: '/bar',
317 |     slashes: true
318 |   },
319 | 
320 |   'dot.test:foo/bar': {
321 |     hostname: 'foo',
322 |     protocol: 'dot.test:',
323 |     pathname: '/bar'
324 |   },
325 | 
326 |   // IDNA tests
327 |   'http://www.日本語.com/': {
328 |     protocol: 'http:',
329 |     slashes: true,
330 |     hostname: 'www.日本語.com',
331 |     pathname: '/'
332 |   },
333 | 
334 |   'http://example.Bücher.com/': {
335 |     protocol: 'http:',
336 |     slashes: true,
337 |     hostname: 'example.Bücher.com',
338 |     pathname: '/'
339 |   },
340 | 
341 |   'http://www.Äffchen.com/': {
342 |     protocol: 'http:',
343 |     slashes: true,
344 |     hostname: 'www.Äffchen.com',
345 |     pathname: '/'
346 |   },
347 | 
348 |   'http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i': {
349 |     protocol: 'http:',
350 |     slashes: true,
351 |     hostname: 'www.Äffchen.cOm',
352 |     pathname: ';A/b/c',
353 |     search: '?d=e',
354 |     hash: '#f g<h>i'
355 |   },
356 | 
357 |   'http://SÉLIER.COM/': {
358 |     protocol: 'http:',
359 |     slashes: true,
360 |     hostname: 'SÉLIER.COM',
361 |     pathname: '/'
362 |   },
363 | 
364 |   'http://ليهمابتكلموشعربي؟.ي؟/': {
365 |     protocol: 'http:',
366 |     slashes: true,
367 |     hostname: 'ليهمابتكلموشعربي؟.ي؟',
368 |     pathname: '/'
369 |   },
370 | 
371 |   'http://➡.ws/➡': {
372 |     protocol: 'http:',
373 |     slashes: true,
374 |     hostname: '➡.ws',
375 |     pathname: '/➡'
376 |   },
377 | 
378 |   'http://bucket_name.s3.amazonaws.com/image.jpg': {
379 |     protocol: 'http:',
380 |     slashes: true,
381 |     hostname: 'bucket_name.s3.amazonaws.com',
382 |     pathname: '/image.jpg'
383 |   },
384 | 
385 |   'git+http://github.com/joyent/node.git': {
386 |     protocol: 'git+http:',
387 |     slashes: true,
388 |     hostname: 'github.com',
389 |     pathname: '/joyent/node.git'
390 |   },
391 | 
  // if local1@domain1 is used as a relative URL it may
  // be parsed into auth@hostname, but there is no way to
  // make that work in url.parse; this test makes the behaviour explicit
395 |   'local1@domain1': {
396 |     pathname: 'local1@domain1'
397 |   },
398 | 
399 |   // While this may seem counter-intuitive, a browser will parse
400 |   // <a href='www.google.com'> as a path.
401 |   'www.example.com': {
402 |     pathname: 'www.example.com'
403 |   },
404 | 
405 |   // ipv6 support
406 |   '[fe80::1]': {
407 |     pathname: '[fe80::1]'
408 |   },
409 | 
410 |   'coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]': {
411 |     protocol: 'coap:',
412 |     slashes: true,
413 |     hostname: 'FEDC:BA98:7654:3210:FEDC:BA98:7654:3210'
414 |   },
415 | 
416 |   'coap://[1080:0:0:0:8:800:200C:417A]:61616/': {
417 |     protocol: 'coap:',
418 |     slashes: true,
419 |     port: '61616',
420 |     hostname: '1080:0:0:0:8:800:200C:417A',
421 |     pathname: '/'
422 |   },
423 | 
424 |   'http://user:password@[3ffe:2a00:100:7031::1]:8080': {
425 |     protocol: 'http:',
426 |     slashes: true,
427 |     auth: 'user:password',
428 |     port: '8080',
429 |     hostname: '3ffe:2a00:100:7031::1',
430 |     pathname: ''
431 |   },
432 | 
433 |   'coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature': {
434 |     protocol: 'coap:',
435 |     slashes: true,
436 |     auth: 'u:p',
437 |     port: '61616',
438 |     hostname: '::192.9.5.5',
439 |     search: '?n=Temperature',
440 |     pathname: '/.well-known/r'
441 |   },
442 | 
443 |   // empty port
444 |   'http://example.com:': {
445 |     protocol: 'http:',
446 |     slashes: true,
447 |     hostname: 'example.com',
448 |     pathname: ':'
449 |   },
450 | 
451 |   'http://example.com:/a/b.html': {
452 |     protocol: 'http:',
453 |     slashes: true,
454 |     hostname: 'example.com',
455 |     pathname: ':/a/b.html'
456 |   },
457 | 
458 |   'http://example.com:?a=b': {
459 |     protocol: 'http:',
460 |     slashes: true,
461 |     hostname: 'example.com',
462 |     search: '?a=b',
463 |     pathname: ':'
464 |   },
465 | 
466 |   'http://example.com:#abc': {
467 |     protocol: 'http:',
468 |     slashes: true,
469 |     hostname: 'example.com',
470 |     hash: '#abc',
471 |     pathname: ':'
472 |   },
473 | 
474 |   'http://[fe80::1]:/a/b?a=b#abc': {
475 |     protocol: 'http:',
476 |     slashes: true,
477 |     hostname: 'fe80::1',
478 |     search: '?a=b',
479 |     hash: '#abc',
480 |     pathname: ':/a/b'
481 |   },
482 | 
483 |   'http://-lovemonsterz.tumblr.com/rss': {
484 |     protocol: 'http:',
485 |     slashes: true,
486 |     hostname: '-lovemonsterz.tumblr.com',
487 |     pathname: '/rss'
488 |   },
489 | 
490 |   'http://-lovemonsterz.tumblr.com:80/rss': {
491 |     protocol: 'http:',
492 |     slashes: true,
493 |     port: '80',
494 |     hostname: '-lovemonsterz.tumblr.com',
495 |     pathname: '/rss'
496 |   },
497 | 
498 |   'http://user:pass@-lovemonsterz.tumblr.com/rss': {
499 |     protocol: 'http:',
500 |     slashes: true,
501 |     auth: 'user:pass',
502 |     hostname: '-lovemonsterz.tumblr.com',
503 |     pathname: '/rss'
504 |   },
505 | 
506 |   'http://user:pass@-lovemonsterz.tumblr.com:80/rss': {
507 |     protocol: 'http:',
508 |     slashes: true,
509 |     auth: 'user:pass',
510 |     port: '80',
511 |     hostname: '-lovemonsterz.tumblr.com',
512 |     pathname: '/rss'
513 |   },
514 | 
515 |   'http://_jabber._tcp.google.com/test': {
516 |     protocol: 'http:',
517 |     slashes: true,
518 |     hostname: '_jabber._tcp.google.com',
519 |     pathname: '/test'
520 |   },
521 | 
522 |   'http://user:pass@_jabber._tcp.google.com/test': {
523 |     protocol: 'http:',
524 |     slashes: true,
525 |     auth: 'user:pass',
526 |     hostname: '_jabber._tcp.google.com',
527 |     pathname: '/test'
528 |   },
529 | 
530 |   'http://_jabber._tcp.google.com:80/test': {
531 |     protocol: 'http:',
532 |     slashes: true,
533 |     port: '80',
534 |     hostname: '_jabber._tcp.google.com',
535 |     pathname: '/test'
536 |   },
537 | 
538 |   'http://user:pass@_jabber._tcp.google.com:80/test': {
539 |     protocol: 'http:',
540 |     slashes: true,
541 |     auth: 'user:pass',
542 |     port: '80',
543 |     hostname: '_jabber._tcp.google.com',
544 |     pathname: '/test'
545 |   },
546 | 
547 |   'http://x:1/\' <>"`/{}|\\^~`/': {
548 |     protocol: 'http:',
549 |     slashes: true,
550 |     port: '1',
551 |     hostname: 'x',
552 |     pathname: '/\' <>"`/{}|\\^~`/'
553 |   },
554 | 
555 |   'http://a@b@c/': {
556 |     protocol: 'http:',
557 |     slashes: true,
558 |     auth: 'a@b',
559 |     hostname: 'c',
560 |     pathname: '/'
561 |   },
562 | 
563 |   'http://a@b?@c': {
564 |     protocol: 'http:',
565 |     slashes: true,
566 |     auth: 'a',
567 |     hostname: 'b',
568 |     pathname: '',
569 |     search: '?@c'
570 |   },
571 | 
572 |   'http://a\r" \t\n<\'b:b@c\r\nd/e?f': {
573 |     protocol: 'http:',
574 |     slashes: true,
575 |     auth: 'a\r" \t\n<\'b:b',
576 |     hostname: 'c',
577 |     search: '?f',
578 |     pathname: '\r\nd/e'
579 |   },
580 | 
581 |   // git urls used by npm
582 |   'git+ssh://git@github.com:npm/npm': {
583 |     protocol: 'git+ssh:',
584 |     slashes: true,
585 |     auth: 'git',
586 |     hostname: 'github.com',
587 |     pathname: ':npm/npm'
588 |   },
589 | 
590 |   'http://example.com?foo=bar#frag': {
591 |     protocol: 'http:',
592 |     slashes: true,
593 |     hostname: 'example.com',
594 |     hash: '#frag',
595 |     search: '?foo=bar',
596 |     pathname: ''
597 |   },
598 | 
599 |   'http://example.com?foo=@bar#frag': {
600 |     protocol: 'http:',
601 |     slashes: true,
602 |     hostname: 'example.com',
603 |     hash: '#frag',
604 |     search: '?foo=@bar',
605 |     pathname: ''
606 |   },
607 | 
608 |   'http://example.com?foo=/bar/#frag': {
609 |     protocol: 'http:',
610 |     slashes: true,
611 |     hostname: 'example.com',
612 |     hash: '#frag',
613 |     search: '?foo=/bar/',
614 |     pathname: ''
615 |   },
616 | 
617 |   'http://example.com?foo=?bar/#frag': {
618 |     protocol: 'http:',
619 |     slashes: true,
620 |     hostname: 'example.com',
621 |     hash: '#frag',
622 |     search: '?foo=?bar/',
623 |     pathname: ''
624 |   },
625 | 
626 |   'http://example.com#frag=?bar/#frag': {
627 |     protocol: 'http:',
628 |     slashes: true,
629 |     hostname: 'example.com',
630 |     hash: '#frag=?bar/#frag',
631 |     pathname: ''
632 |   },
633 | 
634 |   'http://google.com" onload="alert(42)/': {
635 |     hostname: 'google.com',
636 |     protocol: 'http:',
637 |     slashes: true,
638 |     pathname: '" onload="alert(42)/'
639 |   },
640 | 
641 |   'http://a.com/a/b/c?s#h': {
642 |     protocol: 'http:',
643 |     slashes: true,
644 |     pathname: '/a/b/c',
645 |     hostname: 'a.com',
646 |     hash: '#h',
647 |     search: '?s'
648 |   },
649 | 
650 |   'http://atpass:foo%40bar@127.0.0.1/': {
651 |     auth: 'atpass:foo%40bar',
652 |     slashes: true,
653 |     hostname: '127.0.0.1',
654 |     protocol: 'http:',
655 |     pathname: '/'
656 |   },
657 | 
658 |   'http://atslash%2F%40:%2F%40@foo/': {
659 |     auth: 'atslash%2F%40:%2F%40',
660 |     hostname: 'foo',
661 |     protocol: 'http:',
662 |     pathname: '/',
663 |     slashes: true
664 |   },
665 | 
666 |   // ipv6 support
667 |   'coap:u:p@[::1]:61616/.well-known/r?n=Temperature': {
668 |     protocol: 'coap:',
669 |     auth: 'u:p',
670 |     hostname: '::1',
671 |     port: '61616',
672 |     pathname: '/.well-known/r',
673 |     search: '?n=Temperature'
674 |   },
675 | 
676 |   'coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton': {
677 |     hostname: 'fedc:ba98:7654:3210:fedc:ba98:7654:3210',
678 |     port: '61616',
679 |     protocol: 'coap:',
680 |     pathname: '/s/stopButton'
681 |   },
682 | 
683 |   // encode context-specific delimiters in path and query, but do not touch
684 |   // other non-delimiter chars like `%`.
685 |   // <https://github.com/joyent/node/issues/4082>
686 | 
687 |   // `?` and `#` in path and search
688 |   'http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag': {
689 |     protocol: 'http:',
690 |     hostname: 'ex.com',
691 |     hash: '#frag',
692 |     search: '?abc=the%231?&foo=bar',
693 |     pathname: '/foo%3F100%m%23r',
694 |     slashes: true
695 |   },
696 | 
697 |   // `?` and `#` in search only
698 |   'http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag': {
699 |     protocol: 'http:',
700 |     hostname: 'ex.com',
701 |     hash: '#frag',
702 |     search: '?abc=the%231?&foo=bar',
703 |     pathname: '/fooA100%mBr',
704 |     slashes: true
705 |   }
706 | }
707 | 


--------------------------------------------------------------------------------