├── .github └── workflows │ ├── build.yml │ └── docs.yml ├── .gitignore ├── LICENSE ├── README.md ├── src ├── webby.nim └── webby │ ├── httpheaders.nim │ ├── internal.nim │ ├── multipart.nim │ ├── queryparams.nim │ └── urls.nim ├── tests ├── config.nims ├── fuzz.nim ├── test.nim └── test_urls.nim └── webby.nimble /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Github Actions 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | os: [ubuntu-latest, windows-latest] 9 | nim-version: ['1.2.2', '1.2.x', '1.4.x', '1.6.x', '2.0.x', 'stable'] 10 | include: 11 | - nim-version: '1.4.x' 12 | gc_orc: true 13 | - nim-version: '1.6.x' 14 | gc_orc: true 15 | - nim-version: 'stable' 16 | do_js: true 17 | 18 | runs-on: ${{ matrix.os }} 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: jiro4989/setup-nim-action@v2 23 | with: 24 | nim-version: ${{ matrix.nim-version }} 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | - run: nimble test -y 27 | - run: nimble test -d:release -y 28 | - run: nimble test --gc:orc -d:release -y 29 | if: ${{ matrix.gc_orc }} 30 | - run: nim js -r tests/test.nim 31 | if: ${{ matrix.do_js }} 32 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: 3 | push: 4 | branches: 5 | - master 6 | env: 7 | nim-version: 'stable' 8 | nim-src: src/${{ github.event.repository.name }}.nim 9 | deploy-dir: .gh-pages 10 | jobs: 11 | docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: jiro4989/setup-nim-action@v2 16 | with: 17 | nim-version: ${{ env.nim-version }} 18 | - run: nimble install -Y 19 | - run: nimble doc --index:on --project --git.url:https://github.com/${{ github.repository }} --git.commit:master --out:${{ env.deploy-dir }} ${{ 
env.nim-src }} 20 | - name: "Copy to index.html" 21 | run: cp ${{ env.deploy-dir }}/${{ github.event.repository.name }}.html ${{ env.deploy-dir }}/index.html 22 | - name: Deploy documents 23 | uses: peaceiris/actions-gh-pages@v3 24 | with: 25 | github_token: ${{ secrets.GITHUB_TOKEN }} 26 | publish_dir: ${{ env.deploy-dir }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore files with no extention: 2 | * 3 | !*/ 4 | !*.* 5 | 6 | # normal ignores: 7 | *.exe 8 | nimcache 9 | *.pdb 10 | *.ilk 11 | .* 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 Andre von Houck and Ryan Oldenburg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Webby 2 | 3 | `nimble install webby` 4 | 5 | ![Github Actions](https://github.com/treeform/webby/workflows/Github%20Actions/badge.svg) 6 | 7 | [API reference](https://treeform.github.io/webby) 8 | 9 | This library has no dependencies other than the Nim standard library. 10 | 11 | Webby is a collection of common HTTP data structures and functionality. This includes things like `Url`, `HttpHeaders` and `QueryParams`. 12 | 13 | ## URL 14 | 15 | ``` 16 | foo://admin:hunter1@example.com:8042/over/there?name=ferret#nose 17 | \_/ \___/ \_____/ \_________/ \__/\_________/ \_________/ \__/ 18 | | | | | | | | | 19 | scheme username password hostname port path query fragment 20 | ``` 21 | 22 | Use `parseUrl` to parse a URL: 23 | 24 | ```nim 25 | let url = parseUrl("foo://admin:hunter1@example.com:8042/over/there?name=ferret#nose") 26 | url.scheme == "foo" 27 | url.username == "admin" 28 | url.password == "hunter1" 29 | url.hostname == "example.com" 30 | url.port == "8042" 31 | url.path == "/over/there" 32 | url.query["name"] == "ferret" 33 | url.fragment == "nose" 34 | ``` 35 | 36 | Note that the `Url` fields are stored in decoded form: `/%6E%69%6D` becomes `/nim`. 37 | 38 | ## HTTP headers 39 | 40 | Create a collection of HTTP headers: 41 | ```nim 42 | var headers: HttpHeaders 43 | headers["Content-Type"] = "image/png" 44 | ``` 45 | 46 | Check if a header is present: 47 | ```nim 48 | if "Content-Encoding" in headers: 49 | echo headers["Content-Encoding"] 50 | ``` 51 | 52 | Iterate over the key-value pairs of headers: 53 | ```nim 54 | for (k, v) in headers: 55 | echo k, ": ", v 56 | ``` 57 | 58 | Entries are stored in the order they are added. Procs like `in`, `[]` and `[]=` are NOT case sensitive. 
59 | 60 | ## Query parameters 61 | 62 | Parse a form-encoded string: 63 | ```nim 64 | let 65 | search = "name=ferret&age=12&leg=1&leg=2&leg=3&leg=4" 66 | params = parseSearch(search) 67 | ``` 68 | 69 | Create a collection of query parameters: 70 | ```nim 71 | var params: QueryParams 72 | params["hash"] = "17c6d60" 73 | ``` 74 | 75 | Check if a parameter is present: 76 | ```nim 77 | if "hash" in params: 78 | echo params["hash"] 79 | ``` 80 | 81 | Iterate over the query parameters: 82 | ```nim 83 | for (k, v) in params: 84 | echo k, ": ", v 85 | ``` 86 | 87 | Entries are stored in the order they are added. Procs like `in`, `[]` and `[]=` are case sensitive. 88 | 89 | ## Repos using Webby 90 | 91 | Some libraries using Webby include [Mummy](https://github.com/guzba/mummy), [Puppy](https://github.com/treeform/puppy) and [Curly](https://github.com/guzba/curly). 92 | -------------------------------------------------------------------------------- /src/webby.nim: -------------------------------------------------------------------------------- 1 | import webby/httpheaders, webby/queryparams, webby/multipart, webby/urls 2 | 3 | export httpheaders, queryparams, multipart, urls 4 | -------------------------------------------------------------------------------- /src/webby/httpheaders.nim: -------------------------------------------------------------------------------- 1 | import std/strutils, std/typetraits 2 | 3 | type HttpHeaders* = distinct seq[(string, string)] 4 | 5 | converter toBase*(headers: var HttpHeaders): var seq[(string, string)] = 6 | headers.distinctBase 7 | 8 | when (NimMajor, NimMinor, NimPatch) >= (1, 4, 8): 9 | converter toBase*(params: HttpHeaders): lent seq[(string, string)] = 10 | params.distinctBase 11 | else: # Older versions 12 | converter toBase*(params: HttpHeaders): seq[(string, string)] = 13 | params.distinctBase 14 | 15 | converter toWebby*(headers: seq[(string, string)]): HttpHeaders = 16 | headers.HttpHeaders 17 | 18 | proc contains*(headers: 
proc `[]`*(headers: HttpHeaders, key: string): string =
  ## Looks up `key` among the headers, ignoring case, and returns the value
  ## of the first matching entry. Yields an empty string when no header
  ## matches the key.
  result = ""
  for entry in headers:
    if cmpIgnoreCase(entry[0], key) == 0:
      result = entry[1]
      break
21 | # We add < > because they're the only characters left that 22 | # we could possibly allow, and Parse will reject them if we 23 | # escape them (because hosts can't use %-encoding for 24 | # ASCII bytes). 25 | if c in { 26 | '!', '$', '&', '\'', '(', ')', '*', '+', ',', 27 | ';', '=', ':', '[', ']', '<', '>', '"' 28 | }: 29 | return false 30 | 31 | if c in {'-', '_', '.', '~'}: 32 | # §2.3 Unreserved characters (mark) 33 | return false 34 | 35 | if c in {'$', '&', '+', ',', '/', ':', ';', '=', '?', '@'}: 36 | # §2.2 Reserved characters (reserved) 37 | case mode: 38 | of EncodePath: # §3.3 39 | # The RFC allows : @ & = + $ but saves / ; , for assigning 40 | # meaning to individual path segments. This package 41 | # only manipulates the path as a whole, so we allow those 42 | # last three as well. That leaves only ? to escape. 43 | return c == '?' 44 | 45 | of EncodePathSegment: # §3.3 46 | # The RFC allows : @ & = + $ but saves / ; , for assigning 47 | # meaning to individual path segments. 48 | return c == '/' or c == ';' or c == ',' or c == '?' 49 | 50 | of EncodeUsernamePassword: # §3.2.1 51 | # The RFC allows ';', ':', '&', '=', '+', '$', and ',' in 52 | # userinfo, so we must escape only '@', '/', and '?'. 53 | # The parsing of userinfo treats ':' as special so we must escape 54 | # that too. 55 | return c == '@' or c == '/' or c == '?' or c == ':' 56 | 57 | of EncodeQueryComponent: # §3.4 58 | # The RFC reserves (so we must escape) everything. 59 | return true 60 | 61 | of EncodeFragment: # §4.1 62 | # The RFC text is silent but the grammar allows 63 | # everything, so escape nothing. 64 | return false 65 | 66 | else: 67 | discard 68 | 69 | if mode == EncodeFragment: 70 | # RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are 71 | # included in reserved from RFC 2396 §2.2. The remaining sub-delims do not 72 | # need to be escaped. 
proc containsControlByte*(s: string): bool =
  ## Reports whether `s` holds at least one ASCII control byte: any byte
  ## below the space character (0x20) or the DEL byte (0x7f).
  result = false
  for ch in s:
    if ch == '\x7f' or ord(ch) < ord(' '):
      return true
proc decodeQueryComponent*(s: string): string =
  ## Takes a string and decodes it from the x-www-form-urlencoded format:
  ## every `%XX` escape becomes the byte with hex value XX, every '+' becomes
  ## a space, and all other characters are copied unchanged.
  ##
  ## Raises CatchableError when a '%' is not followed by exactly two
  ## hexadecimal digits (including when the string ends too early).
  const hexDigits = {'0' .. '9', 'a' .. 'f', 'A' .. 'F'}
  result = newStringOfCap(s.len)
  var i = 0
  while i < s.len:
    case s[i]
    of '%':
      # Ensure we have room for a two-character hex value
      if i + 2 >= s.len:
        raise newException(CatchableError, "Invalid hex in form encoding")
      # Validate both characters up front. parseHex on its own is too lenient
      # for this job: it reports success after a single digit (so "%4z" would
      # decode to 0x04 and silently drop the 'z') and it tolerates characters
      # such as '_' inside the number.
      if s[i + 1] notin hexDigits or s[i + 2] notin hexDigits:
        raise newException(CatchableError, "Invalid hex in form encoding")
      # Parse the hex value and add it to result
      var v: uint8
      if parseHex(s, v, i + 1, 2) != 2:
        raise newException(CatchableError, "Invalid hex in form encoding")
      result.add v.char
      i += 2 # Skip the two hex digits; the '%' itself is skipped below
    of '+':
      result.add ' '
    else:
      result.add s[i]
    inc i
proc getOrDefault*(query: QueryParams, key, default: string): string =
  ## Returns the value of the first pair whose key equals `key` (the
  ## comparison is case sensitive), or `default` when no pair has that key.
  result = default
  for (k, v) in query.toBase:
    if k == key:
      return v
proc decodeURIComponent*(s: string): string =
  ## Decodes the percent-escapes in `s`, the inverse of encodeURIComponent:
  ## every `%XX` escape becomes the byte with hex value XX and all other
  ## characters (including '+') are copied unchanged.
  ##
  ## Raises CatchableError when a '%' is not followed by exactly two
  ## hexadecimal digits (including when the string ends too early).
  result = newStringOfCap(s.len)
  var i = 0
  while i < s.len:
    if s[i] == '%':
      # Ensure we have room for a two-character hex value
      if i + 2 >= s.len:
        raise newException(CatchableError, "Invalid hex in URI component")
      # Validate both characters up front. parseHex on its own is too lenient
      # for this job: it reports success after a single digit (so "%4z" would
      # decode to 0x04 and silently drop the 'z') and it tolerates characters
      # such as '_' inside the number.
      if s[i + 1] notin HexDigits or s[i + 2] notin HexDigits:
        raise newException(CatchableError, "Invalid hex in URI component")
      # Parse the hex value and add it to result
      var v: uint8
      if parseHex(s, v, i + 1, 2) != 2:
        raise newException(CatchableError, "Invalid hex in URI component")
      result.add v.char
      i += 2 # Skip the two hex digits; the '%' itself is skipped below
    else:
      result.add s[i]
    inc i
(decodeQueryComponent(pair[0]), "") 89 | result.add(kv) 90 | 91 | proc parseUrl*(s: string): Url = 92 | var s = s 93 | 94 | # Fragment 95 | let fragmentIdx = s.find('#') 96 | if fragmentIdx >= 0: 97 | var parts = s.split('#', maxsplit = 1) 98 | result.fragment = decodeURIComponent(parts[1]) 99 | s = move parts[0] 100 | 101 | if containsControlByte(s): 102 | raise newException(CatchableError, "Invalid control character in URL") 103 | 104 | if s == "*": 105 | result.path = "*" 106 | return 107 | 108 | # Scheme 109 | for i, c in s: 110 | if c in {'a' .. 'z', 'A' .. 'Z'}: 111 | discard 112 | elif c in {'0' .. '9', '+', '-', '.'}: 113 | if i == 0: 114 | break 115 | elif c == ':': 116 | if i == 0: 117 | raise newException(CatchableError, "Missing protocol scheme in URL") 118 | var parts = s.split(':', maxsplit = 1) 119 | result.scheme = toLowerAscii(parts[0]) 120 | s = move parts[1] 121 | break 122 | else: 123 | # Invalid character 124 | break 125 | 126 | # Query 127 | if '?' in s: 128 | if s[^1] == '?' and s.count('?') == 1: 129 | # result.forceQuery = true 130 | s.setLen(s.len - 1) 131 | else: 132 | var parts = s.split('?', maxsplit = 1) 133 | result.query = parseSearch(parts[1]) 134 | s = move parts[0] 135 | 136 | # Opaque 137 | if not s.startsWith('/') and result.scheme != "": 138 | # Consider rootless paths per RFC 3986 as opaque 139 | result.opaque = move s 140 | 141 | # Relative URL must not have a colon in the first path segment 142 | if ':' in s and s.find(':') < s.find('/'): 143 | raise newException( 144 | CatchableError, 145 | "First path segment in URL cannot contain colon" 146 | ) 147 | 148 | if (result.scheme != "" or not s.startsWith("///")) and s.startsWith("//"): 149 | s = s[2 .. ^1] # Trim off leading // 150 | 151 | # Authority 152 | let atIdx = s.rfind('@', last = s.find('/')) # Find last @ before any / 153 | if atIdx >= 0: 154 | var authority = s[0 ..< atIdx] 155 | s = s[atIdx + 1 .. 
^1] 156 | for c in authority: # Validate 157 | if c in { 158 | 'a' .. 'z', 159 | 'A' .. 'Z', 160 | '0' .. '9', 161 | '-', '.', '_', ':', '~', '!', '$', '&', '\'', '(', ')', '*', '+', 162 | ',', ';', '=', '%', '@' 163 | }: 164 | discard 165 | else: 166 | raise newException( 167 | CatchableError, 168 | "Invalid character in URL authority" 169 | ) 170 | var parts = authority.split(':', maxsplit = 1) 171 | result.username = decodeURIComponent(parts[0]) 172 | if parts.len > 1: 173 | result.password = decodeURIComponent(parts[1]) 174 | 175 | # Host 176 | var host: string 177 | let fsIdx = s.find('/') 178 | if fsIdx >= 0: 179 | host = s[0 ..< fsIdx] 180 | s = s[fsIdx .. ^1] 181 | else: 182 | host = move s 183 | if host.startsWith('['): 184 | let closingIdx = host.find(']') 185 | if closingIdx < 0: 186 | raise newException(CatchableError, "Missing ']' in URL host") 187 | result.hostname = host[0 .. closingIdx] 188 | let zoneIdentifierIdx = result.hostname.find("%25") 189 | if zoneIdentifierIdx >= 0: 190 | var 191 | host1 = result.hostname[0 ..< zoneIdentifierIdx] 192 | host2 = result.hostname[zoneIdentifierIdx .. ^1] 193 | result.hostname = host1 & decodeURIComponent(host2) 194 | if host.len > closingIdx + 2 and host[closingIdx + 1] == ':': 195 | result.port = host[closingIdx + 2 .. ^1] 196 | else: 197 | var parts = host.rsplit(':', maxsplit = 1) 198 | result.hostname = decodeURIComponent(parts[0]) 199 | if parts.len > 1: 200 | result.port = move parts[1] 201 | for c in result.port: 202 | if c notin {'0' .. '9'}: 203 | raise newException( 204 | CatchableError, 205 | "Invalid port `" & result.port & "` after URL host" 206 | ) 207 | 208 | # Path 209 | result.path = decodeURIComponent(s) 210 | 211 | proc `$`*(url: Url): string = 212 | ## Turns Url into a string. Preserves query string param ordering. 
213 | if url.scheme != "": 214 | result.add url.scheme 215 | result.add ':' 216 | if url.opaque != "": 217 | result.add url.opaque 218 | else: 219 | if url.scheme != "" or url.hostname != "" or url.port != "" or url.username != "": 220 | if url.hostname != "" or url.port != "" or url.path != "" or url.username != "": 221 | result.add "//" 222 | result.add escape(url.username, EncodeUsernamePassword) 223 | if url.password != "": 224 | result.add ':' 225 | result.add escape(url.password, EncodeUsernamePassword) 226 | if url.username != "" or url.password != "": 227 | result.add '@' 228 | if url.hostname != "": 229 | result.add escape(url.hostname, EncodeHost) 230 | if url.port != "": 231 | result.add ':' 232 | result.add url.port 233 | 234 | var encodedPath: string 235 | if url.path == "*": 236 | encodedPath = "*" # don't escape (go issue 11202) 237 | else: 238 | encodedPath = escape(url.path, EncodePath) 239 | 240 | if encodedPath != "" and encodedPath[0] != '/' and (url.hostname != "" or url.port != ""): 241 | result.add '/' 242 | 243 | if result != "": 244 | # RFC 3986 §4.2 245 | # A path segment that contains a colon character (e.g., "this:that") 246 | # cannot be used as the first segment of a relative-path reference, as 247 | # it would be mistaken for a scheme name. Such a segment must be 248 | # preceded by a dot-segment (e.g., "./this:that") to make a relative- 249 | # path reference. 250 | if ':' in encodedPath and encodedPath.find(':') < encodedPath.find('/'): 251 | result.add "./" 252 | 253 | result.add encodedPath 254 | 255 | if url.query.len > 0: 256 | result.add '?' 
proc randomAsciiString(): string =
  ## Builds a random string of 1 to 20 printable, non-space ASCII characters
  ## (codes 33 through 126) that never contains '&' or '='. Rejected
  ## characters are re-drawn rather than counted towards the length.
  let targetLen = rand(1 .. 20)
  while result.len < targetLen:
    let candidate = char(rand(33 .. 126))
    if candidate != '&' and candidate != '=':
      result.add candidate
"/over/there" 12 | doAssert url.paths == @["over", "there"] 13 | doAssert $url.query == "name=ferret" 14 | doAssert url.query["name"] == "ferret" 15 | doAssert "name" in url.query 16 | doAssert "nothing" notin url.query 17 | doAssert url.fragment == "nose" 18 | doAssert $url == test 19 | 20 | block: 21 | let test = "/over/there?name=ferret" 22 | let url = parseUrl(test) 23 | doAssert url.scheme == "" 24 | doAssert url.username == "" 25 | doAssert url.password == "" 26 | doAssert url.hostname == "" 27 | doAssert url.port == "" 28 | doAssert url.path == "/over/there" 29 | doAssert url.paths == @["over", "there"] 30 | doAssert $url.query == "name=ferret" 31 | doAssert url.query["name"] == "ferret" 32 | doAssert url.fragment == "" 33 | doAssert $url == test 34 | 35 | block: 36 | let test = "?name=ferret&age=12&leg=1&leg=2&leg=3&leg=4" 37 | let url = parseUrl(test) 38 | doAssert url.scheme == "" 39 | doAssert url.username == "" 40 | doAssert url.password == "" 41 | doAssert url.hostname == "" 42 | doAssert url.port == "" 43 | doAssert url.path == "" 44 | doAssert url.paths == @[] 45 | doAssert $url.query == "name=ferret&age=12&leg=1&leg=2&leg=3&leg=4" 46 | doAssert url.query["name"] == "ferret" 47 | doAssert url.query["age"] == "12" 48 | doAssert url.query["leg"] == "1" 49 | doAssert "name" in url.query 50 | doAssert "age" in url.query 51 | doAssert "leg" in url.query 52 | doAssert "eye" notin url.query 53 | doAssert $url.query == "name=ferret&age=12&leg=1&leg=2&leg=3&leg=4" 54 | doAssert url.fragment == "" 55 | doAssert $url == test 56 | 57 | var i = 1 58 | for (k, v) in url.query: 59 | if k == "leg": 60 | doAssert v == $i 61 | inc i 62 | 63 | doAssert url.query["missing"] == "" 64 | 65 | block: 66 | let test = "?name=&age&legs=4" 67 | let url = parseUrl(test) 68 | doAssert $url.query == "name=&age=&legs=4" 69 | 70 | block: 71 | let test = "google.com/a/path?id=3" 72 | let url = parseUrl(test) 73 | doAssert url.path == "google.com/a/path" 74 | doAssert $url == test 75 
block:
  # Serializing a Url must percent-escape reserved, non-ASCII and control
  # characters in query keys and values, join the pairs with '&', and
  # survive a parse -> serialize round trip unchanged.
  var url = Url()
  url.scheme = "https"
  url.hostname = "example.com"
  url.query["site"] = "https://nim-lang.org"
  url.query["https://nim-lang.org"] = "nice!!!"
  url.query["nothing"] = ""
  url.query["unicode"] = "шеллы"
  url.query["specials"] = "\n\t\b\r\"+&="
  # The "nothing" pair is separated by a literal '&' like every other pair.
  doAssert $url == "https://example.com?site=https%3A%2F%2Fnim-lang.org&https%3A%2F%2Fnim-lang.org=nice%21%21%21&nothing=&unicode=%D1%88%D0%B5%D0%BB%D0%BB%D1%8B&specials=%0A%09%08%0D%22%2B%26%3D"
  doAssert $parseUrl($url) == $url
# https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
# (presumably the reference for the encodeURI / encodeURIComponent expectations below)

let
  set1 = ";/?:@&=+$,#" # Reserved Characters
  set2 = "-.!~*'()" # Unreserved Marks
  set3 = "ABC abc 123" # Alphanumeric Characters + Space

# encodeURI leaves reserved characters and unreserved marks untouched.
doAssert encodeURI(set1) == ";/?:@&=+$,#"
doAssert encodeURI(set2) == "-.!~*'()"
doAssert encodeURI(set3) == "ABC%20abc%20123" # (the space gets encoded as %20)

# encodeURIComponent additionally percent-encodes the reserved characters.
doAssert encodeURIComponent(set1) == "%3B%2F%3F%3A%40%26%3D%2B%24%2C%23"
doAssert encodeURIComponent(set2) == "-.!~*'()"
doAssert encodeURIComponent(set3) == "ABC%20abc%20123" # (the space gets encoded as %20)

# encodeQueryComponent turns a space into '+' and percent-encodes every other
# character of this input, including the multi-byte '☺'.
doAssert encodeQueryComponent(" ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;") == "+%3F%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A%2F%40%24%27%28%29%2A%2C%3B"

# A query-only URL has an empty path (and no path segments) but still parses
# its query parameters.
block:
  let test = "?url=1&two=2"
  let url = parseUrl(test)
  doAssert url.path == ""
  doAssert url.paths == @[]
  doAssert $url.query == "url=1&two=2"

# `paths` splits the path on '/'; the leading slash does not produce a segment.
block:
  var url: Url
  url.path = "/a/b/c"
  doAssert url.paths == @["a", "b", "c"]

# A trailing slash yields a trailing empty segment.
block:
  var url: Url
  url.path = "/a/b/c/"
  doAssert url.paths == @["a", "b", "c", ""]

# A relative path gives the same segments; an empty path gives none.
block:
  var url: Url
  url.path = "a/b/c"
  doAssert url.paths == @["a", "b", "c"]
  url.path = ""
  doAssert url.paths == @[]

# A bare "/" has no segments.
block:
  var url: Url
  url.path = "/"
  echo url.paths
  doAssert url.paths == @[]

# "//" yields two empty segments.
block:
  var url: Url
  url.path = "//"
  echo url.paths
  doAssert url.paths == @["", ""]

# A literal '?' inside a query value is percent-encoded when the query is
# serialized again.
block:
  let url = parseUrl("?param=?")
  doAssert $url.query == "param=%3F"

# Building a URL field by field: in the serialized query, ' becomes %27 and a
# space becomes '+'.
block:
  let targetId = 399

  var url: Url
  url.scheme = "https"
  url.hostname = "ssd.jpl.nasa.gov"
  url.path = "/api/horizons.api"
  url.query["format"] = "text"
  url.query["COMMAND"] = "'" & $targetId & "'"
  url.query["STEP_SIZE"] = "'1 d'"

  doAssert $url == "https://ssd.jpl.nasa.gov/api/horizons.api?format=text&COMMAND=%27399%27&STEP_SIZE=%271+d%27"

# Invalid percent escapes (in path, query and fragment here) make parseUrl raise.
block:
  doAssertRaises CatchableError:
    discard parseUrl("/abc%ghi/?param=cde%hij#def%ijk")

block:
  doAssertRaises CatchableError:
    discard parseUrl("https://site.com/%yy")

# encodeMultipart returns the Content-Type header value and the encoded body.
# The boundary is generated by the encoder, so it is replaced with "QQQ"
# before comparing the body against the expected text.
block:
  var entries: seq[MultipartEntry]
  entries.add MultipartEntry(
    name: "input_text",
    fileName: "input.txt",
    contentType: "text/plain",
    payload: "foobar"
  )
  entries.add MultipartEntry(
    name: "options",
    payload: "{\"utf8\":true}"
  )
  let (contentType, body) = encodeMultipart(entries)

  doAssert contentType.startsWith("multipart/form-data; boundary=")
  # 30 == len("multipart/form-data; boundary="): everything after it is the boundary.
  let boundary = contentType[30 .. ^1]
  doAssert body.replace(boundary, "QQQ") == "--QQQ\r\nContent-Disposition: form-data; name=\"input_text\"; filename=\"input.txt\"\r\nContent-Type: text/plain\r\n\r\nfoobar\r\n--QQQ\r\nContent-Disposition: form-data; name=\"options\"\r\n\r\n{\"utf8\":true}\r\n--QQQ--\r\n"

# Only the first '#' starts the fragment; later '#' characters belong to it.
block:
  let url = "http://site.com#a#frag#ment"
  doAssert parseUrl(url).fragment == "a#frag#ment"

# '$', ',' and '&' inside the path must not produce any query parameters.
block:
  let test = "https://www.elyrics.net/read/0-9/$tupid-young,-nocap-&-steelz-lyrics/i-can_t-change-lyrics.html"
  let url = parseUrl(test)
  doAssert url.query.len == 0

# A truncated percent escape is rejected.
block:
  let test = "https://website.com/%z"
  doAssertRaises CatchableError:
    discard parseUrl(test)

# '*' in the host and '=' / ',' in the path survive a parse + serialize
# round trip unchanged.
block: # https://github.com/treeform/puppy/issues/100
  let test = "https://www.**.com/list=1,2"
  let url = parseUrl(test)
  doAssert $url == test
--------------------------------------------------------------------------------
/tests/test_urls.nim:
--------------------------------------------------------------------------------
import webby, webby/internal

# Based on https://cs.opensource.google/go/go/+/refs/tags/go1.21.6:src/net/url/url_test.go

# One parse/round-trip case: `input` is parsed, compared against `expected`,
# then serialized again and compared against `roundtrip` (or `input`).
type UrlTest = object
  input: string
  expected: Url # expected parse
  roundtrip: string # expected result of reserializing the URL; empty means same as `input`

var urlTests: seq[UrlTest]
urlTests.add(UrlTest( # no path
  input: "http://www.google.com",
  expected: Url(scheme: "http", hostname: "www.google.com")
))
urlTests.add(UrlTest( # path
  input: "http://www.google.com/",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/")
))
urlTests.add(UrlTest( # path with hex escaping
  input: "http://www.google.com/file%20one%26two",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/file one&two"),
  roundtrip: "http://www.google.com/file%20one&two"
))
urlTests.add(UrlTest( # fragment with hex escaping
  input: "http://www.google.com/#file%20one%26two",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", fragment: "file one&two"),
  roundtrip: "http://www.google.com/#file%20one&two"
))
urlTests.add(UrlTest( # user
  input: "ftp://webmaster@www.google.com/",
  expected: Url(scheme: "ftp", username: "webmaster", hostname: "www.google.com", path: "/")
))
urlTests.add(UrlTest( # escape sequence in username
  input: "ftp://john%20doe@www.google.com/",
  expected: Url(scheme: "ftp", username: "john doe", hostname: "www.google.com", path: "/")
))
urlTests.add(UrlTest( # empty query
  input: "http://www.google.com/?",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/"),
  roundtrip: "http://www.google.com/"
))
urlTests.add(UrlTest( # query ending in question mark
  input: "http://www.google.com/?foo=bar?",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", query: @[("foo", "bar?")].QueryParams),
  roundtrip: "http://www.google.com/?foo=bar%3F"
))
urlTests.add(UrlTest( # query
  input: "http://www.google.com/?q=go+language",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", query: @[("q", "go language")].QueryParams)
))
urlTests.add(UrlTest( # %20 outside query
  input: "http://www.google.com/a%20b?q=c+d",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/a b", query: @[("q", "c d")].QueryParams)
))
urlTests.add(UrlTest( # path without leading /, so no parsing
  input: "http:www.google.com/?q=go+language",
  expected: Url(scheme: "http", opaque: "www.google.com/", query: @[("q", "go language")].QueryParams)
))
urlTests.add(UrlTest( # path without leading /, so no parsing
  input: "http:%2f%2fwww.google.com/?q=go+language",
  expected: Url(scheme: "http", opaque: "%2f%2fwww.google.com/", query: @[("q", "go language")].QueryParams)
))
# urlTests.add(UrlTest( # non-authority with path; see golang.org/issue/46059
#   input: "mailto:/webmaster@golang.org",
#   expected: Url(scheme: "mailto", path: "/webmaster@golang.org")
# ))
urlTests.add(UrlTest( # non-authority
  input: "mailto:webmaster@golang.org",
  expected: Url(scheme: "mailto", opaque: "webmaster@golang.org")
))
urlTests.add(UrlTest( # unescaped :// in query should not create a scheme
  input: "/foo?query=http://bad",
  expected: Url(path: "/foo", query: @[("query", "http://bad")].QueryParams),
  roundtrip: "/foo?query=http%3A%2F%2Fbad"
))
urlTests.add(UrlTest( # leading // without scheme should create an authority
  input: "//foo",
  expected: Url(hostname: "foo")
))
urlTests.add(UrlTest( # leading // without scheme, with userinfo, path, and query
  input: "//user@foo/path?a=b",
  expected: Url(username: "user", hostname: "foo", path: "/path", query: @[("a", "b")].QueryParams)
))
urlTests.add(UrlTest( # three leading slashes isn't an authority, but doesn't return an error
  input: "///threeslashes",
  expected: Url(path: "///threeslashes")
))
# username and password in the authority
urlTests.add(UrlTest(
  input: "http://user:password@google.com",
  expected: Url(scheme: "http", username: "user", password: "password", hostname: "google.com")
))
urlTests.add(UrlTest( # unescaped @ in username should not confuse host
  input: "http://j@ne:password@google.com",
  expected: Url(scheme: "http", username: "j@ne", password: "password", hostname: "google.com"),
  roundtrip: "http://j%40ne:password@google.com"
))
urlTests.add(UrlTest( # unescaped @ in password should not confuse host
  input: "http://jane:p@ssword@google.com",
  expected: Url(scheme: "http", username: "jane", password: "p@ssword", hostname: "google.com"),
  roundtrip: "http://jane:p%40ssword@google.com"
))
# unescaped @ in username, path and query at once; on reserialization it is
# escaped in the username and query but left as-is in the path
urlTests.add(UrlTest(
  input: "http://j@ne:password@google.com/p@th?q=@go",
  expected: Url(scheme: "http", username: "j@ne", password: "password", hostname: "google.com", path: "/p@th", query: @[("q", "@go")].QueryParams),
  roundtrip: "http://j%40ne:password@google.com/p@th?q=%40go"
))
# query followed by a fragment
urlTests.add(UrlTest(
  input: "http://www.google.com/?q=go+language#foo",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", query: @[("q", "go language")].QueryParams, fragment: "foo")
))
# unescaped & in the fragment is kept and round-trips unchanged
urlTests.add(UrlTest(
  input: "http://www.google.com/?q=go+language#foo&bar",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", query: @[("q", "go language")].QueryParams, fragment: "foo&bar"),
))
# escaped & in the fragment is decoded and reserialized unescaped
urlTests.add(UrlTest(
  input: "http://www.google.com/?q=go+language#foo%26bar",
  expected: Url(scheme: "http", hostname: "www.google.com", path: "/", query: @[("q", "go language")].QueryParams, fragment: "foo&bar"),
  roundtrip: "http://www.google.com/?q=go+language#foo&bar"
))
# file URL: no hostname, absolute path
urlTests.add(UrlTest(
  input: "file:///home/adg/rabbits",
  expected: Url(scheme: "file", path: "/home/adg/rabbits")
))
urlTests.add(UrlTest( # Windows paths are no exception to the rule. See golang.org/issue/6027, especially comment #9.
  input: "file:///C:/FooBar/Baz.txt",
  expected: Url(scheme: "file", path: "/C:/FooBar/Baz.txt")
))
urlTests.add(UrlTest( # case-insensitive scheme
  input: "MaIlTo:webmaster@golang.org",
  expected: Url(scheme: "mailto", opaque: "webmaster@golang.org"),
  roundtrip: "mailto:webmaster@golang.org"
))
urlTests.add(UrlTest( # relative path
  input: "a/b/c",
  expected: Url(path: "a/b/c")
))
urlTests.add(UrlTest( # escaped '?' in username and password
  input: "http://%3Fam:pa%3Fsword@google.com",
  expected: Url(scheme: "http", username: "?am", password: "pa?sword", hostname: "google.com")
))
urlTests.add(UrlTest( # host subcomponent; IPv4 address in RFC 3986
  input: "http://192.168.0.1/",
  expected: Url(scheme: "http", hostname: "192.168.0.1", path: "/")
))
urlTests.add(UrlTest( # host and port subcomponents; IPv4 address in RFC 3986
  input: "http://192.168.0.1:8080/",
  expected: Url(scheme: "http", hostname: "192.168.0.1", port: "8080", path: "/")
))
urlTests.add(UrlTest( # host subcomponent; IPv6 address in RFC 3986
  input: "http://[fe80::1]/",
  expected: Url(scheme: "http", hostname: "[fe80::1]", path: "/")
))
urlTests.add(UrlTest( # host and port subcomponents; IPv6 address in RFC 3986
  input: "http://[fe80::1]:8080/",
  expected: Url(scheme: "http", hostname: "[fe80::1]", port: "8080", path: "/")
))
urlTests.add(UrlTest( # host subcomponent; IPv6 address with zone identifier in RFC 6874
  input: "http://[fe80::1%25en0]/",
  expected: Url(scheme: "http", hostname: "[fe80::1%en0]", path: "/")
))
urlTests.add(UrlTest( # host and port subcomponents; IPv6 address with zone identifier in RFC 6874
  input: "http://[fe80::1%25en0]:8080/",
  expected: Url(scheme: "http", hostname: "[fe80::1%en0]", port: "8080", path: "/")
))
urlTests.add(UrlTest( # host subcomponent; IPv6 address with zone identifier in RFC 6874
  input: "http://[fe80::1%25%65%6e%301-._~]/", # percent-encoded+unreserved zone identifier
  expected: Url(scheme: "http", hostname: "[fe80::1%en01-._~]", path: "/"),
  roundtrip: "http://[fe80::1%25en01-._~]/"
))
urlTests.add(UrlTest( # host subcomponent; IPv6 address with zone identifier in RFC 6874
  input: "http://[fe80::1%25%65%6e%301-._~]:8080/", # percent-encoded+unreserved zone identifier
  expected: Url(scheme: "http", hostname: "[fe80::1%en01-._~]", port: "8080", path: "/"),
  roundtrip: "http://[fe80::1%25en01-._~]:8080/"
))
# NOTE: unlike the Go original this comment comes from, the expected path here
# is fully decoded and the round trip emits plain '/' for both %2f and %2F.
urlTests.add(UrlTest( # alternate escapings of path survive round trip
  input: "http://rest.rsc.io/foo%2fbar/baz%2Fquux?alt=media",
  expected: Url(scheme: "http", hostname: "rest.rsc.io", path: "/foo/bar/baz/quux", query: @[("alt", "media")].QueryParams),
  roundtrip: "http://rest.rsc.io/foo/bar/baz/quux?alt=media"
))
urlTests.add(UrlTest( # go issue 12036
  input: "mysql://a,b,c/bar",
  expected: Url(scheme: "mysql", hostname: "a,b,c", path: "/bar")
))
urlTests.add(UrlTest( # worst case host, still round trips
  input: "scheme://!$&'()*+,;=hello!:1/path",
  expected: Url(scheme: "scheme", hostname: "!$&'()*+,;=hello!", port: "1", path: "/path")
))
# NOTE: here the round trip is not byte-identical: several characters of the
# path come back percent-encoded (see `roundtrip`).
urlTests.add(UrlTest( # worst case path, still round trips
  input: "http://host/!$&'()*+,;=:@[hello]",
  expected: Url(scheme: "http", hostname: "host", path: "/!$&'()*+,;=:@[hello]"),
  roundtrip: "http://host/%21$&%27%28%29%2A+,;=:@%5Bhello%5D"
))
urlTests.add(UrlTest( # golang.org/issue/5684
  input: "http://example.com/oid/[order_id]",
  expected: Url(scheme: "http", hostname: "example.com", path: "/oid/[order_id]"),
  roundtrip: "http://example.com/oid/%5Border_id%5D"
))
# IPv4 host with port and path
urlTests.add(UrlTest(
  input: "http://192.168.0.2:8080/foo",
  expected: Url(scheme: "http", hostname: "192.168.0.2", port: "8080", path: "/foo")
))
urlTests.add(UrlTest( # golang.org/issue/12200 (colon with empty port)
  input: "http://192.168.0.2:/foo",
  expected: Url(scheme: "http", hostname: "192.168.0.2", path: "/foo"),
  roundtrip: "http://192.168.0.2/foo"
))
urlTests.add(UrlTest( # malformed IPv6 but still accepted.
  input: "http://2b01:e34:ef40:7730:8e70:5aff:fefe:edac:8080/foo",
  expected: Url(scheme: "http", hostname: "2b01:e34:ef40:7730:8e70:5aff:fefe:edac", port: "8080", path: "/foo")
))
urlTests.add(UrlTest( # malformed IPv6 but still accepted.
  input: "http://2b01:e34:ef40:7730:8e70:5aff:fefe:edac:/foo",
  expected: Url(scheme: "http", hostname: "2b01:e34:ef40:7730:8e70:5aff:fefe:edac", path: "/foo"),
  roundtrip: "http://2b01:e34:ef40:7730:8e70:5aff:fefe:edac/foo"
))
# bracketed IPv6 host with port; the brackets stay part of `hostname`
urlTests.add(UrlTest(
  input: "http://[2b01:e34:ef40:7730:8e70:5aff:fefe:edac]:8080/foo",
  expected: Url(scheme: "http", hostname: "[2b01:e34:ef40:7730:8e70:5aff:fefe:edac]", port: "8080", path: "/foo")
))
# bracketed IPv6 host with an empty port; the dangling ':' is dropped on output
urlTests.add(UrlTest(
  input: "http://[2b01:e34:ef40:7730:8e70:5aff:fefe:edac]:/foo",
  expected: Url(scheme: "http", hostname: "[2b01:e34:ef40:7730:8e70:5aff:fefe:edac]", path: "/foo"),
  roundtrip: "http://[2b01:e34:ef40:7730:8e70:5aff:fefe:edac]/foo"
))
urlTests.add(UrlTest( # golang.org/issue/7991 and golang.org/issue/12719 (non-ascii %-encoded in host)
  input: "http://hello.世界.com/foo",
  expected: Url(scheme: "http", hostname: "hello.世界.com", path: "/foo"),
  roundtrip: "http://hello.%E4%B8%96%E7%95%8C.com/foo"
))
# lowercase hex escapes in the host are decoded and re-emitted in uppercase
urlTests.add(UrlTest(
  input: "http://hello.%e4%b8%96%e7%95%8c.com/foo",
  expected: Url(scheme: "http", hostname: "hello.世界.com", path: "/foo"),
  roundtrip: "http://hello.%E4%B8%96%E7%95%8C.com/foo"
))
# uppercase hex escapes in the host round-trip unchanged
urlTests.add(UrlTest(
  input: "http://hello.%E4%B8%96%E7%95%8C.com/foo",
  expected: Url(scheme: "http", hostname: "hello.世界.com", path: "/foo")
))
urlTests.add(UrlTest( # golang.org/issue/10433 (path beginning with //)
  input: "http://example.com//foo",
  expected: Url(scheme: "http", hostname: "example.com", path: "//foo")
))
urlTests.add(UrlTest( # test that we can reparse the host names we accept.
  input: "myscheme://authority<\"hi\">/foo",
  expected: Url(scheme: "myscheme", hostname: "authority<\"hi\">", path: "/foo")
))
# Spaces in hosts are disallowed but escaped spaces in IPv6 scope IDs are grudgingly OK.
# This happens on Windows. golang.org/issue/14002
urlTests.add(UrlTest(
  input: "tcp://[2020::2020:20:2020:2020%25Windows%20Loves%20Spaces]:2020",
  expected: Url(scheme: "tcp", hostname: "[2020::2020:20:2020:2020%Windows Loves Spaces]", port: "2020")
))
# NOTE: the ':' characters in the query value come back as %3A (see `roundtrip`).
urlTests.add(UrlTest( # test we can roundtrip magnet url https://golang.org/issue/20054
  input: "magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a",
  expected: Url(scheme: "magnet", query: @[("xt", "urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a")].QueryParams),
  roundtrip: "magnet:?xt=urn%3Abtih%3Ac12fe1c06bba254a9dc9f519b335aa7c1367a88a"
))
# scheme followed directly by a query: no authority, path or opaque part
urlTests.add(UrlTest(
  input: "mailto:?subject=hi",
  expected: Url(scheme: "mailto", query: @[("subject", "hi")].QueryParams)
))

# Run every case: the input must parse to exactly `expected`, and serializing
# the parsed URL must give `roundtrip`, or `input` itself when `roundtrip` is
# empty. The commented-out echo lines are debugging aids.
for urlTest in urlTests:
  # echo "input = ", urlTest.input
  let parsed = parseUrl(urlTest.input)
  # echo "parsed = ", parsed
  # echo "expected = ", urlTest.expected
  doAssert parsed == urlTest.expected
  if urlTest.roundtrip == "":
    doAssert $parsed == urlTest.input
  else:
    doAssert $parsed == urlTest.roundtrip
  # echo "----"

###

const pathThatLooksSchemeRelative = "//not.a.user@not.a.host/just/a/path"

# (url, expectedValid) pairs: `true` means parseUrl must accept the input,
# `false` means it must raise. Every enabled case below is `true`; all the
# `false` cases are commented out.
var parseRequestUrlTests = newSeq[(string, bool)]()
parseRequestUrlTests.add(("http://foo.com", true))
parseRequestUrlTests.add(("http://foo.com/", true))
parseRequestUrlTests.add(("http://foo.com/path", true))
parseRequestUrlTests.add(("/", true))
parseRequestUrlTests.add((pathThatLooksSchemeRelative, true))
parseRequestUrlTests.add(("//not.a.user@%66%6f%6f.com/just/a/path/also", true))
parseRequestUrlTests.add(("*", true))
parseRequestUrlTests.add(("http://192.168.0.1/", true))
parseRequestUrlTests.add(("http://192.168.0.1:8080/", true))
parseRequestUrlTests.add(("http://[fe80::1]/", true))
parseRequestUrlTests.add(("http://[fe80::1]:8080/", true))

# Tests exercising RFC 6874 compliance:
parseRequestUrlTests.add(("http://[fe80::1%25en0]/", true)) # with alphanum zone identifier
parseRequestUrlTests.add(("http://[fe80::1%25en0]:8080/", true)) # with alphanum zone identifier
parseRequestUrlTests.add(("http://[fe80::1%25%65%6e%301-._~]/", true)) # with percent-encoded+unreserved zone identifier
parseRequestUrlTests.add(("http://[fe80::1%25%65%6e%301-._~]:8080/", true)) # with percent-encoded+unreserved zone identifier

# parseRequestUrlTests.add(("foo.html", false))
# parseRequestUrlTests.add(("../dir/", false))
# parseRequestUrlTests.add((" http://foo.com", false))
# parseRequestUrlTests.add(("http://192.168.0.%31/", false))
# parseRequestUrlTests.add(("http://192.168.0.%31:8080/", false))
# parseRequestUrlTests.add(("http://[fe80::%31]/", false))
# parseRequestUrlTests.add(("http://[fe80::%31]:8080/", false))
# parseRequestUrlTests.add(("http://[fe80::%31%25en0]/", false))
# parseRequestUrlTests.add(("http://[fe80::%31%25en0]:8080/", false))

# These two cases are valid as textual representations as
# described in RFC 4007, but are not valid as address
# literals with IPv6 zone identifiers in URIs as described in
# RFC 6874.
# parseRequestUrlTests.add(("http://[fe80::1%en0]/", false))
# parseRequestUrlTests.add(("http://[fe80::1%en0]:8080/", false))

# Valid inputs only need to parse without raising (the result is discarded);
# invalid inputs must raise. With the cases enabled above, the `else` branch
# is never taken.
for (url, expectedValid) in parseRequestUrlTests:
  if expectedValid:
    discard parseUrl(url)
  else:
    doAssertRaises CatchableError:
      discard parseUrl(url)

###

# Path-segment escaping: a space becomes %20 (not '+'), and '&', '=', '+',
# ':', '@' and '$' are left unescaped.
doAssert escape(" ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;", EncodePathSegment) ==
  "%20%3F&=%23+%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09:%2F@$%27%28%29%2A%2C%3B"
--------------------------------------------------------------------------------
/webby.nimble:
--------------------------------------------------------------------------------
version = "0.2.1"
author = "Andre von Houck and Ryan Oldenburg"
description = "Common HTTP data structures and functionality"
license = "MIT"

srcDir = "src"

requires "nim >= 1.2.2"
--------------------------------------------------------------------------------