├── .gitignore
├── LICENSE.txt
├── README.md
├── dub.json
├── dub.selections.json
└── source
    └── url.d


/.gitignore:
--------------------------------------------------------------------------------
1 | .dub/
2 | __test__library__
3 | docs.json
4 | docs/
5 | __dummy.html
6 | liburld.a
7 | *.swp
8 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 Christopher Wright
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 4 | associated documentation files (the "Software"), to deal in the Software without restriction,
 5 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
 6 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 7 | furnished to do so, subject to the following conditions:
 8 | 
 9 | The above copyright notice and this permission notice shall be included in all copies or substantial
10 | portions of the Software.
11 | 
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
13 | NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
15 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # urld
 2 | URL handling for D
 3 | 
 4 | # Motivation
 5 | D's standard library has nothing for working with URLs.
 6 | 
 7 | Vibe.d can work with URLs. However, Vibe is big. Also, we want to work easily with query strings,
 8 | which vibe.d doesn't allow.
 9 | 
10 | # Recent Breaking Changes
11 | 
12 | - v3.0:
13 | 	* urld does no automatic URL decoding
14 | 	* urld only automatically encodes non-ASCII characters
15 | 	* `URL(string)` constructor parses a URL rather than assigning the scheme only
16 | 
17 | 
18 | # Installation
19 | Add `"urld": "~>3.0.0"` to your `dub.json`.
20 | 
21 | # Usage
22 | 
23 | Parse a URL:
24 | 
25 | ```D
26 | auto url = "ircs://irc.freenode.com/#d".parseURL;
27 | auto sameURL = URL("ircs://irc.freenode.com/#d");  // equivalent: the constructor parses too
28 | ```
29 | 
30 | Construct one from scratch, laboriously:
31 | 
32 | ```D
33 | URL url;
34 | with (url) {
35 | 	scheme = "soap.beep";
36 | 	host = "beep.example.net";
37 | 	port = 1772;
38 | 	path = "/serverinfo/info";
39 |   queryParams.add("token", "my-api-token");
40 | }
41 | curl.get(url);
42 | ```
43 | 
44 | Unicode domain names:
45 | 
46 | ```D
47 | auto url = "http://☃.com/".parseURL;
48 | writeln(url.toString);               // http://xn--n3h.com/
49 | writeln(url.toHumanReadableString);  // http://☃.com/
50 | ```
51 | 
52 | Implicit conversion to strings for use with other libraries that expect URLs as strings:
53 | 
54 | ```D
55 | import std.net.curl;
56 | auto couchdbURL = "http://couch.local:8815".parseURL;
57 | writeln(get(couchdbURL ~ "users/bob.dobbs@subgenius.org"));
58 | ```
59 | 
60 | Autodetect ports:
61 | 
62 | ```D
63 | assert(parseURL("http://example.org").port == 80);
64 | assert(parseURL("http://example.org:5326").port == 5326);
65 | ```
66 | 
67 | URLs of maximum complexity:
68 | 
69 | ```D
70 | auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
71 | assert(url.scheme == "redis");
72 | assert(url.user == "admin");
73 | assert(url.pass == "password");
74 | // etc
75 | ```
76 | 
77 | URLs of minimum complexity:
78 | 
79 | ```D
80 | assert(parseURL("example.org").toString == "http://example.org/");
81 | ```
82 | 
83 | Canonicalization:
84 | 
85 | ```D
86 | assert(parseURL("http://example.org:80").toString == "http://example.org/");
87 | ```
88 | 


--------------------------------------------------------------------------------
/dub.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"name": "urld",
 3 | 	"description": "A URL parsing library",
 4 | 	"copyright": "Copyright © 2015, dhasenan",
 5 | 	"authors": ["dhasenan"],
 6 | 	"license": "MIT",
 7 | 	"dependencies": {
 8 | 	}
 9 | }
10 | 


--------------------------------------------------------------------------------
/dub.selections.json:
--------------------------------------------------------------------------------
1 | {
2 | 	"fileVersion": 1,
3 | 	"versions": {}
4 | }
5 | 


--------------------------------------------------------------------------------
/source/url.d:
--------------------------------------------------------------------------------
   1 | /**
   2 | 	* A URL handling library.
   3 | 	*
   4 | 	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
   5 | 	* elements like port, path, username, and password.
   6 | 	*
   7 | 	* This module aims to make it simple to muck about with them.
   8 | 	*
   9 | 	* Example usage:
  10 | 	* ---
  11 | 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
  12 | 	* auto files = system("ssh", url.toString, "ls").splitLines;
  13 | 	* foreach (file; files) {
  14 | 	*		system("scp", url ~ file, ".");
  15 | 	* }
  16 | 	* ---
  17 | 	*
  18 | 	* License: The MIT license.
  19 | 	*/
  20 | module url;
  21 | 
  22 | import std.conv;
  23 | import std.string;
  24 | 
  25 | pure:
  26 | @safe:
  27 | 
  28 | /// An exception thrown when something bad happens with URLs.
  29 | class URLException : Exception
  30 | {
  31 | 	this(string msg) pure { super(msg); }
  32 | }
  33 | 
  34 | /**
  35 | 	* A mapping from schemes to their default ports.
  36 | 	*
  37 |   * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
  38 | 	* use even if they use ports. Entries here should be treated as best guesses.
  39 |   */
  40 | enum ushort[string] schemeToDefaultPort = [
  41 |     "aaa": 3868,
  42 |     "aaas": 5658,
  43 |     "acap": 674,
  44 |     "amqp": 5672,
  45 |     "cap": 1026,
  46 |     "coap": 5683,
  47 |     "coaps": 5684,
  48 |     "dav": 443,
  49 |     "dict": 2628,
  50 |     "ftp": 21,
  51 |     "git": 9418,
  52 |     "go": 1096,
  53 |     "gopher": 70,
  54 |     "http": 80,
  55 |     "https": 443,
  56 |     "ws": 80,
  57 |     "wss": 443,
  58 |     "iac": 4569,
  59 |     "icap": 1344,
  60 |     "imap": 143,
  61 |     "ipp": 631,
  62 |     "ipps": 631,  // yes, they're both mapped to port 631
  63 |     "irc": 6667,  // De facto default port, not the IANA reserved port.
  64 |     "ircs": 6697,
  65 |     "iris": 702,  // defaults to iris.beep
  66 |     "iris.beep": 702,
  67 |     "iris.lwz": 715,
  68 |     "iris.xpc": 713,
  69 |     "iris.xpcs": 714,
  70 |     "jabber": 5222,  // client-to-server
  71 |     "ldap": 389,
  72 |     "ldaps": 636,
  73 |     "msrp": 2855,
  74 |     "msrps": 2855,
  75 |     "mtqp": 1038,
  76 |     "mupdate": 3905,
  77 |     "news": 119,
  78 |     "nfs": 2049,
  79 |     "pop": 110,
  80 |     "redis": 6379,
  81 |     "reload": 6084,
  82 |     "rsync": 873,
  83 |     "rtmfp": 1935,
  84 |     "rtsp": 554,
  85 |     "shttp": 80,
  86 |     "sieve": 4190,
  87 |     "sip": 5060,
  88 |     "sips": 5061,
  89 |     "smb": 445,
  90 |     "smtp": 25,
  91 |     "snews": 563,
  92 |     "snmp": 161,
  93 |     "soap.beep": 605,
  94 |     "ssh": 22,
  95 |     "stun": 3478,
  96 |     "stuns": 5349,
  97 |     "svn": 3690,
  98 |     "teamspeak": 9987,
  99 |     "telnet": 23,
 100 |     "tftp": 69,
 101 |     "tip": 3372,
 102 | ];
 103 | 
 104 | /**
 105 | 	* A collection of query parameters.
 106 | 	*
 107 | 	* This is effectively a multimap of string -> strings.
 108 | 	*/
 109 | struct QueryParams
 110 | {
 111 |     hash_t toHash() const nothrow @safe
 112 |     {
 113 |         return typeid(params).getHash(&params);
 114 |     }
 115 | 
 116 | pure:
 117 |     import std.typecons;
 118 |     alias Tuple!(string, "key", string, "value") Param;
 119 |     Param[] params;
 120 | 
 121 |     @property size_t length() const {
 122 |         return params.length;
 123 |     }
 124 | 
 125 |     /// Get a lazy range over the values of every query parameter whose key equals `key`.
 126 |     auto opIndex(string key) const
 127 |     {
 128 |         // `find` would yield the first match plus every later entry regardless of its key.
 129 |         import std.algorithm.iteration : filter, map;
 130 |         return params.filter!(x => x.key == key).map!(x => x.value);
 131 |     }
 132 | 
 133 |     /// Add a query parameter with the given key and value.
 134 |     /// If one already exists, there will now be two query parameters with the given name.
 135 |     void add(string key, string value) {
 136 |         params ~= Param(key, value);
 137 |     }
 138 | 
 139 |     /// Add a query parameter with the given key and value.
 140 |     /// If there are any existing parameters with the same key, they are all removed first;
 141 |     /// the remaining parameters keep their relative order.
 142 |     void overwrite(string key, string value) {
 143 |         import std.algorithm.iteration : filter;
 144 |         import std.array : array;
 145 |         // Build a fresh array instead of swapping elements in place: the swap-with-last loop
 146 |         // skipped the element it had just moved into slot i, so duplicates could survive, and
 147 |         // it wrote into storage that copies of this struct may still share.
 148 |         params = params.filter!(x => x.key != key).array ~ Param(key, value);
 149 |     }
 150 | 
 151 |     private struct QueryParamRange
 152 |     {
 153 | pure:
 154 |         size_t i;
 155 |         const(Param)[] params;
 156 |         bool empty() { return i >= params.length; }
 157 |         void popFront() { i++; }
 158 |         Param front() { return params[i]; }
 159 |     }
 160 | 
 161 |     /**
 162 |      * A range over the query parameters.
 163 |      *
 164 |      * Usage:
 165 |      * ---
 166 |      * foreach (key, value; url.queryParams) {}
 167 |      * ---
 168 |      */
 169 |     auto range() const
 170 |     {
 171 |         return QueryParamRange(0, this.params);
 172 |     }
 173 |     /// ditto
 174 |     alias range this;
 175 | 
 176 |     /// Convert this set of query parameters into a query string: key=value pairs joined by
 177 |     /// '&', in stored order, with keys and values passed through
 178 |     /// percentEncodeUnicodeOnly.
 179 |     string toString() const {
 180 |         import std.array : Appender;
 181 |         Appender!string buffer;
 182 |         bool needSeparator = false;
 183 |         foreach (entry; this) {
 184 |             if (needSeparator) buffer ~= '&';
 185 |             needSeparator = true;
 186 |             buffer ~= entry.key.percentEncodeUnicodeOnly;
 187 |             // A parameter whose value is empty is written as the bare key, without '='.
 188 |             if (entry.value.length == 0) continue;
 189 |             buffer ~= '=';
 190 |             buffer ~= entry.value.percentEncodeUnicodeOnly;
 191 |         }
 192 |         return buffer.data;
 193 |     }
 194 | 
 195 |     /// Clone this set of query parameters.
 196 |     QueryParams dup()
 197 |     {
 198 |         QueryParams other = this;
 199 |         other.params = params.dup;
 200 |         return other;
 201 |     }
 202 | 
 203 |     /// Lexicographic ordering: pairs are compared in sequence, key before value, and when one
 204 |     /// list is a prefix of the other the shorter list sorts first.
 205 |     int opCmp(const ref QueryParams other) const
 206 |     {
 207 |         foreach (idx, ref mine; params)
 208 |         {
 209 |             if (idx >= other.params.length) return 1;  // `other` ran out first
 210 |             const theirs = other.params[idx];
 211 |             if (auto byKey = cmp(mine.key, theirs.key)) return byKey;
 212 |             if (auto byValue = cmp(mine.value, theirs.value)) return byValue;
 213 |         }
 214 |         return params.length < other.params.length ? -1 : 0;
 215 |     }
 216 | }
 217 | 
 218 | /**
 219 | 	* A Uniform Resource Locator.
 220 | 	*
 221 | 	* URLs can be parsed (see parseURL) and implicitly convert to strings.
 222 | 	*/
 223 | struct URL
 224 | {
 225 |     hash_t toHash() const @safe nothrow
 226 |     {
 227 |         return asTuple().toHash();
 228 |     }
 229 | 
 230 | pure:
 231 | 	/// The URL scheme. For instance, ssh, ftp, or https.
 232 | 	string scheme;
 233 | 
 234 | 	/// The username in this URL. Usually absent. If present, there will also be a password.
 235 | 	string user;
 236 | 
 237 | 	/// The password in this URL. Usually absent.
 238 | 	string pass;
 239 | 
 240 | 	/// The hostname.
 241 | 	string host;
 242 | 
 243 | 	this(string s)
 244 | 	{
 245 | 		this = s.parseURL;
 246 | 	}
 247 | 
 248 | 	/**
 249 | 	  * The port.
 250 | 		*
 251 | 	  * This is inferred from the scheme if it isn't present in the URL itself.
 252 | 	  * If the scheme is not known and the port is not present, the port will be given as 0.
 253 | 	  * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
 254 | 	  *
 255 | 	  * If you explicitly need to detect whether the user provided a port, check the providedPort
 256 | 	  * field.
 257 | 	  */
 258 | 	@property ushort port() const nothrow
 259 |     {
 260 | 		if (providedPort != 0) {
 261 | 			return providedPort;
 262 | 		}
 263 | 		if (auto p = scheme in schemeToDefaultPort) {
 264 | 			return *p;
 265 | 		}
 266 | 		return 0;
 267 | 	}
 268 | 
 269 | 	/**
 270 | 	  * Set the port.
 271 | 		*
 272 | 		* This sets the providedPort field and is provided for convenience.
 273 | 		*/
 274 | 	@property ushort port(ushort value) nothrow
 275 |     {
 276 | 		return providedPort = value;
 277 | 	}
 278 | 
 279 | 	/// The port that was explicitly provided in the URL.
 280 | 	ushort providedPort;
 281 | 
 282 | 	/**
 283 | 	  * The path.
 284 | 	  *
 285 | 	  * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
 286 | 	  * "/news/story/17774".
 287 | 	  */
 288 | 	string path;
 289 | 
 290 | 	/**
 291 | 		* The query parameters associated with this URL.
 292 | 		*/
 293 | 	QueryParams queryParams;
 294 | 
 295 | 	/**
 296 | 	  * The fragment. In web documents, this typically refers to an anchor element.
 297 | 	  * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
 298 | 	  */
 299 | 	string fragment;
 300 | 
 301 | 	/**
 302 | 	  * Convert this URL to a string.
 303 | 	  * The string is properly formatted and usable for, eg, a web request.
 304 | 	  */
 305 | 	string toString() const
 306 |     {
 307 | 		return toString(false);
 308 | 	}
 309 | 
 310 | 	/**
 311 | 		* Convert this URL to a string.
 312 |         *
 313 | 		* The string is intended to be human-readable rather than machine-readable.
 314 | 		*/
 315 | 	string toHumanReadableString() const
 316 |     {
 317 | 		return toString(true);
 318 | 	}
 319 | 
 320 |     ///
 321 |     unittest
 322 |     {
 323 |         auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
 324 |         assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
 325 |         assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString);
 326 |     }
 327 | 
 328 |     unittest
 329 |     {
 330 |         assert("http://example.org/some_path".parseURL.toHumanReadableString ==
 331 |                 "http://example.org/some_path");
 332 |     }
 333 | 
 334 |     /**
 335 |       * Convert the path and query string of this URL to a string.
 336 |       */
 337 |     string toPathAndQueryString() const
 338 |     {
 339 |         if (queryParams.length > 0)
 340 |         {
 341 |             return path ~ '?' ~ queryParams.toString;
 342 |         }
 343 |         return path;
 344 |     }
 345 | 
 346 |     ///
 347 |     unittest
 348 |     {
 349 |         auto u = "http://example.org/index?page=12".parseURL;
 350 |         auto pathAndQuery = u.toPathAndQueryString();
 351 |         assert(pathAndQuery == "/index?page=12", pathAndQuery);
 352 |     }
 353 | 
 354 | 	// Shared implementation behind toString() and toHumanReadableString(). With humanReadable
 355 | 	// false the host goes through toPuny and the user, password, path segments and fragment
 356 | 	// through percentEncodeUnicodeOnly; with humanReadable true they are written verbatim.
 357 | 	private string toString(bool humanReadable) const
 358 |     {
 359 |         import std.array : Appender;
 360 |         Appender!string result;
 361 |         result ~= scheme;
 362 |         result ~= "://";
 363 |         // Credentials are written as user:pass@ whenever user is non-null.
 364 |         if (user) {
 365 |             result ~= humanReadable ? user : user.percentEncodeUnicodeOnly;
 366 |             result ~= ":";
 367 |             result ~= humanReadable ? pass : pass.percentEncodeUnicodeOnly;
 368 |             result ~= "@";
 369 |         }
 370 |         result ~= humanReadable ? host : host.toPuny;
 371 |         if (providedPort) {
 372 |             // An explicit port is left out when it equals the scheme's default port.
 373 |             auto defaultPort = scheme in schemeToDefaultPort;
 374 |             if (defaultPort is null || *defaultPort != providedPort) {
 375 |                 result ~= ":";
 376 |                 result ~= providedPort.to!string;
 377 |             }
 378 |         }
 379 |         if (path.length == 0 || path == "/") {
 380 |             result ~= '/';
 381 |         } else if (humanReadable) {
 382 |             result ~= path;
 383 |         } else {
 384 |             // Encode one segment at a time so the '/' separators stay literal.
 385 |             string rest = path[0] == '/' ? path[1..$] : path;
 386 |             foreach (segment; rest.split('/')) {
 387 |                 result ~= '/';
 388 |                 result ~= segment.percentEncodeUnicodeOnly;
 389 |             }
 390 |         }
 391 |         // The query string is produced the same way in both modes.
 392 |         if (queryParams.length) {
 393 |             result ~= '?';
 394 |             result ~= queryParams.toString;
 395 |         }
 396 |         if (fragment) {
 397 |             result ~= '#';
 398 |             result ~= humanReadable ? fragment : fragment.percentEncodeUnicodeOnly;
 399 |         }
 400 |         return result.data;
 401 | 	}
 402 | 
 403 | 	/// Implicitly convert URLs to strings.
 404 | 	alias toString this;
 405 | 
 406 |     /**
 407 |       Compare two URLs.
 408 | 
 409 |       I tried to make the comparison produce a sort order that seems natural, so it's not identical
 410 |       to sorting based on .toString(). For instance, username/password have lower priority than
 411 |       host. The scheme has higher priority than port but lower than host.
 412 | 
 413 |       While the output of this is guaranteed to provide a total ordering, and I've attempted to make
 414 |       it human-friendly, it isn't guaranteed to be consistent between versions. The implementation
 415 |       and its results can change without a minor version increase.
 416 |     */
 417 |     int opCmp(const URL other) const
 418 |     {
 419 |         return asTuple.opCmp(other.asTuple);
 420 |     }
 421 | 
 422 |     private auto asTuple() const nothrow
 423 |     {
 424 |         import std.typecons : tuple;
 425 |         return tuple(host, scheme, port, user, pass, path, queryParams);
 426 |     }
 427 | 
 428 |     /// Equality checks.
 429 |     bool opEquals(string other) const
 430 |     {
 431 |         URL o;
 432 |         if (!tryParseURL(other, o))
 433 |         {
 434 |             return false;
 435 |         }
 436 |         return asTuple() == o.asTuple();
 437 |     }
 438 | 
 439 |     /// Ditto
 440 |     bool opEquals(ref const URL other) const
 441 |     {
 442 |         return asTuple() == other.asTuple();
 443 |     }
 444 | 
 445 |     /// Ditto
 446 |     bool opEquals(const URL other) const
 447 |     {
 448 |         return asTuple() == other.asTuple();
 449 |     }
 450 | 
 451 |     unittest
 452 |     {
 453 |         import std.algorithm, std.array, std.format;
 454 |         assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL);
 455 |         alias sorted = std.algorithm.sort;
 456 |         auto parsedURLs =
 457 |         [
 458 |             "http://example.org/some_path",
 459 |             "http://example.org:81/other_path",
 460 |             "http://example.org/other_path",
 461 |             "https://example.org/first_path",
 462 |             "http://example.xyz/other_other_path",
 463 |             "http://me:secret@blog.ikeran.org/wp_admin",
 464 |         ].map!(x => x.parseURL).array;
 465 |         auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array;
 466 |         auto expected =
 467 |         [
 468 |             "http://me:secret@blog.ikeran.org/wp_admin",
 469 |             "http://example.org/other_path",
 470 |             "http://example.org/some_path",
 471 |             "http://example.org:81/other_path",
 472 |             "https://example.org/first_path",
 473 |             "http://example.xyz/other_other_path",
 474 |         ];
 475 |         assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls));
 476 |     }
 477 | 
 478 |     unittest
 479 |     {
 480 |         auto a = "http://x.org/a?b=c".parseURL;
 481 |         auto b = "http://x.org/a?d=e".parseURL;
 482 |         auto c = "http://x.org/a?b=a".parseURL;
 483 |         assert(a < b);
 484 |         assert(c < b);
 485 |         assert(c < a);
 486 |     }
 487 | 
 488 | 	/**
 489 | 		* The append operator (~).
 490 | 		*
 491 | 		* The append operator for URLs returns a new URL with the given string appended as a path
 492 | 		* element to the URL's path. It only adds new path elements (or sequences of path elements).
 493 | 		*
 494 | 		* Don't worry about path separators; whether you include them or not, it will just work.
 495 | 		*
 496 | 		* Query elements are copied.
 497 | 		*
 498 | 		* Examples:
 499 | 		* ---
 500 | 		* auto random = "http://testdata.org/random".parseURL;
 501 | 		* auto randInt = random ~ "int";
 502 | 		* writeln(randInt);  // prints "http://testdata.org/random/int"
 503 | 		* ---
 504 | 		*/
 505 | 	URL opBinary(string op : "~")(string subsequentPath) {
 506 | 		URL other = this;
 507 | 		other ~= subsequentPath;
 508 | 		other.queryParams = queryParams.dup;
 509 | 		return other;
 510 | 	}
 511 | 
 512 | 	/**
 513 | 		* The append-in-place operator (~=).
 514 | 		*
 515 | 		* The append operator for URLs adds a path element to this URL. It only adds new path elements
 516 | 		* (or sequences of path elements).
 517 | 		*
 518 | 		* Don't worry about path separators; whether you include them or not, it will just work.
 519 | 		*
 520 | 		* Examples:
 521 | 		* ---
 522 | 		* auto random = "http://testdata.org/random".parseURL;
 523 | 		* random ~= "int";
 524 | 		* writeln(random);  // prints "http://testdata.org/random/int"
 525 | 		* ---
 526 | 		*/
 527 | 	URL opOpAssign(string op : "~")(string subsequentPath) {
 528 | 		const bool baseEndsWithSlash = path.endsWith("/");
 529 | 		const bool suffixStartsWithSlash = subsequentPath.startsWith("/");
 530 | 		if (baseEndsWithSlash && suffixStartsWithSlash) {
 531 | 			// Both sides bring a separator: keep only one of them.
 532 | 			path ~= subsequentPath[1..$];
 533 | 		} else if (baseEndsWithSlash || suffixStartsWithSlash) {
 534 | 			// Exactly one separator is already present.
 535 | 			path ~= subsequentPath;
 536 | 		} else {
 537 | 			// Neither side has a separator, so supply it.
 538 | 			path ~= '/' ~ subsequentPath;
 539 | 		}
 540 | 		return this;
 541 | 	}
 542 | 
 543 |     /**
 544 |         * Convert a relative URL to an absolute URL.
 545 |         *
 546 |         * This is designed so that you can scrape a webpage and quickly convert links within the
 547 |         * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses
 548 |         * for it.
 549 |         *
 550 |         * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme,
 551 |         * different everything else", which might not be desirable for all schemes.
 552 |         *
 553 |         * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for
 554 |         * instance, this will give you our best attempt to parse it as a URL.
 555 |         *
 556 |         * Examples:
 557 |         * ---
 558 |         * auto base = "https://example.org/passworddb?secure=false".parseURL;
 559 |         *
 560 |         * // Download https://example.org/passworddb/by-username/dhasenan
 561 |         * download(base.resolve("by-username/dhasenan"));
 562 |         *
 563 |         * // Download https://example.org/static/style.css
 564 |         * download(base.resolve("/static/style.css"));
 565 |         *
 566 |         * // Download https://cdn.example.net/jquery.js
 567 |         * download(base.resolve("https://cdn.example.net/jquery.js"));
 568 |         * ---
 569 |         */
 570 |     URL resolve(string other)
 571 |     {
 572 |         if (other.length == 0) return this;
 573 |         if (other[0] == '/')
 574 |         {
 575 |             if (other.length > 1 && other[1] == '/')
 576 |             {
 577 |                 // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL"
 578 |                 return parseURL(this.scheme ~ ':' ~ other);
 579 |             }
 580 |         }
 581 |         else
 582 |         {
 583 |             auto schemeSep = other.indexOf("://");
 584 |             if (schemeSep >= 0 && schemeSep < other.indexOf("/"))
 585 |             {
 586 |                 // A scheme precedes the first path separator: a separate, absolute URL.
 587 |                 return other.parseURL;
 588 |             }
 589 |         }
 590 | 
 591 |         URL ret = this;
 592 |         ret.path = "";
 593 |         ret.queryParams = ret.queryParams.init;
 594 |         // Drop the base fragment; parsePathAndQuery only assigns one if `other` has '?' or '#'.
 595 |         ret.fragment = null;
 596 |         if (other[0] != '/')
 597 |         {
 598 |             if (!this.path.length)
 599 |             {
 600 |                 // nothing to be relative to
 601 |                 other = "/" ~ other;
 602 |             }
 603 |             else if (this.path[$-1] == '/')
 604 |             {
 605 |                 // directory-style base path: resolve relative to that directory
 606 |                 other = this.path ~ other;
 607 |             }
 608 |             else
 609 |             {
 610 |                 // file-like base path: resolve relative to its containing 'directory'
 611 |                 other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other;
 612 |             }
 613 |         }
 614 |         // Only the path may be collapsed; "/../" inside the query or fragment is plain data.
 615 |         auto pathEnd = other.indexOfAny("?#");
 616 |         string suffix = pathEnd < 0 ? null : other[pathEnd .. $];
 617 |         string pathPart = pathEnd < 0 ? other : other[0 .. pathEnd];
 618 |         if (pathPart.indexOf("/../") >= 0)
 619 |         {
 620 |             import std.algorithm.iteration : joiner, filter;
 621 |             string[] parts = pathPart.split('/');
 622 |             for (int i = 0; i < parts.length; i++)
 623 |             {
 624 |                 if (parts[i] == "..")
 625 |                 {
 626 |                     for (int j = i - 1; j >= 0; j--)
 627 |                     {
 628 |                         if (parts[j] != null)
 629 |                         {
 630 |                             parts[j] = null;
 631 |                             parts[i] = null;
 632 |                             break;
 633 |                         }
 634 |                     }
 635 |                 }
 636 |             }
 637 |             pathPart = "/" ~ parts.filter!(x => x != null).joiner("/").to!string;
 638 |         }
 639 |         parsePathAndQuery(ret, pathPart ~ suffix);
 640 |         return ret;
 641 |     }
 642 | 
 643 |     unittest
 644 |     {
 645 |         auto a = "http://alcyius.com/dndtools/index.html".parseURL;
 646 |         auto b = a.resolve("contacts/index.html");
 647 |         assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html");
 648 |     }
 649 | 
 650 |     unittest
 651 |     {
 652 |         auto a = "http://alcyius.com/dndtools/index.html?a=b".parseURL;
 653 |         auto b = a.resolve("contacts/index.html?foo=bar");
 654 |         assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar");
 655 |     }
 656 | 
 657 |     unittest
 658 |     {
 659 |         auto a = "http://alcyius.com/dndtools/index.html".parseURL;
 660 |         auto b = a.resolve("../index.html");
 661 |         assert(b.toString == "http://alcyius.com/index.html", b.toString);
 662 |     }
 663 | 
 664 |     unittest
 665 |     {
 666 |         auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseURL;
 667 |         auto b = a.resolve("../index.html");
 668 |         assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString);
 669 |     }
 670 | }
 671 | 
 672 | /**
 673 | 	* Try to parse a URL from a string.
 674 | 	*
 675 | 	* This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 676 | 	* may be made. However, any URL in a correct format will be parsed correctly.
 677 | 	*
 678 | 	* Returns: true on success, with the result stored in url. On failure returns false and
 679 | 	*   url may be left partially filled.
 680 | 	*/
 681 | bool tryParseURL(string value, out URL url)
 682 | {
 683 | 	url = URL.init;
 684 | 	// scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
 685 | 	// Scheme is optional in common use. We infer 'http' if it's not given.
 686 | 	// "//" only introduces the authority when it starts the string or directly follows the
 687 | 	// scheme's ':'; a later "//" (inside a path or query value) is ordinary data.
 688 | 	auto i = value.indexOf("//");
 689 | 	if (i == 0) {
 690 | 		value = value[2 .. $];
 691 | 	} else if (i > 1 && value[i-1] == ':' && value[0 .. i-1].indexOfAny("/?#") == -1) {
 692 | 		url.scheme = value[0..i-1];
 693 | 		value = value[i+2 .. $];
 694 | 	} else {
 695 | 		url.scheme = "http";
 696 | 	}
 697 | 	// [user:password@]host[:port]][/]path[?query][#fragment
 698 | 	i = value.indexOfAny(":/[?#");
 699 | 	if (i == -1) {
 700 | 		// Just a hostname.
 701 | 		url.host = value.fromPuny;
 702 | 		return true;
 703 | 	}
 704 | 
 705 | 	if (value[i] == ':') {
 706 | 		// This could be between username and password, or it could be between host and port.
 707 | 		// An '@' counts only after this ':' and before the path, query or fragment begins.
 708 | 		auto j = value.indexOfAny("@/?#");
 709 | 		if (j > i && value[j] == '@') {
 710 | 			url.user = value[0..i];
 711 | 			url.pass = value[i+1 .. j];
 712 | 			value = value[j+1 .. $];
 713 | 		}
 714 | 	}
 715 | 
 716 | 	// It's trying to be a host/port, not a user/pass.
 717 | 	i = value.indexOfAny(":/[?#");
 718 | 	if (i == -1) {
 719 | 		url.host = value.fromPuny;
 720 | 		return true;
 721 | 	}
 722 | 
 723 | 	// An IPv6 literal contains colons itself, so it is delimited by square brackets.
 724 | 	if (value[i] == '[') {
 725 | 		auto j = value[i..$].indexOf(']');
 726 | 		if (j < 0) {
 727 | 			// unterminated ipv6 addr
 728 | 			return false;
 729 | 		}
 730 | 		// includes square brackets
 731 | 		url.host = value[i .. i+j+1];
 732 | 		value = value[i+j+1 .. $];
 733 | 		if (value.length == 0) {
 734 | 			// read to end of string; we finished parse
 735 | 			return true;
 736 | 		}
 737 | 		if (value[0] != ':' && value[0] != '?' && value[0] != '/' && value[0] != '#') {
 738 | 			return false;
 739 | 		}
 740 | 	} else {
 741 | 		// Normal host.
 742 | 		url.host = value[0..i].fromPuny;
 743 | 		value = value[i .. $];
 744 | 	}
 745 | 
 746 | 	if (value[0] == ':') {
 747 | 		// The port runs up to the start of the path, query or fragment, whichever is first.
 748 | 		auto end = value.indexOfAny("/?#");
 749 | 		if (end == -1) {
 750 | 			end = value.length;
 751 | 		}
 752 | 		try {
 753 | 			url.port = value[1 .. end].to!ushort;
 754 | 		} catch (ConvException) {
 755 | 			return false;
 756 | 		}
 757 | 		value = value[end .. $];
 758 | 		if (value.length == 0) {
 759 | 			return true;
 760 | 		}
 761 | 	}
 762 | 	return parsePathAndQuery(url, value);
 763 | }
 764 | 
 765 | // Split the path, query string and fragment out of `value` and store them in `url`.
 766 | //
 767 | // `value` is everything after the authority section, e.g. "/index?page=12#top". The text before
 768 | // the first '?' or '#' becomes the path; a query string is split on '&' and each piece on its
 769 | // first '=', and the pieces are appended to url.queryParams; whatever follows '#' becomes the
 770 | // fragment. Nothing is percent-decoded. Always returns true.
 771 | private bool parsePathAndQuery(ref URL url, string value)
 772 | {
 773 |     auto i = value.indexOfAny("?#");
 774 |     if (i == -1)
 775 |     {
 776 |         // No query and no fragment: the whole input is the path.
 777 |         url.path = value;
 778 |         return true;
 779 |     }
 780 |     url.path = value[0..i];
 781 | 
 782 |     auto c = value[i];
 783 |     value = value[i + 1 .. $];
 784 |     if (c == '?')
 785 |     {
 786 |         // The query runs up to the fragment marker, or to the end of the input.
 787 |         i = value.indexOf('#');
 788 |         string query;
 789 |         if (i < 0)
 790 |         {
 791 |             query = value;
 792 |             value = null;
 793 |         }
 794 |         else
 795 |         {
 796 |             query = value[0..i];
 797 |             value = value[i + 1 .. $];
 798 |         }
 799 |         foreach (q; query.split('&'))
 800 |         {
 801 |             // A piece without '=' is a bare key and is stored with a null value.
 802 |             auto j = q.indexOf('=');
 803 |             if (j < 0)
 804 |             {
 805 |                 url.queryParams.add(q, null);
 806 |             }
 807 |             else
 808 |             {
 809 |                 url.queryParams.add(q[0..j], q[j + 1 .. $]);
 810 |             }
 811 |         }
 812 |     }
 813 | 
 814 |     // Whatever remains is the fragment (null when the query ran to the end of the input).
 815 |     url.fragment = value;
 816 |     return true;
 817 | }
 837 | 
 838 | unittest {
 839 | 	{
 840 | 		// Basic.
 841 | 		URL url;
 842 | 		with (url) {
 843 | 			scheme = "https";
 844 | 			host = "example.org";
 845 | 			path = "/foo/bar";
 846 | 			queryParams.add("hello", "world");
 847 | 			queryParams.add("gibe", "clay");
 848 | 			fragment = "frag";
 849 | 		}
 850 | 		assert(
 851 | 				// Not sure what order it'll come out in.
 852 | 				url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
 853 | 				url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
 854 | 				url.toString);
 855 | 	}
 856 | 	{
 857 | 		// Percent encoded.
 858 | 		URL url;
 859 | 		with (url) {
 860 | 			scheme = "https";
 861 | 			host = "example.org";
 862 | 			path = "/f☃o";
 863 | 			queryParams.add("❄", "❀");
 864 | 			fragment = "ş";
 865 | 		}
 866 | 		assert(
 867 | 				url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80#%C5%9F",
 868 | 				url.toString);
 869 | 	}
 870 | 	{
 871 | 		// Port, user, pass.
 872 | 		URL url;
 873 | 		with (url) {
 874 | 			scheme = "https";
 875 | 			host = "example.org";
 876 | 			user = "dhasenan";
 877 | 			pass = "itsasecret";
 878 | 			port = 17;
 879 | 		}
 880 | 		assert(
 881 | 				url.toString == "https://dhasenan:itsasecret@example.org:17/",
 882 | 				url.toString);
 883 | 	}
 884 | 	{
 885 | 		// Query with no path.
 886 | 		URL url;
 887 | 		with (url) {
 888 | 			scheme = "https";
 889 | 			host = "example.org";
 890 | 			queryParams.add("hi", "bye");
 891 | 		}
 892 | 		assert(
 893 | 				url.toString == "https://example.org/?hi=bye",
 894 | 				url.toString);
 895 | 	}
 896 | }
 897 | 
 898 | unittest
 899 | {
 900 | 	auto url = "//foo/bar".parseURL;
 901 | 	assert(url.host == "foo", "expected host foo, got " ~ url.host);
 902 | 	assert(url.path == "/bar");
 903 | }
 904 | 
 905 | unittest
 906 | {
 907 |     import std.stdio : writeln;
 908 |     auto url = "file:///foo/bar".parseURL;
 909 |     assert(url.host == null);
 910 |     assert(url.port == 0);
 911 |     assert(url.scheme == "file");
 912 |     assert(url.path == "/foo/bar");
 913 |     assert(url.toString == "file:///foo/bar");
 914 |     assert(url.queryParams.empty);
 915 |     assert(url.fragment == null);
 916 | }
 917 | 
 918 | unittest
 919 | {
 920 | 	// ipv6 hostnames!
 921 | 	{
 922 | 		// full range of data
 923 | 		auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar");
 924 | 		assert(url.scheme == "https", url.scheme);
 925 | 		assert(url.user == "bob", url.user);
 926 | 		assert(url.pass == "secret", url.pass);
 927 | 		assert(url.host == "[::1]", url.host);
 928 | 		assert(url.port == 2771, url.port.to!string);
 929 | 		assert(url.path == "/foo/bar", url.path);
 930 | 	}
 931 | 
 932 | 	// minimal
 933 | 	{
 934 | 		auto url = parseURL("[::1]");
 935 | 		assert(url.host == "[::1]", url.host);
 936 | 	}
 937 | 
 938 | 	// some random bits
 939 | 	{
 940 | 		auto url = parseURL("http://[::1]/foo");
 941 | 		assert(url.scheme == "http", url.scheme);
 942 | 		assert(url.host == "[::1]", url.host);
 943 | 		assert(url.path == "/foo", url.path);
 944 | 	}
 945 | 
 946 | 	{
 947 | 		auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding");
 948 | 		assert(url.scheme == "https");
 949 | 		assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]");
 950 | 		assert(url.path == "/");
 951 | 		assert(url.fragment == "justkidding");
 952 | 	}
 953 | }
 954 | 
 955 | unittest
 956 | {
 957 | 	auto url = "localhost:5984".parseURL;
 958 | 	auto url2 = url ~ "db1";
 959 | 	assert(url2.toString == "http://localhost:5984/db1", url2.toString);
 960 | 	auto url3 = url2 ~ "_all_docs";
 961 | 	assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
 962 | }
 963 | 
 964 | ///
 965 | unittest {
 966 | 	{
 967 | 		// Basic.
 968 | 		URL url;
 969 | 		with (url) {
 970 | 			scheme = "https";
 971 | 			host = "example.org";
 972 | 			path = "/foo/bar";
 973 | 			queryParams.add("hello", "world");
 974 | 			queryParams.add("gibe", "clay");
 975 | 			fragment = "frag";
 976 | 		}
 977 | 		assert(
 978 | 				// Not sure what order it'll come out in.
 979 | 				url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
 980 | 				url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
 981 | 				url.toString);
 982 | 	}
 983 | 	{
 984 | 		// Passing an array of query values.
 985 | 		URL url;
 986 | 		with (url) {
 987 | 			scheme = "https";
 988 | 			host = "example.org";
 989 | 			path = "/foo/bar";
 990 | 			queryParams.add("hello", "world");
 991 | 			queryParams.add("hello", "aether");
 992 | 			fragment = "frag";
 993 | 		}
 994 | 		assert(
 995 | 				// Not sure what order it'll come out in.
 996 | 				url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
 997 | 				url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag",
 998 | 				url.toString);
 999 | 	}
1000 | 	{
1001 | 		// Percent encoded.
1002 | 		URL url;
1003 | 		with (url) {
1004 | 			scheme = "https";
1005 | 			host = "example.org";
1006 | 			path = "/f☃o";
1007 | 			queryParams.add("❄", "❀");
1008 | 			queryParams.add("[", "]");
1009 | 			fragment = "ş";
1010 | 		}
1011 | 		assert(
1012 | 				// Not sure what order it'll come out in.
1013 | 				url.toString ==
1014 | 				"https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&[=]#%C5%9F" ||
1015 | 				url.toString == "https://example.org/f%E2%98%83o?[=]&%E2%9D%84=%E2%9D%80#%C5%9F",
1016 | 				url.toString);
1017 | 	}
1018 | 	{
1019 | 		// Port, user, pass.
1020 | 		URL url;
1021 | 		with (url) {
1022 | 			scheme = "https";
1023 | 			host = "example.org";
1024 | 			user = "dhasenan";
1025 | 			pass = "itsasecret";
1026 | 			port = 17;
1027 | 		}
1028 | 		assert(
1029 | 				url.toString == "https://dhasenan:itsasecret@example.org:17/",
1030 | 				url.toString);
1031 | 	}
1032 | 	{
1033 | 		// Query with no path.
1034 | 		URL url;
1035 | 		with (url) {
1036 | 			scheme = "https";
1037 | 			host = "example.org";
1038 | 			queryParams.add("hi", "bye");
1039 | 		}
1040 | 		assert(
1041 | 				url.toString == "https://example.org/?hi=bye",
1042 | 				url.toString);
1043 | 	}
1044 | }
1045 | 
1046 | unittest {
1047 |     // Percent encoding shouldn't happen until .toString
1048 |     auto url = "http://example.org/á".parseURL;
1049 |     assert(url.path == "/á", url.path);
1050 | }
1051 | 
unittest {
	// Parsing must NOT percent-decode: every component is kept in the encoded
	// form in which it appeared in the input.

	// Decoded, the user and password in the authority below would read "#" and "!:".
	auto urlString = "http://%23:%21%3A@example.org/%7B?%3B&%26=%3D#%23hash";
	auto url = urlString.parseURL;
	assert(url.user == "%23");
	assert(url.pass == "%21%3A");
	assert(url.host == "example.org");
	assert(url.path == "/%7B");
	assert(url.queryParams["%26"].front == "%3D");
	assert(url.queryParams["%3B"].front == "");
	assert(url.fragment == "%23hash");

	// Round trip: parse followed by toString reproduces the input, repeatedly.
	assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString);
	assert(urlString == urlString.parseURL.toString.parseURL.toString);
}
1070 | 
1071 | unittest {
1072 | 	auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
1073 | 	assert(url.host == "☂.☃.org", url.host);
1074 | }
1075 | 
1076 | unittest {
1077 | 	auto url = "https://☂.☃.org/?hi=bye".parseURL;
1078 | 	assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
1079 | }
1080 | 
1081 | ///
1082 | unittest {
1083 | 	// There's an existing path.
1084 | 	auto url = parseURL("http://example.org/foo");
1085 | 	URL url2;
1086 | 	// No slash? Assume it needs a slash.
1087 | 	assert((url ~ "bar").toString == "http://example.org/foo/bar");
1088 | 	// With slash? Don't add another.
1089 | 	url2 = url ~ "/bar";
1090 | 	assert(url2.toString == "http://example.org/foo/bar", url2.toString);
1091 | 	url ~= "bar";
1092 | 	assert(url.toString == "http://example.org/foo/bar");
1093 | 
1094 | 	// Path already ends with a slash; don't add another.
1095 | 	url = parseURL("http://example.org/foo/");
1096 | 	assert((url ~ "bar").toString == "http://example.org/foo/bar");
1097 | 	// Still don't add one even if you're appending with a slash.
1098 | 	assert((url ~ "/bar").toString == "http://example.org/foo/bar");
1099 | 	url ~= "/bar";
1100 | 	assert(url.toString == "http://example.org/foo/bar");
1101 | 
1102 | 	// No path.
1103 | 	url = parseURL("http://example.org");
1104 | 	assert((url ~ "bar").toString == "http://example.org/bar");
1105 | 	assert((url ~ "/bar").toString == "http://example.org/bar");
1106 | 	url ~= "bar";
1107 | 	assert(url.toString == "http://example.org/bar");
1108 | 
1109 | 	// Path is just a slash.
1110 | 	url = parseURL("http://example.org/");
1111 | 	assert((url ~ "bar").toString == "http://example.org/bar");
1112 | 	assert((url ~ "/bar").toString == "http://example.org/bar");
1113 | 	url ~= "bar";
1114 | 	assert(url.toString == "http://example.org/bar", url.toString);
1115 | 
1116 | 	// No path, just fragment.
1117 | 	url = "ircs://irc.freenode.com/#d".parseURL;
1118 | 	assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
1119 | }
1120 | unittest
1121 | {
1122 |     // basic resolve()
1123 |     {
1124 |         auto base = "https://example.org/this/".parseURL;
1125 |         assert(base.resolve("that") == "https://example.org/this/that");
1126 |         assert(base.resolve("/that") == "https://example.org/that");
1127 |         assert(base.resolve("//example.net/that") == "https://example.net/that");
1128 |     }
1129 | 
1130 |     // ensure we don't preserve query params
1131 |     {
1132 |         auto base = "https://example.org/this?query=value&other=value2".parseURL;
1133 |         assert(base.resolve("that") == "https://example.org/that");
1134 |         assert(base.resolve("/that") == "https://example.org/that");
1135 |         assert(base.resolve("tother/that") == "https://example.org/tother/that");
1136 |         assert(base.resolve("//example.net/that") == "https://example.net/that");
1137 |     }
1138 | }
1139 | 
1140 | 
1141 | unittest
1142 | {
1143 | 	import std.net.curl;
1144 | 	auto url = "http://example.org".parseURL;
1145 | 	assert(is(typeof(std.net.curl.get(url))));
1146 | }
1147 | 
/**
	* Parse a string into a URL, throwing on failure.
	*
	* This is the throwing counterpart of tryParseURL: it delegates to it and turns a
	* `false` result into an exception.
	*
	* Throws:
	*   URLException if the string could not be parsed as a URL.
	*/
URL parseURL(string value) {
	URL parsed;
	if (!tryParseURL(value, parsed)) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
1161 | 
1162 | ///
1163 | unittest {
1164 | 	{
1165 | 		// Infer scheme
1166 | 		auto u1 = parseURL("example.org");
1167 | 		assert(u1.scheme == "http");
1168 | 		assert(u1.host == "example.org");
1169 | 		assert(u1.path == "");
1170 | 		assert(u1.port == 80);
1171 | 		assert(u1.providedPort == 0);
1172 | 		assert(u1.fragment == "");
1173 | 	}
1174 | 	{
1175 | 		// Simple host and scheme
1176 | 		auto u1 = parseURL("https://example.org");
1177 | 		assert(u1.scheme == "https");
1178 | 		assert(u1.host == "example.org");
1179 | 		assert(u1.path == "");
1180 | 		assert(u1.port == 443);
1181 | 		assert(u1.providedPort == 0);
1182 | 	}
1183 | 	{
1184 | 		// With path
1185 | 		auto u1 = parseURL("https://example.org/foo/bar");
1186 | 		assert(u1.scheme == "https");
1187 | 		assert(u1.host == "example.org");
1188 | 		assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
1189 | 		assert(u1.port == 443);
1190 | 		assert(u1.providedPort == 0);
1191 | 	}
1192 | 	{
1193 | 		// With explicit port
1194 | 		auto u1 = parseURL("https://example.org:1021/foo/bar");
1195 | 		assert(u1.scheme == "https");
1196 | 		assert(u1.host == "example.org");
1197 | 		assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
1198 | 		assert(u1.port == 1021);
1199 | 		assert(u1.providedPort == 1021);
1200 | 	}
1201 | 	{
1202 | 		// With user
1203 | 		auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
1204 | 		assert(u1.scheme == "https");
1205 | 		assert(u1.host == "example.org");
1206 | 		assert(u1.path == "/foo/bar");
1207 | 		assert(u1.port == 443);
1208 | 		assert(u1.user == "bob");
1209 | 		assert(u1.pass == "secret");
1210 | 	}
1211 | 	{
1212 | 		// With user, URL-encoded
1213 | 		auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
1214 | 		assert(u1.scheme == "https");
1215 | 		assert(u1.host == "example.org");
1216 | 		assert(u1.path == "/foo/bar");
1217 | 		assert(u1.port == 443);
1218 | 		assert(u1.user == "bob%21");
1219 | 		assert(u1.pass == "secret%21%3F");
1220 | 	}
1221 | 	{
1222 | 		// With user and port and path
1223 | 		auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
1224 | 		assert(u1.scheme == "https");
1225 | 		assert(u1.host == "example.org");
1226 | 		assert(u1.path == "/foo/bar");
1227 | 		assert(u1.port == 2210);
1228 | 		assert(u1.user == "bob");
1229 | 		assert(u1.pass == "secret");
1230 | 		assert(u1.fragment == "");
1231 | 	}
1232 | 	{
1233 | 		// With query string
1234 | 		auto u1 = parseURL("https://example.org/?login=true");
1235 | 		assert(u1.scheme == "https");
1236 | 		assert(u1.host == "example.org");
1237 | 		assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
1238 | 		assert(u1.queryParams["login"].front == "true");
1239 | 		assert(u1.fragment == "");
1240 | 	}
1241 | 	{
1242 | 		// With query string and fragment
1243 | 		auto u1 = parseURL("https://example.org/?login=true#justkidding");
1244 | 		assert(u1.scheme == "https");
1245 | 		assert(u1.host == "example.org");
1246 | 		assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
1247 | 		assert(u1.queryParams["login"].front == "true");
1248 | 		assert(u1.fragment == "justkidding");
1249 | 	}
1250 | }
1251 | 
1252 | unittest {
1253 | 	assert(parseURL("http://example.org").port == 80);
1254 | 	assert(parseURL("http://example.org:5326").port == 5326);
1255 | 
1256 | 	auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
1257 | 	assert(url.scheme == "redis");
1258 | 	assert(url.user == "admin");
1259 | 	assert(url.pass == "password");
1260 | 
1261 | 	assert(parseURL("example.org").toString == "http://example.org/");
1262 | 	assert(parseURL("http://example.org:80").toString == "http://example.org/");
1263 | 
1264 | 	assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
1265 | }
1266 | 
/**
	 * Percent-encode every byte of the string that lies outside 7-bit ASCII.
	 *
	 * ASCII bytes, including reserved URL characters, are copied through untouched. Each byte with
	 * the high bit set is replaced by "%XX" with upper-case hex digits.
	 */
string percentEncodeUnicodeOnly(string raw) {
	import std.array : Appender;
	static immutable hexDigits = "0123456789ABCDEF";
	Appender!string encoded;
	// Iterate by UTF-8 code unit, not by code point: each octet is escaped on its own.
	foreach (char unit; raw) {
		immutable octet = cast(ubyte) unit;
		if (octet < 0x80) {
			encoded ~= unit;
			continue;
		}
		encoded ~= '%';
		encoded ~= hexDigits[octet >> 4];
		encoded ~= hexDigits[octet & 0x0F];
	}
	return encoded.data;
}
1282 | 
/**
	* Percent-encode a string for use as a URL component.
	*
	* Only the unreserved characters (ASCII letters, digits, and `-._~`) are copied through as-is.
	* Every other code point is converted to its UTF-8 octets and each octet is written as "%XX"
	* with upper-case hex digits. Host names are not handled here; they use Punycode instead.
	*/
string percentEncode(string raw) {
	import std.utf : encode;
	import std.array : Appender;
	Appender!string result;
	foreach (dchar d; raw) {
		bool unreserved;
		switch (d) {
			case 'a': .. case 'z':
			case 'A': .. case 'Z':
			case '0': .. case '9':
			case '-', '.', '_', '~':
				unreserved = true;
				break;
			default:
				unreserved = false;
				break;
		}
		if (unreserved) {
			result ~= d;
			continue;
		}
		// A code point takes at most four UTF-8 octets, so a stack buffer is enough.
		// ASCII yields one octet; anything else yields several, each escaped separately.
		char[4] utf8;
		immutable count = encode(utf8, d);
		foreach (octet; utf8[0 .. count]) {
			result ~= format("%%%02X", cast(ubyte) octet);
		}
	}
	return result.data;
}
1319 | 
1320 | ///
1321 | unittest {
1322 | 	assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
1323 | 	assert(percentEncode("~~--..__") == "~~--..__");
1324 | 	assert(percentEncode("0123456789") == "0123456789");
1325 | 
1326 | 	string e;
1327 | 
1328 | 	e = percentEncode("☃");
1329 | 	assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);
1330 | }
1331 | 
/**
	* Percent-decode a string, keeping the result only if it is valid UTF-8.
	*
	* The input is decoded with percentDecodeRaw. If the decoded bytes form valid UTF-8 they are
	* returned as a string. If they do not, the original, still-encoded input is returned unchanged,
	* so the result of this function is never an invalid UTF-8 string produced by decoding.
	*
	* Exceptions raised by percentDecodeRaw for malformed percent sequences are not caught here.
	*/
string percentDecode(string encoded)
{
	import std.utf : validate, UTFException;
	auto decoded = cast(string) percentDecodeRaw(encoded);
	try
	{
		validate(decoded);
		return decoded;
	}
	catch (UTFException)
	{
		// Decoded octets are not UTF-8 text; hand back the encoded form instead.
		return encoded;
	}
}
1356 | 
1357 | ///
1358 | unittest {
1359 | 	assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
1360 | 	assert(percentDecode("~~--..__") == "~~--..__");
1361 | 	assert(percentDecode("0123456789") == "0123456789");
1362 | 
1363 | 	string e;
1364 | 
1365 | 	e = percentDecode("%E2%98%83");
1366 | 	assert(e == "☃", "expected a snowman but got" ~ e);
1367 | 
1368 | 	e = percentDecode("%e2%98%83");
1369 | 	assert(e == "☃", "expected a snowman but got" ~ e);
1370 | 
1371 | 	try {
1372 | 		// %ES is an invalid percent sequence: 'S' is not a hex digit.
1373 | 		percentDecode("%es");
1374 | 		assert(false, "expected exception not thrown");
1375 | 	} catch (URLException) {
1376 | 	}
1377 | 
1378 | 	try {
1379 | 		percentDecode("%e");
1380 | 		assert(false, "expected exception not thrown");
1381 | 	} catch (URLException) {
1382 | 	}
1383 | }
1384 | 
/**
	* Percent-decode a string into a ubyte array.
	*
	* Every "%XX" sequence (hex digits in either case) is replaced by the byte it denotes; all other
	* bytes are copied through unchanged.
	*
	* This yields a ubyte array and performs no validation on the output, which may therefore not be
	* valid UTF-8.
	*
	* Throws:
	*   URLException if a percent sign is not followed by exactly two hex digits, including when
	*   the input ends right after the percent sign.
	*/
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	import std.array : Appender;
	Appender!(immutable(ubyte)[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= encoded[i];
			continue;
		}
		// Both digits must exist. This is written as an addition on purpose:
		// `encoded.length - 2` wraps around for inputs shorter than two bytes
		// (length is unsigned), which would let a lone "%" slip past the check.
		if (i + 2 >= encoded.length) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
			auto b = fromHex(encoded[i + 1]);
			auto c = fromHex(encoded[i + 2]);
			app ~= cast(ubyte)((b << 4) | c);
		} else {
			throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		// Skip the two digits just consumed; the loop increment skips the '%'.
		i += 2;
	}
	return app.data;
}
1421 | 
/// True if `c` is an ASCII hexadecimal digit (0-9, a-f or A-F).
private bool isHex(char c) {
	switch (c) {
		case '0': .. case '9':
		case 'a': .. case 'f':
		case 'A': .. case 'F':
			return true;
		default:
			return false;
	}
}
1427 | 
/// Numeric value (0-15) of a hex digit in either case; 255 for any other character.
private ubyte fromHex(char s) {
	if (s >= '0' && s <= '9') {
		return cast(ubyte)(s - '0');
	}
	if (s >= 'A' && s <= 'F') {
		return cast(ubyte)(s - 'A' + 10);
	}
	if (s >= 'a' && s <= 'f') {
		return cast(ubyte)(s - 'a' + 10);
	}
	// Not a hex digit. Callers are expected to have checked with isHex first.
	return ubyte.max;
}
1435 | 
/**
	* Convert a host name to its ASCII form for serialization.
	*
	* An empty host yields "". A host starting with '[' is taken to be an IPv6 literal and is
	* returned unchanged. A host made only of permitted ASCII characters is returned unchanged.
	* Otherwise each dot-separated label is passed through punyEncode.
	*
	* Throws:
	*   URLException if an ASCII character that is not permitted in a host name is found before
	*   the first non-ASCII character.
	*/
private string toPuny(string unicodeHostname)
{
	if (unicodeHostname.length == 0) return "";
	if (unicodeHostname[0] == '[')
	{
		// It's an ipv6 name.
		return unicodeHostname;
	}
	bool mustEncode = false;
	foreach (i, dchar d; unicodeHostname) {
		auto c = cast(uint) d;
		if (c > 0x80) {
			// The scan stops at the first non-ASCII code point; the rest is left to punyEncode.
			mustEncode = true;
			break;
		}
		// Permitted: 0x2C-0x39 (`,` `-` `.` `/` and digits), A-Z, a-z.
		// Rejected: everything below 0x2C, 0x3A-0x40 (`:` through `@`), 0x5B-0x60, and 0x7B up.
		// The upper bound of the second range was previously written as decimal 40, which made
		// that test impossible to satisfy.
		if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
			throw new URLException(
					format(
						"domain name '%s' contains illegal character '%s' at position %s",
						unicodeHostname, d, i));
		}
	}
	if (!mustEncode) {
		return unicodeHostname;
	}
	import std.algorithm.iteration : map;
	return unicodeHostname.split('.').map!punyEncode.join(".");
}
1464 | 
/// Decode a host name label by label: each dot-separated label goes through punyDecode.
private string fromPuny(string hostname)
{
	string[] labels;
	foreach (label; hostname.split('.')) {
		labels ~= punyDecode(label);
	}
	return labels.join(".");
}
1470 | 
private {
	// Punycode parameters shared by punyEncode and punyDecode.
	enum delimiter = '-';
	enum marker = "xn--";
	enum ulong damp = 700;
	enum ulong tmin = 1;
	enum ulong tmax = 26;
	enum ulong skew = 38;
	enum ulong base = 36;
	enum ulong initialBias = 72;
	enum dchar initialN = cast(dchar)128;

	/**
		* Compute the next bias from the delta just encoded or decoded.
		*
		* Params:
		*   delta = the delta that was just processed
		*   numPoints = number of code points handled so far; must be non-zero
		*   firstTime = true for the first delta of a label, which is damped more strongly
		*/
	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
		enum threshold = ((base - tmin) * tmax) / 2;
		delta = firstTime ? delta / damp : delta / 2;
		delta += delta / numPoints;
		ulong k = 0;
		for (; delta > threshold; k += base) {
			delta /= (base - tmin);
		}
		return k + (((base - tmin + 1) * delta) / (delta + skew));
	}
}
1497 | 
/**
	* Encode the input string using the Punycode algorithm.
	*
	* Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
	* with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
	* in Punycode, you will get "xn--m3h.xn--n3h.com".
	*
	* In order to puny-encode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "☂.☃.com";
	* auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
	* ---
	*
	* Returns:
	*   The input unchanged if it holds only basic (below U+0080) code points, otherwise the
	*   encoded segment prefixed with "xn--".
	*
	* Throws:
	*   URLException if the delta exceeds uint.max during encoding.
	*/
string punyEncode(string input)
{
	import std.array : Appender;
	ulong delta = 0;
	dchar n = initialN;
	auto bias = initialBias;
	Appender!string output;
	output ~= marker;
	size_t pushed = 0;
	size_t codePoints = 0;
	// Copy the basic code points straight through. U+0080 itself is NOT basic, hence `<`.
	foreach (dchar c; input) {
		codePoints++;
		if (c < initialN) {
			output ~= c;
			pushed++;
		}
	}
	if (pushed == codePoints) {
		// No encoding to do.
		return input;
	}
	if (pushed > 0) {
		output ~= delimiter;
	}
	bool first = true;
	while (pushed < codePoints) {
		// Find the smallest code point not yet handled (>= n). A flag is used instead of a
		// sentinel value so that U+10FFFF (dchar.max) can be encoded too.
		bool found = false;
		dchar best = dchar.max;
		foreach (dchar c; input) {
			if (n <= c && c <= best) {
				best = c;
				found = true;
			}
		}
		if (!found) {
			throw new URLException("failed to find a new codepoint to process during punyencode");
		}
		// Widen before multiplying so the product cannot wrap in 32 bits.
		delta += cast(ulong)(best - n) * (pushed + 1);
		if (delta > uint.max) {
			// TODO better error message
			throw new URLException("overflow during punyencode");
		}
		n = best;
		foreach (dchar c; input) {
			if (c < n) {
				delta++;
			}
			if (c == n) {
				// Emit delta as a variable-length integer in base 36.
				ulong q = delta;
				auto k = base;
				while (true) {
					ulong t;
					if (k <= bias) {
						t = tmin;
					} else if (k >= bias + tmax) {
						t = tmax;
					} else {
						t = k - bias;
					}
					if (q < t) {
						break;
					}
					output ~= digitToBasic(t + ((q - t) % (base - t)));
					q = (q - t) / (base - t);
					k += base;
				}
				output ~= digitToBasic(q);
				pushed++;
				bias = adapt(delta, pushed, first);
				first = false;
				delta = 0;
			}
		}
		delta++;
		n++;
	}
	return output.data;
}
1590 | 
/**
	* Decode the input string using the Punycode algorithm.
	*
	* Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
	* with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
	* in Punycode, you will get "xn--m3h.xn--n3h.com".
	*
	* In order to puny-decode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "xn--m3h.xn--n3h.com";
	* auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
	* ---
	*
	* Returns:
	*   The input unchanged if it does not start with "xn--", otherwise the decoded segment.
	*
	* Throws:
	*   URLException if the encoded part is truncated, contains a character that is not a Punycode
	*   digit, or decodes to a value outside the Unicode range.
	*/
string punyDecode(string input) {
	if (!input.startsWith(marker)) {
		return input;
	}
	input = input[marker.length..$];

	// let n = initial_n
	dchar n = initialN;

	// let i = 0
	// let bias = initial_bias
	// let output = an empty string indexed from 0
	size_t i = 0;
	auto bias = initialBias;
	dchar[] output;
	// This reserves a bit more than necessary, but it should be more efficient overall than just
	// appending and inserting willy-nilly.
	output.reserve(input.length);

	// consume all code points before the last delimiter (if there is one)
	//   and copy them to output
	// if more than zero code points were consumed then consume one more
	//   (which will be the last delimiter)
	auto end = input.lastIndexOf(delimiter);
	if (end > -1) {
		foreach (dchar c; input[0..end]) {
			output ~= c;
		}
		input = input[end+1 .. $];
	}

	// while the input is not exhausted do begin
	size_t pos = 0;
	while (pos < input.length) {
		//   let oldi = i
		//   let w = 1
		auto oldi = i;
		ulong w = 1;
		//   for k = base to infinity in steps of base do begin
		for (ulong k = base; k < uint.max; k += base) {
			//     consume a code point, or fail if there was none to consume
			if (pos >= input.length) {
				throw new URLException(
						"invalid punycode: input ended in the middle of an encoded value");
			}
			// Note that the input is all ASCII, so we can simply index the input string bytewise.
			auto c = input[pos];
			pos++;
			//     let digit = the code point's digit-value, fail if it has none
			auto digit = basicToDigit(c);
			if (digit >= base) {
				throw new URLException(
						"invalid punycode: not a valid digit at index " ~ (pos - 1).to!string);
			}
			//     let i = i + digit * w
			i += digit * w;
			//     let t = tmin if k <= bias {+ tmin}, or
			//             tmax if k >= bias + tmax, or k - bias otherwise
			ulong t;
			if (k <= bias) {
				t = tmin;
			} else if (k >= bias + tmax) {
				t = tmax;
			} else {
				t = k - bias;
			}
			//     if digit < t then break
			if (digit < t) {
				break;
			}
			//     let w = w * (base - t), fail on overflow
			w *= (base - t);
			if (w > uint.max) {
				throw new URLException("overflow during punydecode");
			}
			//   end
		}
		//   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
		bias = adapt(i - oldi, output.length + 1, oldi == 0);
		//   let n = n + i div (length(output) + 1), fail on overflow
		auto next = cast(ulong)n + i / (output.length + 1);
		if (next > dchar.max) {
			throw new URLException("invalid punycode: decoded code point is out of range");
		}
		n = cast(dchar)next;
		//   let i = i mod (length(output) + 1)
		i %= (output.length + 1);
		//   {if n is a basic code point then fail}
		// (We aren't actually going to fail here; it's clear what this means.)
		//   insert n into output at position i
		import std.array : insertInPlace;
		(() @trusted { output.insertInPlace(i, cast(dchar)n); })();  // should be @safe but isn't marked
		//   increment i
		i++;
		// end
	}
	return output.to!string;
}
1688 | 
// Lifted from punycode.js.
// Maps a digit value to its character: 0..25 become 'a'..'z', 26..35 become '0'..'9'.
private dchar digitToBasic(ulong digit) {
	// 97 is 'a'; 22 is '0' - 26.
	immutable ulong offset = (digit < 26) ? 97 : 22;
	return cast(dchar)(digit + offset);
}
1693 | 
// Lifted from punycode.js.
// Maps a character to its digit value: '0'..'9' give 26..35, letters of either case give 0..25.
// Any other character yields `base`, which is one past the largest valid digit.
private uint basicToDigit(char c) {
	immutable uint codePoint = c;
	if (codePoint >= '0' && codePoint <= '9') {
		return codePoint - 22;
	}
	if (codePoint >= 'A' && codePoint <= 'Z') {
		return codePoint - 'A';
	}
	if (codePoint >= 'a' && codePoint <= 'z') {
		return codePoint - 'a';
	}
	return base;
}
1708 | 
unittest {
	// punyEncode against known encodings of single labels.
	{
		auto a = "b\u00FCcher";
		assert(punyEncode(a) == "xn--bcher-kva");
	}
	{
		auto a = "b\u00FCc\u00FCher";
		assert(punyEncode(a) == "xn--bcher-kvab");
	}
	{
		auto a = "ýbücher";
		auto b = punyEncode(a);
		assert(b == "xn--bcher-kvaf", b);
	}

	{
		auto a = "mañana";
		assert(punyEncode(a) == "xn--maana-pta");
	}

	{
		auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
			~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
		auto b = punyEncode(a);
		assert(b == "xn--egbpdaj6bu4bxfgehfvwxn", b);
	}
}
1737 | 
1738 | unittest {
1739 | 	{
1740 | 		auto b = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
1741 | 		assert(b == "ليهمابتكلموشعربي؟", b);
1742 | 	}
1743 | 	{
1744 | 		assert(punyDecode("xn--maana-pta") == "mañana");
1745 | 	}
1746 | }
1747 | 
1748 | unittest {
1749 | 	import std.string, std.algorithm, std.array, std.range;
1750 | 	{
1751 | 		auto domain = "xn--m3h.xn--n3h.com";
1752 | 		auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
1753 | 		assert(decodedDomain == "☂.☃.com", decodedDomain);
1754 | 	}
1755 | 	{
1756 | 		auto domain = "☂.☃.com";
1757 | 		auto decodedDomain = domain.splitter(".").map!(punyEncode).join(".");
1758 | 		assert(decodedDomain == "xn--m3h.xn--n3h.com", decodedDomain);
1759 | 	}
1760 | }
1761 | 
1762 | unittest {
1763 |     // this has percent-encoded non-unicode data
1764 |     auto u = "http://domain.example/%E9%E9%E9".parseURL;
1765 |     assert(u.toString == "http://domain.example/%E9%E9%E9", "toString: " ~ u.toString);
1766 |     assert(u.toHumanReadableString == "http://domain.example/%E9%E9%E9",
1767 |             "toHumanReadableString: " ~ u.toHumanReadableString);
1768 | }
1769 | 
1770 | unittest {
1771 | 	assert(URL("http://example.org") == parseURL("http://example.org"));
1772 | }
1773 | 


--------------------------------------------------------------------------------