url source code

1 /**
2 	* A URL handling library.
3 	*
4 	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
5 	* elements like port, path, username, and password.
6 	*
7 	* This module aims to make it simple to muck about with them.
8 	*
9 	* Example usage:
10 	* ---
11 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
12 	* auto files = system("ssh", url.toString, "ls").splitLines;
13 	* foreach (file; files) {
14 	*		system("scp", url ~ file, ".");
15 	* }
16 	* ---
17 	*
18 	* License: The MIT license.
19 	*/
20 module url;
21 
22 import std.algorithm;
23 import std.array;
24 import std.conv;
25 import std.encoding;
26 import std.string;
27 import std.utf;
28 
29 @safe:
30 
/**
	* An exception thrown when something bad happens with URLs.
	*
	* Params:
	*   msg  = description of the failure
	*   file = source file to report as the throw site; defaults to the caller's file
	*   line = source line to report as the throw site; defaults to the caller's line
	*   next = optional exception to chain behind this one
	*/
class URLException : Exception {
	this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null) {
		// Forward the location so diagnostics point at the code that threw, not at this constructor.
		super(msg, file, line, next);
	}
}
35 
/**
	* Default port numbers, keyed by URL scheme.
	*
	* The table is incomplete: some schemes have no port at all, and others have no single agreed
	* port. Treat every entry as a best guess.
	*/
ushort[string] schemeToDefaultPort;

/// Fill in schemeToDefaultPort when the module is constructed.
static this() {
	schemeToDefaultPort = [
		"aaa": 3868, "aaas": 5658, "acap": 674, "amqp": 5672,
		"cap": 1026, "coap": 5683, "coaps": 5684,
		"dav": 443, "dict": 2628,
		"ftp": 21,
		"git": 9418, "go": 1096, "gopher": 70,
		"http": 80, "https": 443,
		"iac": 4569, "icap": 1344, "imap": 143,
		// ipp and ipps really do share port 631.
		"ipp": 631, "ipps": 631,
		// 6667 is the de facto irc port, not the IANA reserved one.
		"irc": 6667, "ircs": 6697,
		// Plain "iris" defaults to iris.beep.
		"iris": 702, "iris.beep": 702, "iris.lwz": 715, "iris.xpc": 713, "iris.xpcs": 714,
		// Client-to-server port.
		"jabber": 5222,
		"ldap": 389, "ldaps": 636,
		"msrp": 2855, "msrps": 2855, "mtqp": 1038, "mupdate": 3905,
		"news": 119, "nfs": 2049,
		"pop": 110,
		"redis": 6379, "reload": 6084, "rsync": 873, "rtmfp": 1935, "rtsp": 554,
		"shttp": 80, "sieve": 4190, "sip": 5060, "sips": 5061, "smb": 445, "smtp": 25,
		"snews": 563, "snmp": 161, "soap.beep": 605, "ssh": 22, "stun": 3478, "stuns": 5349,
		"svn": 3690,
		"teamspeak": 9987, "telnet": 23, "tftp": 69, "tip": 3372,
	];
}
107 
/**
	* A collection of query parameters.
	*
	* This is effectively a multimap of string -> strings. Parameters are stored in the order in
	* which they were added.
	*/
struct QueryParams {
	import std.typecons;
	alias Tuple!(string, "key", string, "value") Param;
	/// The stored key/value pairs, in insertion order.
	Param[] params;

	/// The number of stored parameters. Repeated keys are counted once per occurrence.
	@property size_t length() {
		return params.length;
	}

	/// Get a range over the query parameter values for the given key.
	/// The range is empty when no parameter has that key.
	auto opIndex(string key) {
		import std.algorithm : filter, map;
		// filter rather than find: find would return everything from the first match onward,
		// including the values of unrelated keys that happen to follow it.
		return params.filter!(x => x.key == key).map!(x => x.value);
	}

	/// Add a query parameter with the given key and value.
	/// If one already exists, there will now be two query parameters with the given name.
	void add(string key, string value) {
		params ~= Param(key, value);
	}

	/// Add a query parameter with the given key and value.
	/// If there are any existing parameters with the same key, they are removed and overwritten.
	/// The relative order of the remaining parameters is preserved; the new one goes last.
	void overwrite(string key, string value) {
		import std.algorithm : filter;
		import std.array : array;
		// Build a fresh array instead of editing in place, so every matching entry is dropped
		// and other slices of the old storage (e.g. in a copied URL) are not disturbed.
		params = params.filter!(x => x.key != key).array;
		params ~= Param(key, value);
	}

	// Input range over a snapshot of the parameter slice.
	private struct QueryParamRange {
		size_t i;
		const(Param)[] params;
		bool empty() { return i >= params.length; }
		void popFront() { i++; }
		Param front() { return params[i]; }
	}

	/**
		* A range over the query parameters.
		*
		* Usage:
		* ---
		* foreach (key, value; url.queryParams) {}
		* ---
		*/
	auto range() {
		return QueryParamRange(0, this.params);
	}
	/// ditto
	alias range this;
}
167 
/**
	* A Uniform Resource Locator.
	*
	* URLs can be parsed (see parseURL) and implicitly convert to strings.
	*/
struct URL {
	/// The URL scheme. For instance, ssh, ftp, or https.
	string scheme;

	/// The username in this URL. Usually absent. If present, there will also be a password.
	string user;

	/// The password in this URL. Usually absent.
	string pass;

	/// The hostname.
	string host;

	/**
		* The port.
		*
		* This is inferred from the scheme if it isn't present in the URL itself.
		* If the scheme is not known and the port is not present, the port will be given as 0.
		* For some schemes, port will not be sensible -- for instance, file or chrome-extension.
		*
		* If you explicitly need to detect whether the user provided a port, check the providedPort
		* field.
		*/
	@property ushort port() {
		if (providedPort != 0) {
			return providedPort;
		}
		if (auto p = scheme in schemeToDefaultPort) {
			return *p;
		}
		return 0;
	}

	/**
		* Set the port.
		*
		* This sets the providedPort field and is provided for convenience.
		*
		* Returns: the value that was assigned.
		*/
	@property ushort port(ushort value) {
		return providedPort = value;
	}

	/// The port that was explicitly provided in the URL. Zero means none was provided.
	ushort providedPort;

	/**
		* The path.
		*
		* For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
		* "/news/story/17774".
		*/
	string path;

	/**
		* Deprecated: this disallows multiple values for the same query string. Please use queryParams
		* instead.
		*
		* The query string elements.
		*
		* For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
		* elements will be ["visited": "false"].
		*
		* Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
		* ["item": ""].
		*
		* This field is mutable, so be cautious.
		*/
	deprecated("use queryParams") string[string] query;

	/**
		* The query parameters associated with this URL.
		*/
	QueryParams queryParams;

	/**
		* The fragment. In web documents, this typically refers to an anchor element.
		* For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
		*/
	string fragment;

	/**
		* Convert this URL to a string.
		* The string is properly formatted and usable for, eg, a web request.
		*/
	string toString() {
		return toString(false);
	}

	/**
		* Convert this URL to a string.
		* The string is intended to be human-readable rather than machine-readable.
		*/
	string toHumanReadableString() {
		return toString(true);
	}

	/*
	 * Shared implementation of toString and toHumanReadableString.
	 *
	 * When humanReadable is true the user, password, host and path are emitted verbatim;
	 * otherwise user, password and path segments are percent-encoded and the host is
	 * Punycode-encoded. Query parameters and the fragment are percent-encoded in both modes.
	 */
	private string toString(bool humanReadable) {
		Appender!string s;
		s ~= scheme;
		s ~= "://";
		if (user.length) {
			s ~= humanReadable ? user : user.percentEncode;
			s ~= ":";
			s ~= humanReadable ? pass : pass.percentEncode;
			s ~= "@";
		}
		s ~= humanReadable ? host : host.toPuny;
		if (providedPort) {
			// Omit the port when it merely restates the scheme's default.
			auto defaultPort = scheme in schemeToDefaultPort;
			if (defaultPort is null || *defaultPort != providedPort) {
				s ~= ":";
				s ~= providedPort.to!string;
			}
		}
		string p = path;
		if (p.length == 0 || p == "/") {
			s ~= '/';
		} else {
			if (p[0] == '/') {
				p = p[1..$];
			}
			if (humanReadable) {
				// The leading slash was stripped above (or was never there); put it back so the
				// path does not run into the host.
				s ~= '/';
				s ~= p;
			} else {
				foreach (part; p.split('/')) {
					s ~= '/';
					s ~= part.percentEncode;
				}
			}
		}

		// Emits "?key=value" for the first parameter and "&key=value" for the rest.
		bool first = true;
		void appendParam(string k, string v) {
			s ~= first ? '?' : '&';
			first = false;
			s ~= k.percentEncode;
			if (v.length > 0) {
				s ~= '=';
				s ~= v.percentEncode;
			}
		}
		if (queryParams.length) {
			foreach (k, v; queryParams) {
				appendParam(k, v);
			}
		} else {
			// Fall back to the deprecated associative array only when queryParams is empty.
			foreach (k, v; query) {
				appendParam(k, v);
			}
		}

		if (fragment.length) {
			s ~= '#';
			s ~= fragment.percentEncode;
		}
		return s.data;
	}

	/// Implicitly convert URLs to strings.
	alias toString this;

	/**
		* The append operator (~).
		*
		* The append operator for URLs returns a new URL with the given string appended as a path
		* element to the URL's path. It only adds new path elements (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Query elements are copied.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* auto randInt = random ~ "int";
		* writeln(randInt);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opBinary(string op : "~")(string subsequentPath) {
		URL other = this;
		other ~= subsequentPath;
		// The struct copy shares storage with this URL; give the result its own query data.
		if (query) {
			other.query = other.query.dup;
		}
		other.queryParams.params = queryParams.params.dup;
		return other;
	}

	/**
		* The append-in-place operator (~=).
		*
		* The append operator for URLs adds a path element to this URL. It only adds new path elements
		* (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* random ~= "int";
		* writeln(random);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opOpAssign(string op : "~")(string subsequentPath) {
		if (path.endsWith("/")) {
			// Avoid a doubled separator when both sides supply one.
			if (subsequentPath.startsWith("/")) {
				path ~= subsequentPath[1..$];
			} else {
				path ~= subsequentPath;
			}
		} else {
			// Supply the separator when neither side has one.
			if (!subsequentPath.startsWith("/")) {
				path ~= '/';
			}
			path ~= subsequentPath;
		}
		return this;
	}
}
398 
/**
	* Try to parse a URL from a string.
	*
	* This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
	* may be made.
	*
	* Params:
	*   value = the text to parse; when it contains no "//", the scheme is taken to be "http"
	*   url   = receives the result; it is reset to URL.init first and may be left partially
	*           filled in when parsing fails
	*
	* Returns: true on success; false when the port is not a valid ushort or a percent-encoded
	*   component cannot be decoded.
	*/
bool tryParseURL(string value, out URL url) {
	url = URL.init;
	// scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
	// Scheme is optional in common use. We infer 'http' if it's not given.
	auto i = value.indexOf("//");
	if (i > -1) {
		if (i > 1) {
			// Everything before the "//" except the final character (the ':') is the scheme.
			url.scheme = value[0..i-1];
		}
		value = value[i+2 .. $];
	} else {
		url.scheme = "http";
	}
	// [user:password@]host[:port]][/]path[?query][#fragment
	i = value.indexOfAny([':', '/']);
	if (i == -1) {
		// Just a hostname.
		url.host = value.fromPuny;
		return true;
	}

	if (value[i] == ':') {
		// This could be between username and password, or it could be between host and port.
		auto j = value.indexOfAny(['@', '/']);
		if (j > -1 && value[j] == '@') {
			// If the '@' comes before the first ':', there is a user but no password and the
			// colon belongs to host:port. Slicing [i+1 .. j] in that case would be out of range.
			immutable hasPassword = i < j;
			try {
				url.user = value[0 .. (hasPassword ? i : j)].percentDecode;
				if (hasPassword) {
					url.pass = value[i+1 .. j].percentDecode;
				}
			} catch (URLException) {
				return false;
			}
			value = value[j+1 .. $];
		}
	}

	// Whatever is left starts with the host, optionally followed by a port.
	i = value.indexOfAny([':', '/']);
	if (i == -1) {
		url.host = value.fromPuny;
		return true;
	}
	url.host = value[0..i].fromPuny;
	value = value[i .. $];
	if (value[0] == ':') {
		auto end = value.indexOf('/');
		if (end == -1) {
			end = value.length;
		}
		try {
			url.port = value[1 .. end].to!ushort;
		} catch (ConvException) {
			return false;
		}
		value = value[end .. $];
		if (value.length == 0) {
			return true;
		}
	}

	// The path runs up to the first '?' or '#', or to the end when there is neither.
	i = value.indexOfAny("?#");
	auto rawPath = i == -1 ? value : value[0..i];
	try {
		// Decoded inside the try in both cases so a malformed escape yields false, not a throw.
		url.path = rawPath.percentDecode;
	} catch (URLException) {
		return false;
	}
	if (i == -1) {
		return true;
	}

	auto c = value[i];
	value = value[i + 1 .. $];
	if (c == '?') {
		i = value.indexOf('#');
		string query;
		if (i < 0) {
			query = value;
			value = null;
		} else {
			query = value[0..i];
			value = value[i + 1 .. $];
		}
		auto queries = query.split('&');
		foreach (q; queries) {
			auto j = q.indexOf('=');
			string key, val;
			if (j < 0) {
				// A bare key with no '=' gets an empty value.
				key = q;
			} else {
				key = q[0..j];
				val = q[j + 1 .. $];
			}
			try {
				key = key.percentDecode;
				val = val.percentDecode;
			} catch (URLException) {
				return false;
			}
			// Populate both the deprecated map and its replacement.
			url.query[key] = val;
			url.queryParams.add(key, val);
		}
	}

	// Anything remaining is the fragment (empty when there was none).
	try {
		url.fragment = value.percentDecode;
	} catch (URLException) {
		return false;
	}

	return true;
}
516 
unittest {
	// Formatting driven by the deprecated `query` associative array.
	{
		// Basic.
		URL basic;
		basic.scheme = "https";
		basic.host = "example.org";
		basic.path = "/foo/bar";
		basic.query["hello"] = "world";
		basic.query["gibe"] = "clay";
		basic.fragment = "frag";
		// Associative arrays have no defined iteration order, so accept either one.
		auto rendered = basic.toString;
		immutable helloFirst = rendered == "https://example.org/foo/bar?hello=world&gibe=clay#frag";
		immutable gibeFirst = rendered == "https://example.org/foo/bar?gibe=clay&hello=world#frag";
		assert(helloFirst || gibeFirst, rendered);
	}
	{
		// Percent encoded.
		URL encoded;
		encoded.scheme = "https";
		encoded.host = "example.org";
		encoded.path = "/f☃o";
		encoded.query["❄"] = "❀";
		encoded.query["["] = "]";
		encoded.fragment = "ş";
		// Again, either parameter order is acceptable.
		auto rendered = encoded.toString;
		immutable flakeFirst =
			rendered == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F";
		immutable bracketFirst =
			rendered == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F";
		assert(flakeFirst || bracketFirst, rendered);
	}
	{
		// Port, user, pass.
		URL credentialed;
		credentialed.scheme = "https";
		credentialed.host = "example.org";
		credentialed.user = "dhasenan";
		credentialed.pass = "itsasecret";
		credentialed.port = 17;
		auto rendered = credentialed.toString;
		assert(rendered == "https://dhasenan:itsasecret@example.org:17/", rendered);
	}
	{
		// Query with no path.
		URL pathless;
		pathless.scheme = "https";
		pathless.host = "example.org";
		pathless.query["hi"] = "bye";
		auto rendered = pathless.toString;
		assert(rendered == "https://example.org/?hi=bye", rendered);
	}
}
579 
unittest
{
	// A scheme-relative URL: the host and path are still picked out.
	URL parsed = parseURL("//foo/bar");
	assert(parsed.host == "foo", "expected host foo, got " ~ parsed.host);
	assert(parsed.path == "/bar");
}
586 
unittest
{
	// Appending path elements to a scheme-less host:port, one after another.
	URL server = parseURL("localhost:5984");
	URL database = server ~ "db1";
	assert(database.toString == "http://localhost:5984/db1", database.toString);
	URL allDocs = database ~ "_all_docs";
	assert(allDocs.toString == "http://localhost:5984/db1/_all_docs", allDocs.toString);
}
595 
///
unittest {
	{
		// Basic.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/foo/bar";
			queryParams.add("hello", "world");
			queryParams.add("gibe", "clay");
			fragment = "frag";
		}
		// queryParams appends to an array and is iterated front to back, so the parameters come
		// out in the order they were added.
		assert(
				url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag",
				url.toString);
	}
	{
		// Several values for the same key.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/foo/bar";
			queryParams.add("hello", "world");
			queryParams.add("hello", "aether");
			fragment = "frag";
		}
		assert(
				url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag",
				url.toString);
	}
	{
		// Percent encoded.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/f☃o";
			queryParams.add("❄", "❀");
			queryParams.add("[", "]");
			fragment = "ş";
		}
		assert(
				url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F",
				url.toString);
	}
	{
		// Port, user, pass.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			user = "dhasenan";
			pass = "itsasecret";
			port = 17;
		}
		assert(
				url.toString == "https://dhasenan:itsasecret@example.org:17/",
				url.toString);
	}
	{
		// Query with no path.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			queryParams.add("hi", "bye");
		}
		assert(
				url.toString == "https://example.org/?hi=bye",
				url.toString);
	}
}
676 
unittest {
	// Percent decoding of every component.

	// Decoded, the authority below reads http://#:!:@
	immutable string source = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash";
	URL parsed = parseURL(source);
	assert(parsed.user == "#");
	assert(parsed.pass == "!:");
	assert(parsed.host == "example.org");
	assert(parsed.path == "/{/}");
	assert(parsed.queryParams[";"].front == "");
	assert(parsed.queryParams["&"].front == "=");
	assert(parsed.fragment == "#hash");

	// Formatting the parsed URL reproduces the input, once and twice over.
	auto once = parseURL(source).toString;
	assert(source == once, once);
	auto twice = parseURL(once).toString;
	assert(source == twice);
}
695 
unittest {
	// A Punycode host is stored in its Unicode form after parsing.
	URL parsed = parseURL("https://xn--m3h.xn--n3h.org/?hi=bye");
	assert(parsed.host == "☂.☃.org", parsed.host);
}
700 
unittest {
	// The machine-readable form keeps Punycode; the human-readable form shows Unicode.
	auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
	assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
	// Report the value actually under test if this fails.
	assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toHumanReadableString);
}
706 
unittest {
	// A Unicode host is Punycode-encoded when the URL is formatted.
	URL parsed = parseURL("https://☂.☃.org/?hi=bye");
	assert(parsed.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}
711 
///
unittest {
	// Appending to a URL that already has a path.
	URL base = "http://example.org/foo".parseURL;
	URL joined;
	// A missing separator is supplied.
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// A separator on the appended part is not doubled.
	joined = base ~ "/bar";
	assert(joined.toString == "http://example.org/foo/bar", joined.toString);
	base ~= "bar";
	assert(base.toString == "http://example.org/foo/bar");

	// The existing path ends with a slash: no second one is added.
	base = "http://example.org/foo/".parseURL;
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// Nor when the appended part starts with a slash as well.
	assert((base ~ "/bar").toString == "http://example.org/foo/bar");
	base ~= "/bar";
	assert(base.toString == "http://example.org/foo/bar");

	// No path at all.
	base = "http://example.org".parseURL;
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar");

	// The path is a lone slash.
	base = "http://example.org/".parseURL;
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar", base.toString);

	// No path, only a fragment.
	base = parseURL("ircs://irc.freenode.com/#d");
	assert(base.toString == "ircs://irc.freenode.com/#d", base.toString);
}
751 
unittest {
	// A URL can be handed to std.net.curl.get; only the type check matters, nothing is fetched.
	import std.net.curl;
	URL target = parseURL("http://example.org");
	assert(is(typeof(std.net.curl.get(target))));
}
757 
/**
	* Parse the input string as a URL, throwing on failure.
	*
	* This wraps tryParseURL.
	*
	* Params:
	*   value = the text to parse
	*
	* Returns: the parsed URL.
	*
	* Throws:
	*   URLException if the string was in an incorrect format.
	*/
URL parseURL(string value) {
	URL parsed;
	if (!tryParseURL(value, parsed)) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
771 
///
unittest {
	{
		// The scheme is inferred when absent.
		URL bare = "example.org".parseURL;
		assert(bare.scheme == "http");
		assert(bare.host == "example.org");
		assert(bare.path == "");
		assert(bare.port == 80);
		assert(bare.providedPort == 0);
		assert(bare.fragment == "");
	}
	{
		// Scheme and host only.
		URL simple = "https://example.org".parseURL;
		assert(simple.scheme == "https");
		assert(simple.host == "example.org");
		assert(simple.path == "");
		assert(simple.port == 443);
		assert(simple.providedPort == 0);
	}
	{
		// A path is present.
		URL withPath = "https://example.org/foo/bar".parseURL;
		assert(withPath.scheme == "https");
		assert(withPath.host == "example.org");
		assert(withPath.path == "/foo/bar", "expected /foo/bar but got " ~ withPath.path);
		assert(withPath.port == 443);
		assert(withPath.providedPort == 0);
	}
	{
		// The port is given explicitly.
		URL withPort = "https://example.org:1021/foo/bar".parseURL;
		assert(withPort.scheme == "https");
		assert(withPort.host == "example.org");
		assert(withPort.path == "/foo/bar", "expected /foo/bar but got " ~ withPort.path);
		assert(withPort.port == 1021);
		assert(withPort.providedPort == 1021);
	}
	{
		// User and password.
		URL withUser = "https://bob:secret@example.org/foo/bar".parseURL;
		assert(withUser.scheme == "https");
		assert(withUser.host == "example.org");
		assert(withUser.path == "/foo/bar");
		assert(withUser.port == 443);
		assert(withUser.user == "bob");
		assert(withUser.pass == "secret");
	}
	{
		// User and password, URL-encoded.
		URL encodedUser = "https://bob%21:secret%21%3F@example.org/foo/bar".parseURL;
		assert(encodedUser.scheme == "https");
		assert(encodedUser.host == "example.org");
		assert(encodedUser.path == "/foo/bar");
		assert(encodedUser.port == 443);
		assert(encodedUser.user == "bob!");
		assert(encodedUser.pass == "secret!?");
	}
	{
		// User, port and path together.
		URL everything = "https://bob:secret@example.org:2210/foo/bar".parseURL;
		assert(everything.scheme == "https");
		assert(everything.host == "example.org");
		assert(everything.path == "/foo/bar");
		assert(everything.port == 2210);
		assert(everything.user == "bob");
		assert(everything.pass == "secret");
		assert(everything.fragment == "");
	}
	{
		// A query string.
		URL withQuery = "https://example.org/?login=true".parseURL;
		assert(withQuery.scheme == "https");
		assert(withQuery.host == "example.org");
		assert(withQuery.path == "/", "expected path: / actual path: " ~ withQuery.path);
		assert(withQuery.queryParams["login"].front == "true");
		assert(withQuery.fragment == "");
	}
	{
		// A query string followed by a fragment.
		URL withFragment = "https://example.org/?login=true#justkidding".parseURL;
		assert(withFragment.scheme == "https");
		assert(withFragment.host == "example.org");
		assert(withFragment.path == "/", "expected path: / actual path: " ~ withFragment.path);
		assert(withFragment.queryParams["login"].front == "true");
		assert(withFragment.fragment == "justkidding");
	}
	{
		// URL-encoded path, query and fragment.
		URL escaped = "https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E".parseURL;
		assert(escaped.scheme == "https");
		assert(escaped.host == "example.org");
		assert(escaped.path == "/☃", "expected path: /☃ actual path: " ~ escaped.path);
		assert(escaped.queryParams["❄"].front == "=");
		assert(escaped.fragment == "^");
	}
}
870 
unittest {
	// Effective port: the scheme default versus an explicit value.
	assert("http://example.org".parseURL.port == 80);
	assert("http://example.org:5326".parseURL.port == 5326);

	// Scheme and credentials of a fully populated URL.
	URL redis = "redis://admin:password@redisbox.local:2201/path?query=value#fragment".parseURL;
	assert(redis.scheme == "redis");
	assert(redis.user == "admin");
	assert(redis.pass == "password");

	// Formatting adds the inferred scheme and drops a port equal to the default.
	assert("example.org".parseURL.toString == "http://example.org/");
	assert("http://example.org:80".parseURL.toString == "http://example.org/");

	// A non-default port is kept.
	assert("localhost:8070".parseURL.toString == "http://localhost:8070/");
}
885 
/**
	* Percent-encode a string.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* ASCII letters, digits, and the characters -._~ are copied unchanged. Every other byte of the
	* input is written as '%' followed by two uppercase hexadecimal digits, so a non-ASCII character
	* becomes one escape per byte of its UTF-8 encoding.
	*
	* Params:
	*   raw = the text to encode
	*
	* Returns: the encoded text.
	*/
string percentEncode(string raw) {
	// We *must* encode these characters: :/?#[]@!$&'()*+,;="
	// We *can* encode any other characters.
	// We *should not* encode alpha, numeric, or -._~.
	static immutable hexDigits = "0123456789ABCDEF";
	Appender!string app;
	// Walk the code units directly: the string is already UTF-8, so there is no need to decode
	// each character and encode it again, and malformed input is escaped rather than rejected.
	foreach (char unit; raw) {
		immutable unreserved =
			('a' <= unit && unit <= 'z') ||
			('A' <= unit && unit <= 'Z') ||
			('0' <= unit && unit <= '9') ||
			unit == '-' || unit == '.' || unit == '_' || unit == '~';
		if (unreserved) {
			app ~= unit;
			continue;
		}
		app ~= '%';
		app ~= hexDigits[unit >> 4];
		app ~= hexDigits[unit & 0x0F];
	}
	return app.data;
}
920 
///
unittest {
	// Unreserved characters pass through untouched.
	assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
	assert(percentEncode("~~--..__") == "~~--..__");
	assert(percentEncode("0123456789") == "0123456789");

	string e;

	// A non-ASCII character becomes one escape per UTF-8 byte.
	e = percentEncode("☃");
	assert(e == "%E2%98%83", "expected %E2%98%83 but got " ~ e);
}
932 
/**
	* Percent-decode a string.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* The decoded bytes are checked for validity before being returned as a string.
	*
	* Throws:
	*   URLException if the decoded bytes fail the validity check, or if percentDecodeRaw rejects
	*   the input.
	*/
@trusted string percentDecode(string encoded) {
	// Reinterpreting ubyte[] as string is why this function is @trusted rather than @safe: the
	// cast changes the pointer type. Both element types are single bytes with no references, and
	// the result is validated straight away, so no memory-safety problem can follow from it.
	auto decoded = cast(string) percentDecodeRaw(encoded);
	if (decoded.isValid) {
		return decoded;
	}
	throw new URLException("input contains invalid UTF data");
}
954 
///
unittest {
	// Text without escapes comes back unchanged.
	assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
	assert(percentDecode("~~--..__") == "~~--..__");
	assert(percentDecode("0123456789") == "0123456789");

	string e;

	// Three escapes decode to one three-byte UTF-8 character.
	e = percentDecode("%E2%98%83");
	assert(e == "☃", "expected a snowman but got " ~ e);
}
966 
/**
	* Percent-decode a string into a ubyte array.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* This yields a ubyte array and will not perform validation on the output. However, an improperly
	* formatted input string will result in a URLException.
	*
	* Params:
	*   encoded = the percent-encoded text; hexadecimal digits may be upper or lower case
	*
	* Returns: the decoded bytes; bytes that are not part of an escape are copied as they are.
	*
	* Throws:
	*   URLException if a '%' is not followed by exactly two hexadecimal digits.
	*/
ubyte[] percentDecodeRaw(string encoded) {
	// Value of one hexadecimal digit in either case, or -1 if the character is not one.
	static int hexValue(char c) @safe pure nothrow @nogc {
		if ('0' <= c && c <= '9') {
			return c - '0';
		}
		if ('A' <= c && c <= 'F') {
			return c - 'A' + 10;
		}
		if ('a' <= c && c <= 'f') {
			return c - 'a' + 10;
		}
		return -1;
	}

	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	Appender!(ubyte[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= cast(ubyte) encoded[i];
			continue;
		}
		// Compare the remaining length rather than computing length - 2, which wraps around for
		// inputs shorter than two characters.
		if (encoded.length - i < 3) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		immutable high = hexValue(encoded[i + 1]);
		immutable low = hexValue(encoded[i + 2]);
		if (high < 0 || low < 0) {
			// Reject the escape instead of silently decoding it to an arbitrary byte.
			throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
					"digits after percent symbol. Error at index " ~ i.to!string);
		}
		app ~= cast(ubyte)((high << 4) | low);
		i += 2;
	}
	return app.data;
}
996 
/*
 * Convert a hostname to the form used in a machine-readable URL.
 *
 * A hostname made up only of accepted ASCII characters is returned unchanged. If it contains any
 * character above 0x80, each dot-separated label is passed through punyEncode and the labels
 * are joined with dots again.
 *
 * Throws: URLException if the hostname contains a character at or below 0x80 that is outside the
 * accepted ranges 0x2C-0x39, 0x41-0x5A and 0x61-0x7A.
 */
private string toPuny(string unicodeHostname) {
	bool mustEncode = false;
	foreach (i, dchar d; unicodeHostname) {
		auto c = cast(uint) d;
		if (c > 0x80) {
			// Keep scanning so that illegal ASCII characters later in the name are still caught.
			mustEncode = true;
			continue;
		}
		// The second range ends at 0x40 ('@'); written as decimal 40 it could never match.
		if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
			throw new URLException(
					format(
						"domain name '%s' contains illegal character '%s' at position %s",
						unicodeHostname, d, i));
		}
	}
	if (!mustEncode) {
		return unicodeHostname;
	}
	return unicodeHostname.split('.').map!punyEncode.join(".");
}
1017 
// Run every dot-separated label of the hostname through punyDecode and join them with dots.
private string fromPuny(string hostname) {
	auto labels = hostname.split('.');
	auto decoded = labels.map!punyDecode;
	return decoded.join(".");
}
1021 
private {
	// Constants shared by the Punycode routines in this module.
	enum delimiter = '-';
	enum marker = "xn--";
	enum ulong damp = 700;
	enum ulong tmin = 1;
	enum ulong tmax = 26;
	enum ulong skew = 38;
	enum ulong base = 36;
	enum ulong initialBias = 72;
	enum dchar initialN = cast(dchar)128;

	// Compute the next bias value from the delta just used, the number of code points handled
	// so far, and whether this is the first adaptation.
	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
		enum ulong span = base - tmin;
		enum ulong threshold = (span * tmax) / 2;

		// The first delta is scaled down by damp, later ones are halved.
		delta /= firstTime ? damp : 2;
		delta += delta / numPoints;

		ulong k = 0;
		for (; delta > threshold; k += base) {
			delta /= span;
		}
		return k + (((span + 1) * delta) / (delta + skew));
	}
}
1048 
/**
	* Encode the input string using the Punycode algorithm.
	*
	* Punycode is used to encode a UTF domain name segment. A Punycode-encoded segment will be
	* marked with "xn--". Each segment is encoded separately. For instance, if you wish to encode
	* "☂.☃.com" in Punycode, you will get "xn--m3h.xn--n3h.com".
	*
	* In order to puny-encode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "☂.☃.com";
	* auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
	* ---
	*
	* Params:
	*   input = a single domain name segment
	*
	* Returns: the input itself when every code point is below 128; otherwise "xn--" followed by
	*   the encoded segment.
	*
	* Throws:
	*   URLException if the running delta exceeds uint.max or no code point is left to process.
	*/
string punyEncode(string input) {
	ulong delta = 0;
	dchar n = initialN;
	ulong bias = initialBias;
	Appender!string output;
	output ~= marker;
	ulong pushed = 0;
	ulong codePoints = 0;
	// Copy the basic code points first. Basic means strictly below 128: U+0080 itself is not
	// ASCII and has to go through the encoding loop below.
	foreach (dchar c; input) {
		codePoints++;
		if (c < initialN) {
			output ~= c;
			pushed++;
		}
	}
	if (pushed == codePoints) {
		// No encoding to do.
		return input;
	}
	if (pushed > 0) {
		output ~= delimiter;
	}
	bool first = true;
	while (pushed < codePoints) {
		// Find the smallest code point not yet handled.
		auto best = dchar.max;
		foreach (dchar c; input) {
			if (n <= c && c < best) {
				best = c;
			}
		}
		if (best == dchar.max) {
			throw new URLException("failed to find a new codepoint to process during punyencode");
		}
		// Widen before multiplying so the product cannot wrap in 32-bit arithmetic.
		delta += cast(ulong)(best - n) * (pushed + 1);
		if (delta > uint.max) {
			// TODO better error message
			throw new URLException("overflow during punyencode");
		}
		n = best;
		foreach (dchar c; input) {
			if (c < n) {
				delta++;
			}
			if (c == n) {
				// Write delta as a variable-length integer in the current bias.
				ulong q = delta;
				auto k = base;
				while (true) {
					ulong t;
					if (k <= bias) {
						t = tmin;
					} else if (k >= bias + tmax) {
						t = tmax;
					} else {
						t = k - bias;
					}
					if (q < t) {
						break;
					}
					output ~= digitToBasic(t + ((q - t) % (base - t)));
					q = (q - t) / (base - t);
					k += base;
				}
				output ~= digitToBasic(q);
				pushed++;
				bias = adapt(delta, pushed, first);
				first = false;
				delta = 0;
			}
		}
		delta++;
		n++;
	}
	return output.data;
}
1139 
/**
	* Decode one domain name segment using the Punycode algorithm (RFC 3492).
	*
	* Punycode represents a Unicode domain name segment using only ASCII. An encoded segment is
	* marked with the prefix "xn--". Each segment is decoded separately. For instance, decoding the
	* segments of "xn--m3h.xn--n3h.com" yields "☂.☃.com".
	*
	* In order to puny-decode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "xn--m3h.xn--n3h.com";
	* auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
	* ---
	*
	* Params:
	*   input = a single segment of a domain name
	*
	* Returns: input itself when it does not start with "xn--"; otherwise the decoded segment.
	*
	* Throws: URLException if the encoded part ends in the middle of a number, contains a character
	*   that is not a Punycode digit, or decodes to a value that overflows or is not a valid code
	*   point.
	*/
string punyDecode(string input) {
	if (!input.startsWith(marker)) {
		return input;
	}
	input = input[marker.length..$];

	// let n = initial_n
	dchar n = initialN;

	// let i = 0
	// let bias = initial_bias
	// let output = an empty string indexed from 0
	ulong i = 0;
	auto bias = initialBias;
	dchar[] output;
	// This reserves a bit more than necessary, but it should be more efficient overall than just
	// appending and inserting willy-nilly.
	output.reserve(input.length);

	// consume all code points before the last delimiter (if there is one)
	//   and copy them to output
	// then consume the delimiter itself
	// (Code points before the delimiter are copied as they are; nothing is rejected here.)
	auto end = input.lastIndexOf(delimiter);
	if (end > -1) {
		foreach (dchar c; input[0..end]) {
			output ~= c;
		}
		input = input[end+1 .. $];
	}

	// while the input is not exhausted do begin
	size_t pos = 0;
	while (pos < input.length) {
		//   let oldi = i
		//   let w = 1
		auto oldi = i;
		// w is 64 bits wide so that digit * w cannot wrap before the range checks below run.
		ulong w = 1;
		//   for k = base to infinity in steps of base do begin
		for (ulong k = base; k < uint.max; k += base) {
			//     consume a code point, or fail if there was none to consume
			if (pos >= input.length) {
				throw new URLException("unexpected end of input during punydecode");
			}
			// Digits are single bytes, so the input can be indexed bytewise; any other byte is
			// rejected by the digit check below.
			auto c = input[pos];
			pos++;
			//     let digit = the code point's digit-value, fail if it has none
			auto digit = basicToDigit(c);
			if (digit >= base) {
				throw new URLException("invalid digit during punydecode");
			}
			//     let i = i + digit * w, fail on overflow
			i += digit * w;
			if (i > uint.max) {
				throw new URLException("overflow during punydecode");
			}
			//     let t = tmin if k <= bias {+ tmin}, or
			//             tmax if k >= bias + tmax, or k - bias otherwise
			ulong t;
			if (k <= bias) {
				t = tmin;
			} else if (k >= bias + tmax) {
				t = tmax;
			} else {
				t = k - bias;
			}
			//     if digit < t then break
			if (digit < t) {
				break;
			}
			//     let w = w * (base - t), fail on overflow
			w *= (base - t);
			if (w > uint.max) {
				throw new URLException("overflow during punydecode");
			}
			//   end
		}
		auto count = output.length + 1;
		//   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
		bias = adapt(i - oldi, count, oldi == 0);
		//   let n = n + i div (length(output) + 1), fail on overflow
		ulong next = n + i / count;
		if (next > dchar.max || !isValidDchar(cast(dchar) next)) {
			throw new URLException("invalid code point during punydecode");
		}
		n = cast(dchar) next;
		//   let i = i mod (length(output) + 1)
		i %= count;
		//   {if n is a basic code point then fail}
		// (We aren't actually going to fail here; it's clear what this means.)
		//   insert n into output at position i
		(() @trusted { output.insertInPlace(cast(size_t) i, n); })();  // should be @safe but isn't marked
		//   increment i
		i++;
		// end
	}
	return output.to!string;
}
1236 
// Maps a digit value to the character that represents it: 0..25 become 'a'..'z' and 26..35
// become '0'..'9'. Lifted from punycode.js.
private dchar digitToBasic(ulong digit) {
	// 97 is 'a'; 22 is '0' - 26.
	immutable ulong shift = (digit < 26) ? 97 : 22;
	return cast(dchar)(digit + shift);
}
1241 
// Maps a character to the digit value it represents: '0'..'9' become 26..35, and letters of either
// case become 0..25. Any other character yields `base`. Lifted from punycode.js.
private uint basicToDigit(char c) {
	switch (c) {
		case '0': .. case '9':
			return c - '0' + 26;
		case 'A': .. case 'Z':
			return c - 'A';
		case 'a': .. case 'z':
			return c - 'a';
		default:
			return base;
	}
}
1256 
// punyEncode checked against known encodings. On failure the assertion message is the output that
// was actually produced.
unittest {
	static struct Case {
		string plain;
		string encoded;
	}

	immutable Case[] cases = [
		Case("b\u00FCcher", "xn--bcher-kva"),
		Case("b\u00FCc\u00FCher", "xn--bcher-kvab"),
		Case("ýbücher", "xn--bcher-kvaf"),
		Case("mañana", "xn--maana-pta"),
		Case("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
			~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
			"xn--egbpdaj6bu4bxfgehfvwxn"),
	];

	foreach (c; cases) {
		auto actual = punyEncode(c.plain);
		assert(actual == c.encoded, actual);
	}
}
1285 
// punyDecode checked against known decodings.
unittest {
	assert(punyDecode("xn--maana-pta") == "mañana");

	auto decoded = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
	assert(decoded == "ليهمابتكلموشعربي؟", decoded);
}
1295 
// Whole-domain conversion in both directions, one segment at a time.
unittest {
	import std.string, std.algorithm, std.array, std.range;

	string unicodeDomain = "☂.☃.com";
	string asciiDomain = "xn--m3h.xn--n3h.com";

	auto decoded = asciiDomain.splitter(".").map!(punyDecode).join(".");
	assert(decoded == unicodeDomain, decoded);

	auto encoded = unicodeDomain.splitter(".").map!(punyEncode).join(".");
	assert(encoded == asciiDomain, encoded);
}