url source code

1 /**
2 	* A URL handling library.
3 	*
	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
5 	* elements like port, path, username, and password.
6 	*
7 	* This module aims to make it simple to muck about with them.
8 	*
9 	* Example usage:
10 	* ---
11 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
12 	* auto files = system("ssh", url.toString, "ls").splitLines;
13 	* foreach (file; files) {
14 	*		system("scp", url ~ file, ".");
15 	* }
16 	* ---
17 	*
18 	* License: The MIT license.
19 	*/
20 module url;
21 
22 import std.algorithm;
23 import std.array;
24 import std.conv;
25 import std.encoding;
26 import std.string;
27 import std.utf;
28 
29 @safe:
30 
/**
 * An exception thrown when something bad happens with URLs.
 *
 * Params:
 *   msg  = description of the failure
 *   file = source file to report as the throw site; defaults to the caller's file
 *   line = source line to report as the throw site; defaults to the caller's line
 */
class URLException : Exception {
	this(string msg, string file = __FILE__, size_t line = __LINE__) {
		// Forward the caller's location. Without this, every URLException would report this
		// constructor as the place it was thrown from.
		super(msg, file, line);
	}
}
35 
/**
 * Default ports, keyed by URL scheme.
 *
 * The table is a best guess rather than an authority: it is not exhaustive, some schemes have no
 * notion of a port, and some schemes that do use ports are not tied to a single one.
 */
ushort[string] schemeToDefaultPort;

static this() {
	// One row per known scheme; the rows are copied into the associative array below.
	static struct Entry {
		string scheme;
		ushort port;
	}

	static immutable Entry[] defaults = [
		Entry("aaa", 3868),
		Entry("aaas", 5658),
		Entry("acap", 674),
		Entry("amqp", 5672),
		Entry("cap", 1026),
		Entry("coap", 5683),
		Entry("coaps", 5684),
		Entry("dav", 443),
		Entry("dict", 2628),
		Entry("ftp", 21),
		Entry("git", 9418),
		Entry("go", 1096),
		Entry("gopher", 70),
		Entry("http", 80),
		Entry("https", 443),
		Entry("ws", 80),
		Entry("wss", 443),
		Entry("iac", 4569),
		Entry("icap", 1344),
		Entry("imap", 143),
		Entry("ipp", 631),
		Entry("ipps", 631),  // yes, they're both mapped to port 631
		Entry("irc", 6667),  // De facto default port, not the IANA reserved port.
		Entry("ircs", 6697),
		Entry("iris", 702),  // defaults to iris.beep
		Entry("iris.beep", 702),
		Entry("iris.lwz", 715),
		Entry("iris.xpc", 713),
		Entry("iris.xpcs", 714),
		Entry("jabber", 5222),  // client-to-server
		Entry("ldap", 389),
		Entry("ldaps", 636),
		Entry("msrp", 2855),
		Entry("msrps", 2855),
		Entry("mtqp", 1038),
		Entry("mupdate", 3905),
		Entry("news", 119),
		Entry("nfs", 2049),
		Entry("pop", 110),
		Entry("redis", 6379),
		Entry("reload", 6084),
		Entry("rsync", 873),
		Entry("rtmfp", 1935),
		Entry("rtsp", 554),
		Entry("shttp", 80),
		Entry("sieve", 4190),
		Entry("sip", 5060),
		Entry("sips", 5061),
		Entry("smb", 445),
		Entry("smtp", 25),
		Entry("snews", 563),
		Entry("snmp", 161),
		Entry("soap.beep", 605),
		Entry("ssh", 22),
		Entry("stun", 3478),
		Entry("stuns", 5349),
		Entry("svn", 3690),
		Entry("teamspeak", 9987),
		Entry("telnet", 23),
		Entry("tftp", 69),
		Entry("tip", 3372),
	];

	foreach (entry; defaults) {
		schemeToDefaultPort[entry.scheme] = entry.port;
	}
}
109 
/**
 * A collection of query parameters.
 *
 * This is effectively a multimap of string -> strings. Parameters are kept in insertion order
 * and the same key may appear more than once.
 */
struct QueryParams {
	import std.typecons;
	alias Tuple!(string, "key", string, "value") Param;
	Param[] params;

	/// The total number of parameters, counting repeated keys once per occurrence.
	@property size_t length() {
		return params.length;
	}

	/**
	 * Get a range over the query parameter values for the given key.
	 *
	 * Only values stored under exactly this key are produced, in insertion order. The range is
	 * empty if the key is absent.
	 */
	auto opIndex(string key) {
		// filter, not find: find would return everything from the first match to the end of
		// the list, including values that belong to other keys.
		return params.filter!(x => x.key == key).map!(x => x.value);
	}

	/// Add a query parameter with the given key and value.
	/// If one already exists, there will now be two query parameters with the given name.
	void add(string key, string value) {
		params ~= Param(key, value);
	}

	/// Add a query parameter with the given key and value.
	/// If there are any existing parameters with the same key, they are removed and overwritten.
	void overwrite(string key, string value) {
		// Build a fresh array without the key. Removing in place by swapping in the last
		// element would skip that swapped-in element and could leave a stale duplicate.
		params = params.filter!(p => p.key != key).array;
		params ~= Param(key, value);
	}

	// Minimal input range over a snapshot of the parameter slice.
	private struct QueryParamRange {
		size_t i;
		const(Param)[] params;
		bool empty() { return i >= params.length; }
		void popFront() { i++; }
		Param front() { return params[i]; }
	}

	/**
	 * A range over the query parameters.
	 *
	 * Usage:
	 * ---
	 * foreach (key, value; url.queryParams) {}
	 * ---
	 */
	auto range() {
		return QueryParamRange(0, this.params);
	}
	/// ditto
	alias range this;
}
169 
/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL {
	/// The URL scheme. For instance, ssh, ftp, or https.
	string scheme;

	/// The username in this URL. Usually absent. If present, there will also be a password.
	string user;

	/// The password in this URL. Usually absent.
	string pass;

	/// The hostname.
	string host;

	/**
	 * The port.
	 *
	 * This is inferred from the scheme if it isn't present in the URL itself.
	 * If the scheme is not known and the port is not present, the port will be given as 0.
	 * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
	 *
	 * If you explicitly need to detect whether the user provided a port, check the providedPort
	 * field.
	 */
	@property ushort port() {
		if (providedPort != 0) {
			return providedPort;
		}
		if (auto p = scheme in schemeToDefaultPort) {
			return *p;
		}
		return 0;
	}

	/**
	 * Set the port.
	 *
	 * This sets the providedPort field and is provided for convenience.
	 */
	@property ushort port(ushort value) {
		return providedPort = value;
	}

	/// The port that was explicitly provided in the URL.
	ushort providedPort;

	/**
	 * The path.
	 *
	 * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
	 * "/news/story/17774".
	 */
	string path;

	/**
	 * Deprecated: this disallows multiple values for the same query string. Please use queryParams
	 * instead.
	 *
	 * The query string elements.
	 *
	 * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
	 * elements will be ["visited": "false"].
	 *
	 * Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
	 * ["item": ""].
	 *
	 * This field is mutable, so be cautious.
	 */
	deprecated("use queryParams") string[string] query;

	/**
	 * The query parameters associated with this URL.
	 */
	QueryParams queryParams;

	/**
	 * The fragment. In web documents, this typically refers to an anchor element.
	 * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
	 */
	string fragment;

	/**
	 * Convert this URL to a string.
	 * The string is properly formatted and usable for, eg, a web request.
	 */
	string toString() {
		return toString(false);
	}

	/**
	 * Convert this URL to a string.
	 * The string is intended to be human-readable rather than machine-readable: user, password
	 * and path are emitted without percent-encoding and the host without Punycode.
	 */
	string toHumanReadableString() {
		return toString(true);
	}

	// Shared serializer behind toString and toHumanReadableString.
	private string toString(bool humanReadable) {
		Appender!string s;
		s ~= scheme;
		s ~= "://";
		if (user !is null) {
			s ~= humanReadable ? user : user.percentEncode;
			s ~= ":";
			s ~= humanReadable ? pass : pass.percentEncode;
			s ~= "@";
		}
		s ~= humanReadable ? host : host.toPuny;
		if (providedPort) {
			// Only spell out the port when it differs from the scheme's default.
			if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) {
				s ~= ":";
				s ~= providedPort.to!string;
			}
		}
		string p = path;
		if (p.length == 0 || p == "/") {
			s ~= '/';
		} else {
			if (p[0] == '/') {
				p = p[1..$];
			}
			if (humanReadable) {
				// The leading slash was stripped above (or was never there), so the separator
				// between host and path has to be written explicitly.
				s ~= '/';
				s ~= p;
			} else {
				// Encode each segment separately so the slashes between them survive.
				foreach (part; p.split('/')) {
					s ~= '/';
					s ~= part.percentEncode;
				}
			}
		}
		if (queryParams.length) {
			bool first = true;
			s ~= '?';
			foreach (k, v; queryParams) {
				if (!first) {
					s ~= '&';
				}
				first = false;
				s ~= k.percentEncode;
				if (v.length > 0) {
					s ~= '=';
					s ~= v.percentEncode;
				}
			}
		} else if (query) {
			// Fall back to the deprecated associative array when queryParams is empty.
			s ~= '?';
			bool first = true;
			foreach (k, v; query) {
				if (!first) {
					s ~= '&';
				}
				first = false;
				s ~= k.percentEncode;
				if (v.length > 0) {
					s ~= '=';
					s ~= v.percentEncode;
				}
			}
		}
		if (fragment) {
			s ~= '#';
			s ~= fragment.percentEncode;
		}
		return s.data;
	}

	/// Implicitly convert URLs to strings.
	alias toString this;

	/**
	 * The append operator (~).
	 *
	 * The append operator for URLs returns a new URL with the given string appended as a path
	 * element to the URL's path. It only adds new path elements (or sequences of path elements).
	 *
	 * Don't worry about path separators; whether you include them or not, it will just work.
	 *
	 * Query elements are copied.
	 *
	 * Examples:
	 * ---
	 * auto random = "http://testdata.org/random".parseURL;
	 * auto randInt = random ~ "int";
	 * writeln(randInt);  // prints "http://testdata.org/random/int"
	 * ---
	 */
	URL opBinary(string op : "~")(string subsequentPath) {
		URL other = this;
		other ~= subsequentPath;
		if (query) {
			other.query = other.query.dup;
		}
		// Give the result its own parameter array so that in-place edits to one URL's
		// parameters cannot show up in the other.
		other.queryParams.params = other.queryParams.params.dup;
		return other;
	}

	/**
	 * The append-in-place operator (~=).
	 *
	 * The append operator for URLs adds a path element to this URL. It only adds new path elements
	 * (or sequences of path elements).
	 *
	 * Don't worry about path separators; whether you include them or not, it will just work.
	 *
	 * Examples:
	 * ---
	 * auto random = "http://testdata.org/random".parseURL;
	 * random ~= "int";
	 * writeln(random);  // prints "http://testdata.org/random/int"
	 * ---
	 */
	URL opOpAssign(string op : "~")(string subsequentPath) {
		if (path.endsWith("/")) {
			// Path already supplies the separator; drop a duplicate from the new element.
			if (subsequentPath.startsWith("/")) {
				path ~= subsequentPath[1..$];
			} else {
				path ~= subsequentPath;
			}
		} else {
			if (!subsequentPath.startsWith("/")) {
				path ~= '/';
			}
			path ~= subsequentPath;
		}
		return this;
	}
}
400 
/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * Params:
 *   value = the text to parse
 *   url   = receives the parsed URL; reset to URL.init before parsing starts
 *
 * Returns: true if parsing succeeded; false if a percent-encoded component or the port could
 *   not be decoded.
 */
bool tryParseURL(string value, out URL url) {
	url = URL.init;
	// scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
	// Scheme is optional in common use. We infer 'http' if it's not given.
	// A scheme must come before any path, query or fragment; otherwise a "//" or "://" that
	// merely appears later (for instance inside a query value) would be mistaken for one.
	auto i = value.indexOf("://");
	if (i > -1 && value[0 .. i].indexOfAny("/?#") == -1) {
		if (i > 0) {
			url.scheme = value[0 .. i];
		}
		value = value[i + 3 .. $];
	} else if (value.startsWith("//")) {
		// Scheme-relative reference: leave the scheme empty.
		value = value[2 .. $];
	} else {
		url.scheme = "http";
	}

	// [user[:password]@]host[:port]][/]path[?query][#fragment
	// User info ends at the first '@' that comes before any path, query or fragment.
	auto at = value.indexOfAny("@/?#");
	if (at > -1 && value[at] == '@') {
		auto userinfo = value[0 .. at];
		auto colon = userinfo.indexOf(':');
		try {
			if (colon == -1) {
				url.user = userinfo.percentDecode;
			} else {
				url.user = userinfo[0 .. colon].percentDecode;
				url.pass = userinfo[colon + 1 .. $].percentDecode;
			}
		} catch (URLException) {
			return false;
		}
		value = value[at + 1 .. $];
	}

	// host[:port]][/]path[?query][#fragment
	i = value.indexOfAny(":/");
	if (i == -1) {
		// Just a hostname.
		url.host = value.fromPuny;
		return true;
	}
	url.host = value[0..i].fromPuny;
	value = value[i .. $];
	if (value[0] == ':') {
		auto end = value.indexOf('/');
		if (end == -1) {
			end = value.length;
		}
		try {
			url.port = value[1 .. end].to!ushort;
		} catch (ConvException) {
			return false;
		}
		value = value[end .. $];
		if (value.length == 0) {
			return true;
		}
	}

	i = value.indexOfAny("?#");
	try {
		// Guarded like every other decode so that bad input yields false, not an exception.
		url.path = (i == -1 ? value : value[0..i]).percentDecode;
	} catch (URLException) {
		return false;
	}
	if (i == -1) {
		return true;
	}

	auto c = value[i];
	value = value[i + 1 .. $];
	if (c == '?') {
		i = value.indexOf('#');
		string query;
		if (i < 0) {
			query = value;
			value = null;
		} else {
			query = value[0..i];
			value = value[i + 1 .. $];
		}
		auto queries = query.split('&');
		foreach (q; queries) {
			auto j = q.indexOf('=');
			string key, val;
			if (j < 0) {
				key = q;
			} else {
				key = q[0..j];
				val = q[j + 1 .. $];
			}
			try {
				key = key.percentDecode;
				val = val.percentDecode;
			} catch (URLException) {
				return false;
			}
			// Populate both the deprecated map and its ordered replacement.
			url.query[key] = val;
			url.queryParams.add(key, val);
		}
	}

	// Whatever remains is the fragment (empty when there was none).
	try {
		url.fragment = value.percentDecode;
	} catch (URLException) {
		return false;
	}

	return true;
}
518 
unittest {
	// Serialization driven by the deprecated `query` associative array.
	{
		// Basic.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.path = "/foo/bar";
		url.query["hello"] = "world";
		url.query["gibe"] = "clay";
		url.fragment = "frag";
		auto text = url.toString;
		// Either ordering of the two parameters is accepted.
		assert(
				text == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
				text == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
				text);
	}
	{
		// Percent encoded.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.path = "/f☃o";
		url.query["❄"] = "❀";
		url.query["["] = "]";
		url.fragment = "ş";
		auto text = url.toString;
		// Either ordering of the two parameters is accepted.
		assert(
				text == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
				text == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
				text);
	}
	{
		// Port, user, pass.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.user = "dhasenan";
		url.pass = "itsasecret";
		url.port = 17;
		auto text = url.toString;
		assert(text == "https://dhasenan:itsasecret@example.org:17/", text);
	}
	{
		// Query with no path.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.query["hi"] = "bye";
		auto text = url.toString;
		assert(text == "https://example.org/?hi=bye", text);
	}
}
581 
unittest
{
    // A URL that starts with "//" still yields its host and path.
    auto parsed = "//foo/bar".parseURL;
    assert(parsed.host == "foo", "expected host foo, got " ~ parsed.host);
    assert(parsed.path == "/bar");
}
588 
unittest
{
    // Appending path elements to a host:port URL that has no scheme and no path.
    auto server = "localhost:5984".parseURL;

    auto database = server ~ "db1";
    auto databaseText = database.toString;
    assert(databaseText == "http://localhost:5984/db1", databaseText);

    auto listing = database ~ "_all_docs";
    auto listingText = listing.toString;
    assert(listingText == "http://localhost:5984/db1/_all_docs", listingText);
}
597 
///
unittest {
	{
		// Basic.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.path = "/foo/bar";
		url.queryParams.add("hello", "world");
		url.queryParams.add("gibe", "clay");
		url.fragment = "frag";
		auto text = url.toString;
		// Either ordering of the two parameters is accepted.
		assert(
				text == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
				text == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
				text);
	}
	{
		// Passing an array of query values.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.path = "/foo/bar";
		url.queryParams.add("hello", "world");
		url.queryParams.add("hello", "aether");
		url.fragment = "frag";
		auto text = url.toString;
		// Either ordering of the two values is accepted.
		assert(
				text == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
				text == "https://example.org/foo/bar?hello=aether&hello=world#frag",
				text);
	}
	{
		// Percent encoded.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.path = "/f☃o";
		url.queryParams.add("❄", "❀");
		url.queryParams.add("[", "]");
		url.fragment = "ş";
		auto text = url.toString;
		// Either ordering of the two parameters is accepted.
		assert(
				text == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
				text == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
				text);
	}
	{
		// Port, user, pass.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.user = "dhasenan";
		url.pass = "itsasecret";
		url.port = 17;
		auto text = url.toString;
		assert(text == "https://dhasenan:itsasecret@example.org:17/", text);
	}
	{
		// Query with no path.
		URL url;
		url.scheme = "https";
		url.host = "example.org";
		url.queryParams.add("hi", "bye");
		auto text = url.toString;
		assert(text == "https://example.org/?hi=bye", text);
	}
}
678 
unittest {
	// Percent decoding.

	// The user info below decodes to user "#" and password "!:".
	auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash";
	auto parsed = urlString.parseURL;
	assert(parsed.user == "#");
	assert(parsed.pass == "!:");
	assert(parsed.host == "example.org");
	assert(parsed.path == "/{/}");
	assert(parsed.queryParams[";"].front == "");
	assert(parsed.queryParams["&"].front == "=");
	assert(parsed.fragment == "#hash");

	// Round trip, once and then twice.
	auto once = urlString.parseURL.toString;
	assert(urlString == once, once);
	auto twice = once.parseURL.toString;
	assert(urlString == twice);
}
697 
unittest {
	// Punycode host labels are decoded while parsing.
	auto parsed = parseURL("https://xn--m3h.xn--n3h.org/?hi=bye");
	assert(parsed.host == "☂.☃.org", parsed.host);
}
702 
unittest {
	// The machine-readable form keeps the host in Punycode; the human-readable form shows the
	// decoded host.
	auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
	assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
	// On failure, report the string that was actually compared.
	assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toHumanReadableString);
}
708 
unittest {
	// A Unicode host is Punycode-encoded when the URL is serialized.
	auto parsed = parseURL("https://☂.☃.org/?hi=bye");
	auto text = parsed.toString;
	assert(text == "https://xn--m3h.xn--n3h.org/?hi=bye");
}
713 
///
unittest {
	// Starting from a URL that already has a path.
	auto base = parseURL("http://example.org/foo");
	URL joined;
	// Without a leading slash on the new element, one is inserted.
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// With a leading slash, no second slash is added.
	joined = base ~ "/bar";
	assert(joined.toString == "http://example.org/foo/bar", joined.toString);
	base ~= "bar";
	assert(base.toString == "http://example.org/foo/bar");

	// The existing path ends with a slash, so none is added.
	base = parseURL("http://example.org/foo/");
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// The same holds when the new element brings its own slash.
	assert((base ~ "/bar").toString == "http://example.org/foo/bar");
	base ~= "/bar";
	assert(base.toString == "http://example.org/foo/bar");

	// No path at all.
	base = parseURL("http://example.org");
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar");

	// The path is a lone slash.
	base = parseURL("http://example.org/");
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar", base.toString);

	// Only a fragment follows the host.
	base = "ircs://irc.freenode.com/#d".parseURL;
	assert(base.toString == "ircs://irc.freenode.com/#d", base.toString);
}
753 
unittest {
	// A URL must be accepted wherever std.net.curl.get expects its URL argument.
	import std.net.curl;
	auto target = parseURL("http://example.org");
	enum bool accepted = is(typeof(std.net.curl.get(target)));
	assert(accepted);
}
759 
/**
 * Parse the input string as a URL.
 *
 * Params:
 *   value = the text to parse
 *
 * Returns: the parsed URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
	URL parsed;
	if (!tryParseURL(value, parsed)) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
773 
///
unittest {
	{
		// Infer scheme
		auto u = "example.org".parseURL;
		assert(u.scheme == "http");
		assert(u.host == "example.org");
		assert(u.path == "");
		assert(u.port == 80);
		assert(u.providedPort == 0);
		assert(u.fragment == "");
	}
	{
		// Simple host and scheme
		auto u = "https://example.org".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "");
		assert(u.port == 443);
		assert(u.providedPort == 0);
	}
	{
		// With path
		auto u = "https://example.org/foo/bar".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/foo/bar", "expected /foo/bar but got " ~ u.path);
		assert(u.port == 443);
		assert(u.providedPort == 0);
	}
	{
		// With explicit port
		auto u = "https://example.org:1021/foo/bar".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/foo/bar", "expected /foo/bar but got " ~ u.path);
		assert(u.port == 1021);
		assert(u.providedPort == 1021);
	}
	{
		// With user
		auto u = "https://bob:secret@example.org/foo/bar".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/foo/bar");
		assert(u.port == 443);
		assert(u.user == "bob");
		assert(u.pass == "secret");
	}
	{
		// With user, URL-encoded
		auto u = "https://bob%21:secret%21%3F@example.org/foo/bar".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/foo/bar");
		assert(u.port == 443);
		assert(u.user == "bob!");
		assert(u.pass == "secret!?");
	}
	{
		// With user and port and path
		auto u = "https://bob:secret@example.org:2210/foo/bar".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/foo/bar");
		assert(u.port == 2210);
		assert(u.user == "bob");
		assert(u.pass == "secret");
		assert(u.fragment == "");
	}
	{
		// With query string
		auto u = "https://example.org/?login=true".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/", "expected path: / actual path: " ~ u.path);
		assert(u.queryParams["login"].front == "true");
		assert(u.fragment == "");
	}
	{
		// With query string and fragment
		auto u = "https://example.org/?login=true#justkidding".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/", "expected path: / actual path: " ~ u.path);
		assert(u.queryParams["login"].front == "true");
		assert(u.fragment == "justkidding");
	}
	{
		// With URL-encoded values
		auto u = "https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E".parseURL;
		assert(u.scheme == "https");
		assert(u.host == "example.org");
		assert(u.path == "/☃", "expected path: /☃ actual path: " ~ u.path);
		assert(u.queryParams["❄"].front == "=");
		assert(u.fragment == "^");
	}
}
872 
unittest {
	// Default versus explicit port.
	assert("http://example.org".parseURL.port == 80);
	assert("http://example.org:5326".parseURL.port == 5326);

	// Scheme and credentials of a fully populated URL.
	auto full = "redis://admin:password@redisbox.local:2201/path?query=value#fragment".parseURL;
	assert(full.scheme == "redis");
	assert(full.user == "admin");
	assert(full.pass == "password");

	// A port equal to the scheme default is left out when serializing.
	assert("example.org".parseURL.toString == "http://example.org/");
	assert("http://example.org:80".parseURL.toString == "http://example.org/");

	// Any other port is written out.
	assert("localhost:8070".parseURL.toString == "http://localhost:8070/");
}
887 
/**
 * Percent-encode a string.
 *
 * ASCII letters, digits and the characters -._~ are copied through unchanged. Every other code
 * point is written as the UTF-8 encoding of that code point, with each octet rendered as '%'
 * followed by two uppercase hex digits.
 *
 * Params:
 *   raw = the text to encode
 *
 * Returns: the percent-encoded text.
 */
string percentEncode(string raw) {
	// Pin `encode` to std.utf; std.encoding exports the same name.
	import std.utf : encode;

	static immutable hexDigits = "0123456789ABCDEF";

	Appender!string encoded;
	foreach (dchar codePoint; raw) {
		immutable bool unreserved =
			(codePoint >= 'a' && codePoint <= 'z') ||
			(codePoint >= 'A' && codePoint <= 'Z') ||
			(codePoint >= '0' && codePoint <= '9') ||
			codePoint == '-' || codePoint == '.' || codePoint == '_' || codePoint == '~';
		if (unreserved) {
			encoded ~= codePoint;
			continue;
		}
		// Everything else, ASCII or not, is escaped octet by octet in its UTF-8 form.
		char[4] utf8;
		immutable count = encode(utf8, codePoint);
		foreach (char unit; utf8[0 .. count]) {
			encoded ~= '%';
			encoded ~= hexDigits[unit >> 4];
			encoded ~= hexDigits[unit & 0x0F];
		}
	}
	return encoded.data;
}
922 
///
unittest {
	// Text made only of unreserved characters comes back unchanged.
	foreach (plain; ["IDontNeedNoPercentEncoding", "~~--..__", "0123456789"]) {
		assert(percentEncode(plain) == plain);
	}

	// A non-ASCII code point is escaped as its UTF-8 octets.
	string snowman = percentEncode("☃");
	assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}
934 
/**
 * Percent-decode a string.
 *
 * The input is decoded to raw octets by percentDecodeRaw and the result is then checked to be
 * valid UTF-8 before it is returned as a string.
 *
 * Params:
 *   encoded = the percent-encoded text
 *
 * Returns: the decoded text.
 *
 * Throws:
 *   URLException if the decoded octets are not valid UTF data.
 */
@trusted string percentDecode(string encoded) {
	// Reinterpreting ubyte[] as string is a pointer-type cast, which @safe code rejects. Both
	// element types are one byte wide and hold no references, and the result is validated
	// before anyone else sees it.
	auto decoded = cast(string) percentDecodeRaw(encoded);
	if (decoded.isValid) {
		return decoded;
	}
	throw new URLException("input contains invalid UTF data");
}
956 
///
unittest {
	// Text without escapes is returned unchanged.
	foreach (plain; ["IDontNeedNoPercentDecoding", "~~--..__", "0123456789"]) {
		assert(percentDecode(plain) == plain);
	}

	// Hex digits are accepted in either case.
	string upper = percentDecode("%E2%98%83");
	assert(upper == "☃", "expected a snowman but got" ~ upper);

	string lower = percentDecode("%e2%98%83");
	assert(lower == "☃", "expected a snowman but got" ~ lower);

	// %es is an invalid percent sequence: 's' is not a hex digit.
	bool rejectedNonHex = false;
	try {
		percentDecode("%es");
	} catch (URLException) {
		rejectedNonHex = true;
	}
	assert(rejectedNonHex, "expected exception not thrown");

	// A percent sign followed by a single character is rejected as well.
	bool rejectedTruncated = false;
	try {
		percentDecode("%e");
	} catch (URLException) {
		rejectedTruncated = true;
	}
	assert(rejectedTruncated, "expected exception not thrown");
}
984 
/**
 * Percent-decode a string into a ubyte array.
 *
 * Each "%XY" sequence, where X and Y are hex digits in either case, becomes the single octet
 * 0xXY. All other characters are copied through as they are.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Params:
 *   encoded = the percent-encoded text
 *
 * Returns: the decoded octets.
 *
 * Throws:
 *   URLException if a '%' is not followed by two hex digits.
 */
ubyte[] percentDecodeRaw(string encoded) {
	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	Appender!(ubyte[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= cast(ubyte) encoded[i];
			continue;
		}
		// Compare as i + 2 rather than length - 2: the length is unsigned, so the subtraction
		// wraps around for inputs shorter than two characters and the check would never fire.
		if (i + 2 >= encoded.length) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
			auto b = fromHex(encoded[i + 1]);
			auto c = fromHex(encoded[i + 2]);
			app ~= cast(ubyte)((b << 4) | c);
		} else {
			throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		// Skip the two hex digits; the loop increment steps past the '%' itself.
		i += 2;
	}
	return app.data;
}
1019 
// True when c is an ASCII hexadecimal digit in either case.
private bool isHex(char c) {
	switch (c) {
		case '0': .. case '9':
		case 'a': .. case 'f':
		case 'A': .. case 'F':
			return true;
		default:
			return false;
	}
}
1025 
// Numeric value (0-15) of a hex digit in either case. A character that is not a hex digit
// produces 255, the ubyte form of "not found".
private ubyte fromHex(char s) {
	immutable char upper = (s >= 'a' && s <= 'z') ? cast(char)(s - ('a' - 'A')) : s;
	foreach (position, char digit; "0123456789ABCDEF") {
		if (digit == upper) {
			return cast(ubyte) position;
		}
	}
	return cast(ubyte) -1;
}
1033 
// Convert a host name to the form used in a serialized URL.
//
// A host consisting only of permitted ASCII characters is returned unchanged. If a code point
// above 0x80 is found, every dot-separated label is passed through punyEncode. An ASCII
// character outside the permitted ranges raises a URLException.
private string toPuny(string unicodeHostname) {
	bool mustEncode = false;
	foreach (i, dchar d; unicodeHostname) {
		auto c = cast(uint) d;
		if (c > 0x80) {
			mustEncode = true;
			break;
		}
		// Rejected ranges: below ',', ':' through '@', '[' through '`', and '{' upwards.
		// The second upper bound is 0x40; written as decimal 40 that range was empty and
		// ':' through '@' were never rejected.
		if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
			throw new URLException(
					format(
						"domain name '%s' contains illegal character '%s' at position %s",
						unicodeHostname, d, i));
		}
	}
	if (!mustEncode) {
		return unicodeHostname;
	}
	return unicodeHostname.split('.').map!punyEncode.join(".");
}
1054 
// Pass each dot-separated label of the host through punyDecode and join the results with dots.
private string fromPuny(string hostname) {
	string[] labels;
	foreach (label; hostname.split('.')) {
		labels ~= punyDecode(label);
	}
	return labels.join(".");
}
1058 
private {
	// Parameters of the Punycode encoding.
	enum delimiter = '-';
	enum marker = "xn--";
	enum ulong damp = 700;
	enum ulong tmin = 1;
	enum ulong tmax = 26;
	enum ulong skew = 38;
	enum ulong base = 36;
	enum ulong initialBias = 72;
	enum dchar initialN = cast(dchar)128;

	// Compute the next bias value from the delta that was just encoded.
	// numPoints must be non-zero; it is used as a divisor.
	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
		enum ulong span = base - tmin;
		enum ulong threshold = (span * tmax) / 2;

		// The very first delta is damped much harder than later ones.
		ulong scaled = delta / (firstTime ? damp : 2);
		scaled += scaled / numPoints;

		ulong offset = 0;
		for (; scaled > threshold; offset += base) {
			scaled /= span;
		}
		return offset + ((span + 1) * scaled) / (scaled + skew);
	}
}
1085 
/**
	* Encode a single domain name segment using the Punycode algorithm.
	*
	* Punycode represents a Unicode domain name segment using ASCII characters only. An encoded
	* segment is marked with the prefix "xn--". Each segment is encoded separately. For instance,
	* encoding "☂.☃.com" segment by segment gives "xn--m3h.xn--n3h.com".
	*
	* In order to puny-encode a whole domain name, you must split it into its segments. The
	* following will typically suffice:
	* ---
	* auto domain = "☂.☃.com";
	* auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
	* ---
	*
	* Params:
	*   input = the segment to encode.
	*
	* Returns: the input itself when every code point is below 0x80; otherwise "xn--" followed by
	*   the encoded segment.
	*
	* Throws: URLException if the accumulated delta exceeds uint.max, or if no remaining code point
	*   can be found while some are still unencoded.
	*/
string punyEncode(string input) {
	ulong delta = 0;
	dchar n = initialN;
	auto bias = initialBias;
	Appender!string output;
	output ~= marker;
	// 64-bit counters keep the delta arithmetic below from wrapping in 32 bits.
	ulong pushed = 0;
	ulong codePoints = 0;
	foreach (dchar c; input) {
		codePoints++;
		// Basic code points are strictly below initialN. U+0080 itself must be encoded: copying
		// it here would put a non-ASCII character in the output and then count it a second time
		// when the main loop reaches n == 0x80.
		if (c < initialN) {
			output ~= c;
			pushed++;
		}
	}
	if (pushed == codePoints) {
		// No encoding to do.
		return input;
	}
	if (pushed > 0) {
		output ~= delimiter;
	}
	bool first = true;
	while (pushed < codePoints) {
		// Find the smallest code point that is at least n. A separate flag is used so that
		// U+10FFFF (dchar.max) can itself be selected.
		bool found = false;
		dchar best = dchar.max;
		foreach (dchar c; input) {
			if (n <= c && (!found || c < best)) {
				best = c;
				found = true;
			}
		}
		if (!found) {
			throw new URLException("failed to find a new codepoint to process during punyencode");
		}
		// Widen before multiplying so the overflow check sees the true product.
		delta += cast(ulong)(best - n) * (pushed + 1);
		if (delta > uint.max) {
			// TODO better error message
			throw new URLException("overflow during punyencode");
		}
		n = best;
		foreach (dchar c; input) {
			if (c < n) {
				delta++;
			}
			if (c == n) {
				// Emit delta as a variable-length integer in base 36.
				ulong q = delta;
				auto k = base;
				while (true) {
					ulong t;
					if (k <= bias) {
						t = tmin;
					} else if (k >= bias + tmax) {
						t = tmax;
					} else {
						t = k - bias;
					}
					if (q < t) {
						break;
					}
					output ~= digitToBasic(t + ((q - t) % (base - t)));
					q = (q - t) / (base - t);
					k += base;
				}
				output ~= digitToBasic(q);
				pushed++;
				bias = adapt(delta, pushed, first);
				first = false;
				delta = 0;
			}
		}
		delta++;
		n++;
	}
	return output.data;
}
1176 
/**
	* Decode a single domain name segment using the Punycode algorithm.
	*
	* Punycode represents a Unicode domain name segment using ASCII characters only. An encoded
	* segment is marked with the prefix "xn--". Each segment is decoded separately. For instance,
	* decoding "xn--m3h.xn--n3h.com" segment by segment gives "☂.☃.com".
	*
	* In order to puny-decode a whole domain name, you must split it into its segments. The
	* following will typically suffice:
	* ---
	* auto domain = "xn--m3h.xn--n3h.com";
	* auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
	* ---
	*
	* Params:
	*   input = the segment to decode.
	*
	* Returns: the input itself when it does not start with "xn--"; otherwise the decoded segment.
	*
	* Throws: URLException if the encoded segment is malformed: it has a non-ASCII character
	*   before the last delimiter, contains a character that is not a Punycode digit, ends in the
	*   middle of a number, or decodes to a value that overflows or is not a valid code point.
	*/
string punyDecode(string input) {
	if (!input.startsWith(marker)) {
		return input;
	}
	immutable original = input;  // kept for error messages
	input = input[marker.length..$];

	dchar n = initialN;
	ulong i = 0;
	auto bias = initialBias;
	dchar[] output;
	// This reserves a bit more than necessary, but it should be more efficient overall than
	// growing the array one insertion at a time.
	output.reserve(input.length);

	// Everything before the last delimiter is literal ASCII text; copy it and step over the
	// delimiter.
	auto end = input.lastIndexOf(delimiter);
	if (end > -1) {
		foreach (dchar c; input[0..end]) {
			if (c >= initialN) {
				throw new URLException(
						format("punycode segment '%s' has a non-ASCII character before the delimiter",
							original));
			}
			output ~= c;
		}
		input = input[end+1 .. $];
	}

	// The remainder is a sequence of variable-length base-36 integers, one per encoded code point.
	size_t pos = 0;
	while (pos < input.length) {
		immutable oldi = i;
		ulong w = 1;
		for (ulong k = base; ; k += base) {
			// A number whose final digit is missing would otherwise index past the end.
			if (pos >= input.length) {
				throw new URLException(
						format("punycode segment '%s' ends in the middle of an encoded number",
							original));
			}
			// Indexing bytewise is fine: any byte that is not an ASCII digit or letter is
			// rejected just below.
			immutable digit = basicToDigit(input[pos]);
			pos++;
			if (digit >= base) {
				throw new URLException(
						format("punycode segment '%s' contains a character that is not a punycode digit",
							original));
			}
			// digit < 36 and w never exceeds 35 * uint.max here, so this cannot wrap 64 bits.
			i += digit * w;
			if (i > uint.max) {
				throw new URLException(
						format("overflow while decoding punycode segment '%s'", original));
			}
			ulong t;
			if (k <= bias) {
				t = tmin;
			} else if (k >= bias + tmax) {
				t = tmax;
			} else {
				t = k - bias;
			}
			if (digit < t) {
				break;
			}
			w *= (base - t);
		}

		immutable ulong len = output.length + 1;
		bias = adapt(i - oldi, len, oldi == 0);

		// i carries both the code point increment (quotient) and the insert position (remainder).
		immutable ulong next = n + i / len;
		if (next > dchar.max || !isValidDchar(cast(dchar) next)) {
			throw new URLException(
					format("punycode segment '%s' decodes to an invalid code point", original));
		}
		n = cast(dchar) next;
		i %= len;
		(() @trusted { output.insertInPlace(cast(size_t) i, n); })();  // should be @safe but isn't marked
		i++;
	}
	return output.to!string;
}
1273 
// Lifted from punycode.js.
// Maps a Punycode digit value to its ASCII character: 0-25 give 'a'-'z', 26-35 give '0'-'9'.
// The value is not range-checked; anything above 35 continues past '9'.
private dchar digitToBasic(ulong digit) {
	if (digit < 26) {
		return cast(dchar)(digit + 'a');
	}
	// '0' is 48, so digit 26 must land on 48: the offset is 22.
	return cast(dchar)(digit + 22);
}
1278 
// Lifted from punycode.js.
// Maps an ASCII character to its Punycode digit value: letters of either case give 0-25 and
// '0'-'9' give 26-35. Any other character gives `base`, which is not a valid digit value.
private uint basicToDigit(char c) {
	switch (c) {
		case '0': .. case '9':
			return cast(uint)(c - '0') + 26;
		case 'A': .. case 'Z':
			return cast(uint)(c - 'A');
		case 'a': .. case 'z':
			return cast(uint)(c - 'a');
		default:
			return base;
	}
}
1293 
unittest {
	// Each entry pairs a Unicode segment with the Punycode form it must encode to.
	static immutable string[2][] cases = [
		["b\u00FCcher", "xn--bcher-kva"],
		["b\u00FCc\u00FCher", "xn--bcher-kvab"],
		["ýbücher", "xn--bcher-kvaf"],
		["mañana", "xn--maana-pta"],
		["\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
			~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
			"xn--egbpdaj6bu4bxfgehfvwxn"],
	];
	foreach (pair; cases) {
		auto encoded = punyEncode(pair[0]);
		// Report the actual output on failure so a mismatch can be diagnosed from the message.
		assert(encoded == pair[1], encoded);
	}
}
1322 
unittest {
	// Decoding a segment with no literal ASCII part (no delimiter after the marker).
	auto arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
	assert(arabic == "ليهمابتكلموشعربي؟", arabic);

	// Decoding a segment that mixes literal ASCII with one encoded code point.
	assert(punyDecode("xn--maana-pta") == "mañana");
}
1332 
unittest {
	import std.string, std.algorithm, std.array, std.range;

	// Whole domain names are processed one dot-separated segment at a time.
	auto unicodeForm = "xn--m3h.xn--n3h.com".splitter(".").map!(punyDecode).join(".");
	assert(unicodeForm == "☂.☃.com", unicodeForm);

	auto asciiForm = "☂.☃.com".splitter(".").map!(punyEncode).join(".");
	assert(asciiForm == "xn--m3h.xn--n3h.com", asciiForm);
}