url source code

1 /**
2 	* A URL handling library.
3 	*
4 	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
5 	* elements like port, path, username, and password.
6 	*
7 	* This module aims to make it simple to muck about with them.
8 	*
9 	* Example usage:
10 	* ---
11 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
12 	* auto files = system("ssh", url.toString, "ls").splitLines;
13 	* foreach (file; files) {
14 	*		system("scp", url ~ file, ".");
15 	* }
16 	* ---
17 	*
18 	* License: The MIT license.
19 	*/
20 module url;
21 
22 import std.algorithm;
23 import std.array;
24 import std.conv;
25 import std.encoding;
26 import std.string;
27 import std.utf;
28 
/**
	* An exception thrown when something bad happens with URLs.
	*
	* Params:
	*   msg = a description of what went wrong
	*   file = source file of the throw site (defaults to the caller's file)
	*   line = source line of the throw site (defaults to the caller's line)
	*   next = an earlier exception that this one wraps, if any
	*/
class URLException : Exception {
	// The file/line defaults are evaluated at the call site, so the exception reports where it
	// was thrown rather than where this constructor happens to live.
	this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
			@safe pure nothrow {
		super(msg, file, line, next);
	}
}
33 
/**
	* Default port numbers, keyed by URL scheme.
	*
	* The table is a set of best guesses and is not exhaustive: some schemes do not use ports at
	* all, and some that do are not tied to one single port.
	*/
ushort[string] schemeToDefaultPort;

// Fill the table when the module is initialized. Entries are grouped by first letter.
static this() {
	schemeToDefaultPort = [
		"aaa": 3868, "aaas": 5658, "acap": 674,
		"cap": 1026, "coap": 5683, "coaps": 5684,
		"dav": 443, "dict": 2628,
		"ftp": 21,
		"git": 9418, "go": 1096, "gopher": 70,
		"http": 80, "https": 443,
		"iac": 4569, "icap": 1344, "imap": 143,
		// ipp and ipps really are both mapped to port 631.
		"ipp": 631, "ipps": 631,
		// 6667 is the de facto default for irc, not the IANA reserved port.
		"irc": 6667, "ircs": 6697,
		// Plain "iris" defaults to iris.beep.
		"iris": 702, "iris.beep": 702, "iris.lwz": 715, "iris.xpc": 713, "iris.xpcs": 714,
		// Client-to-server port.
		"jabber": 5222,
		"ldap": 389, "ldaps": 636,
		"msrp": 2855, "msrps": 2855, "mtqp": 1038, "mupdate": 3905,
		"news": 119, "nfs": 2049,
		"pop": 110,
		"redis": 6379, "reload": 6084, "rsync": 873, "rtmfp": 1935, "rtsp": 554,
		"shttp": 80, "sieve": 4190, "sip": 5060, "sips": 5061, "smb": 445, "smtp": 25,
		"snews": 563, "snmp": 161, "soap.beep": 605, "ssh": 22, "stun": 3478, "stuns": 5349,
		"svn": 3690,
		"teamspeak": 9987, "telnet": 23, "tftp": 69, "tip": 3372,
	];
}
104 
/**
	* A Uniform Resource Locator.
	*
	* URLs can be parsed (see parseURL) and implicitly convert to strings.
	*/
struct URL {
	/// The URL scheme. For instance, ssh, ftp, or https.
	string scheme;

	/// The username in this URL. Usually absent.
	string user;

	/// The password in this URL. Usually absent.
	string pass;

	/// The hostname.
	string host;

	/**
		* The port.
		*
		* This is inferred from the scheme if it isn't present in the URL itself.
		* If the scheme is not known and the port is not present, the port will be given as 0.
		* For some schemes, port will not be sensible -- for instance, file or chrome-extension.
		*
		* If you explicitly need to detect whether the user provided a port, check the providedPort
		* field.
		*/
	@property ushort port() {
		if (providedPort != 0) {
			return providedPort;
		}
		if (auto known = scheme in schemeToDefaultPort) {
			return *known;
		}
		return 0;
	}

	/**
		* Set the port.
		*
		* This sets the providedPort field and is provided for convenience.
		*
		* Returns: the value that was stored.
		*/
	@property ushort port(ushort value) {
		return providedPort = value;
	}

	/// The port that was explicitly provided in the URL, or 0 if none was.
	ushort providedPort;

	/**
		* The path.
		*
		* For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
		* "/news/story/17774".
		*/
	string path;

	/**
		* The query string elements.
		*
		* For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
		* elements will be ["visited": "false"].
		*
		* Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
		* ["item": ""].
		*
		* This field is mutable, so be cautious.
		*/
	string[string] query;

	/**
		* The fragment. In web documents, this typically refers to an anchor element.
		* For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
		*/
	string fragment;

	/**
		* Convert this URL to a string.
		* The string is properly formatted and usable for, eg, a web request.
		*
		* Optional components are emitted based on whether they are empty, not on whether they are
		* null: the "user:pass@" section appears when either user or pass is non-empty, "?" appears
		* only when there is at least one query element, "=value" only for non-empty values, and
		* "#fragment" only for a non-empty fragment. An explicitly provided port is omitted when it
		* equals the scheme's default port.
		*/
	string toString() {
		Appender!string s;
		s ~= scheme;
		s ~= "://";
		// Test lengths rather than pointers: an empty-but-non-null user must not hide a password,
		// and a null user must not silently drop one either.
		if (user.length || pass.length) {
			s ~= user.percentEncode;
			s ~= ":";
			s ~= pass.percentEncode;
			s ~= "@";
		}
		s ~= host;
		if (providedPort) {
			// Single lookup; only print the port when it differs from the scheme's default.
			auto known = scheme in schemeToDefaultPort;
			if (known is null || *known != providedPort) {
				s ~= ":";
				s ~= providedPort.to!string;
			}
		}
		string p = path;
		if (p.length == 0 || p == "/") {
			s ~= '/';
		} else {
			if (p[0] == '/') {
				p = p[1..$];
			}
			// Encode each segment separately so the separators themselves stay literal.
			foreach (part; p.split('/')) {
				s ~= '/';
				s ~= part.percentEncode;
			}
		}
		// An associative array that had all its entries removed is non-null but empty; checking
		// the length avoids a dangling '?'.
		if (query.length) {
			s ~= '?';
			bool first = true;
			foreach (k, v; query) {
				if (!first) {
					s ~= '&';
				}
				first = false;
				s ~= k.percentEncode;
				// Null and "" are both "no value"; which one a caller or parser produced is an
				// accident and must not change the output.
				if (v.length) {
					s ~= '=';
					s ~= v.percentEncode;
				}
			}
		}
		if (fragment.length) {
			s ~= '#';
			s ~= fragment.percentEncode;
		}
		return s.data;
	}

	/// Implicitly convert URLs to strings.
	alias toString this;

	/**
		* The append operator (~).
		*
		* The append operator for URLs returns a new URL with the given string appended as a path
		* element to the URL's path. It only adds new path elements (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Query elements are copied.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* auto randInt = random ~ "int";
		* writeln(randInt);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opBinary(string op : "~")(string subsequentPath) {
		URL other = this;
		// Copying the struct copies only the reference to the associative array; duplicate it so
		// later edits to one URL's query do not show up in the other.
		if (query !is null) {
			other.query = query.dup;
		}
		other ~= subsequentPath;
		return other;
	}

	/**
		* The append-in-place operator (~=).
		*
		* The append operator for URLs adds a path element to this URL. It only adds new path elements
		* (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Returns: a copy of this URL after the append.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* random ~= "int";
		* writeln(random);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opOpAssign(string op : "~")(string subsequentPath) {
		immutable pathHasSlash = path.endsWith("/");
		immutable partHasSlash = subsequentPath.startsWith("/");
		if (pathHasSlash && partHasSlash) {
			// Both sides bring a separator; keep only one.
			path ~= subsequentPath[1..$];
		} else if (pathHasSlash || partHasSlash) {
			// Exactly one separator is already there.
			path ~= subsequentPath;
		} else {
			// Neither side has one; insert it.
			path ~= '/';
			path ~= subsequentPath;
		}
		return this;
	}
}
295 
/**
	* Parse a URL from a string.
	*
	* This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
	* may be made.
	*
	* The accepted shape is
	* [scheme://][user[:password]@]host[:port][/path][?query][#fragment]. When the scheme is
	* missing, "http" is assumed. The host may be a bracketed IPv6 literal such as [::1].
	* User, password, path, query keys and values, and fragment are percent-decoded.
	*
	* Punycode is not supported.
	*
	* Params:
	*   value = the text to parse
	*   url = receives the parsed URL on success; left as URL.init on failure
	*
	* Returns: true if the string could be parsed, false if a percent-encoded component was
	* malformed or not valid UTF-8, the port was not a number in the range of ushort, or an IPv6
	* literal was not properly bracketed.
	*/
bool tryParseURL(string value, out URL url) {
	// Build into a local so the caller never observes a half-filled URL when we bail out.
	URL result;

	// "://" only separates the scheme if nothing that ends the authority comes before it;
	// otherwise it belongs to a later component (eg "example.org/?next=http://other").
	immutable schemeEnd = value.indexOf("://");
	if (schemeEnd > -1 && value.indexOfAny("/?#") == schemeEnd + 1) {
		result.scheme = value[0 .. schemeEnd];
		value = value[schemeEnd + 3 .. $];
	} else {
		result.scheme = "http";
	}

	// The authority ([user[:password]@]host[:port]) runs up to the first '/', '?' or '#'.
	immutable authorityEnd = value.indexOfAny("/?#");
	string authority = authorityEnd == -1 ? value : value[0 .. authorityEnd];
	string rest = authorityEnd == -1 ? null : value[authorityEnd .. $];

	try {
		// User info. The password is optional ("git@example.org").
		immutable at = authority.indexOf('@');
		if (at > -1) {
			auto userinfo = authority[0 .. at];
			authority = authority[at + 1 .. $];
			immutable colon = userinfo.indexOf(':');
			if (colon == -1) {
				result.user = userinfo.percentDecode;
			} else {
				result.user = userinfo[0 .. colon].percentDecode;
				result.pass = userinfo[colon + 1 .. $].percentDecode;
			}
		}

		// Host and port. Colons inside a bracketed IPv6 literal are not port separators.
		ptrdiff_t portSeparator = -1;
		if (authority.startsWith("[")) {
			immutable closing = authority.indexOf(']');
			if (closing == -1) {
				return false;
			}
			if (closing + 1 < authority.length) {
				if (authority[closing + 1] != ':') {
					return false;
				}
				portSeparator = closing + 1;
			}
		} else {
			portSeparator = authority.indexOf(':');
		}
		if (portSeparator == -1) {
			result.host = authority;
		} else {
			result.host = authority[0 .. portSeparator];
			result.providedPort = authority[portSeparator + 1 .. $].to!ushort;
		}

		// Fragment: everything after the first '#'.
		immutable hash = rest.indexOf('#');
		if (hash > -1) {
			result.fragment = rest[hash + 1 .. $].percentDecode;
			rest = rest[0 .. hash];
		}

		// Query: everything between the first '?' and the fragment.
		immutable mark = rest.indexOf('?');
		if (mark > -1) {
			foreach (pair; rest[mark + 1 .. $].split('&')) {
				if (pair.length == 0) {
					// Stray separators ("a=1&&b=2", trailing '&') do not define an element.
					continue;
				}
				immutable eq = pair.indexOf('=');
				if (eq == -1) {
					result.query[pair.percentDecode] = "";
				} else {
					result.query[pair[0 .. eq].percentDecode] = pair[eq + 1 .. $].percentDecode;
				}
			}
			rest = rest[0 .. mark];
		}

		// Whatever is left is the path. Decode it even when there is no query or fragment, so
		// that toString does not encode it a second time.
		result.path = rest.percentDecode;
	} catch (URLException) {
		return false;
	} catch (ConvException) {
		return false;
	}

	url = result;
	return true;
}
407 
///
unittest {
	// Scheme, host, path, two query elements and a fragment.
	{
		URL u;
		u.scheme = "https";
		u.host = "example.org";
		u.path = "/foo/bar";
		u.query["hello"] = "world";
		u.query["gibe"] = "clay";
		u.fragment = "frag";
		immutable rendered = u.toString;
		// Associative arrays have no guaranteed iteration order, so accept either ordering.
		assert(
				rendered == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
				rendered == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
				rendered);
	}

	// Components that need percent encoding.
	{
		URL u;
		u.scheme = "https";
		u.host = "example.org";
		u.path = "/f☃o";
		u.query["❄"] = "❀";
		u.query["["] = "]";
		u.fragment = "ş";
		immutable rendered = u.toString;
		// Again, either query ordering is fine.
		assert(
				rendered == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
				rendered == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
				rendered);
	}

	// User name, password and a non-default port.
	{
		URL u;
		u.scheme = "https";
		u.host = "example.org";
		u.user = "dhasenan";
		u.pass = "itsasecret";
		u.port = 17;
		immutable rendered = u.toString;
		assert(rendered == "https://dhasenan:itsasecret@example.org:17/", rendered);
	}

	// A query but no path: a lone slash is inserted before the question mark.
	{
		URL u;
		u.scheme = "https";
		u.host = "example.org";
		u.query["hi"] = "bye";
		immutable rendered = u.toString;
		assert(rendered == "https://example.org/?hi=bye", rendered);
	}
}
471 
///
unittest {
	// Starting from a path without a trailing slash.
	auto base = parseURL("http://example.org/foo");
	// A bare element gets a separator inserted.
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// An element that brings its own separator does not get a second one.
	assert((base ~ "/bar").toString == "http://example.org/foo/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/foo/bar");

	// Starting from a path that already ends with a slash.
	base = parseURL("http://example.org/foo/");
	assert((base ~ "bar").toString == "http://example.org/foo/bar");
	// Slash on both sides: still only one in the result.
	assert((base ~ "/bar").toString == "http://example.org/foo/bar");
	base ~= "/bar";
	assert(base.toString == "http://example.org/foo/bar");

	// Starting with no path at all.
	base = parseURL("http://example.org");
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar");

	// Starting with a path that is only a slash.
	base = parseURL("http://example.org/");
	assert((base ~ "bar").toString == "http://example.org/bar");
	assert((base ~ "/bar").toString == "http://example.org/bar");
	base ~= "bar";
	assert(base.toString == "http://example.org/bar", base.toString);

	// A fragment directly after the root path survives a round trip.
	base = parseURL("ircs://irc.freenode.com/#d");
	assert(base.toString == "ircs://irc.freenode.com/#d", base.toString);
}
509 
unittest {
	// A URL must be accepted wherever a string is expected; curl's get() serves as the probe.
	// Only the type of the call is inspected, so no request is made.
	import std.net.curl;
	auto target = parseURL("http://example.org");
	enum acceptedByCurl = is(typeof(std.net.curl.get(target)));
	assert(acceptedByCurl);
}
515 
/**
	* Parse the input string as a URL.
	*
	* This is the throwing counterpart of tryParseURL.
	*
	* Params:
	*   value = the text to parse
	*
	* Returns: the parsed URL.
	*
	* Throws:
	*   URLException if the string was in an incorrect format.
	*/
URL parseURL(string value) {
	URL parsed;
	immutable ok = tryParseURL(value, parsed);
	if (!ok) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
529 
///
unittest {
	// No scheme given: http is assumed.
	{
		auto parsed = parseURL("example.org");
		assert(parsed.scheme == "http");
		assert(parsed.host == "example.org");
		assert(parsed.path == "");
		assert(parsed.port == 80);
		assert(parsed.providedPort == 0);
		assert(parsed.fragment == "");
	}

	// Scheme and host only.
	{
		auto parsed = parseURL("https://example.org");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "");
		assert(parsed.port == 443);
		assert(parsed.providedPort == 0);
	}

	// Scheme, host and path.
	{
		auto parsed = parseURL("https://example.org/foo/bar");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/foo/bar", "expected /foo/bar but got " ~ parsed.path);
		assert(parsed.port == 443);
		assert(parsed.providedPort == 0);
	}

	// An explicit port overrides the scheme default.
	{
		auto parsed = parseURL("https://example.org:1021/foo/bar");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/foo/bar", "expected /foo/bar but got " ~ parsed.path);
		assert(parsed.port == 1021);
		assert(parsed.providedPort == 1021);
	}

	// User name and password.
	{
		auto parsed = parseURL("https://bob:secret@example.org/foo/bar");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/foo/bar");
		assert(parsed.port == 443);
		assert(parsed.user == "bob");
		assert(parsed.pass == "secret");
	}

	// Percent-encoded user name and password are decoded.
	{
		auto parsed = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/foo/bar");
		assert(parsed.port == 443);
		assert(parsed.user == "bob!");
		assert(parsed.pass == "secret!?");
	}

	// User, password, port and path all together.
	{
		auto parsed = parseURL("https://bob:secret@example.org:2210/foo/bar");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/foo/bar");
		assert(parsed.port == 2210);
		assert(parsed.user == "bob");
		assert(parsed.pass == "secret");
		assert(parsed.fragment == "");
	}

	// Query string without a fragment.
	{
		auto parsed = parseURL("https://example.org/?login=true");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/", "expected path: / actual path: " ~ parsed.path);
		assert(parsed.query["login"] == "true");
		assert(parsed.fragment == "");
	}

	// Query string followed by a fragment.
	{
		auto parsed = parseURL("https://example.org/?login=true#justkidding");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/", "expected path: / actual path: " ~ parsed.path);
		assert(parsed.query["login"] == "true");
		assert(parsed.fragment == "justkidding");
	}

	// Percent-encoded path, query key, query value and fragment.
	{
		auto parsed = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
		assert(parsed.scheme == "https");
		assert(parsed.host == "example.org");
		assert(parsed.path == "/☃", "expected path: /☃ actual path: " ~ parsed.path);
		assert(parsed.query["❄"] == "=");
		assert(parsed.fragment == "^");
	}
}
628 
unittest {
	// Port lookup: scheme default versus explicitly given.
	auto defaulted = parseURL("http://example.org");
	assert(defaulted.port == 80);
	auto explicitPort = parseURL("http://example.org:5326");
	assert(explicitPort.port == 5326);

	// Credentials in a URL that uses every component.
	auto full = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
	assert(full.scheme == "redis");
	assert(full.user == "admin");
	assert(full.pass == "password");

	// Round trips: the inferred scheme is written out, and a port equal to the default is dropped.
	auto bare = parseURL("example.org");
	assert(bare.toString == "http://example.org/");
	auto redundantPort = parseURL("http://example.org:80");
	assert(redundantPort.toString == "http://example.org/");

	// Host and port with no scheme.
	auto local = parseURL("localhost:8070");
	assert(local.toString == "http://localhost:8070/");
}
643 
/**
	* Percent-encode a string.
	*
	* Letters, digits and the four characters -._~ are copied unchanged. Every other code point is
	* written as its UTF-8 octets, each in the form %XX with uppercase hexadecimal digits.
	*
	* Domain names containing Unicode characters use Punycode instead; this function is for the
	* other URL components.
	*
	* Params:
	*   raw = the text to encode
	*
	* Returns: the encoded text.
	*/
string percentEncode(string raw) {
	enum hexDigits = "0123456789ABCDEF";

	Appender!string result;
	foreach (dchar point; raw) {
		immutable letter = (point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z');
		immutable digit = point >= '0' && point <= '9';
		immutable mark = point == '-' || point == '.' || point == '_' || point == '~';
		if (letter || digit || mark) {
			// Unreserved characters are all single-byte ASCII and pass through untouched.
			result ~= cast(char) point;
		} else {
			// Anything else, ASCII or not, is emitted octet by octet from its UTF-8 form.
			char[4] utf8;
			immutable count = std.utf.encode(utf8, point);
			foreach (octet; utf8[0 .. count]) {
				result ~= '%';
				result ~= hexDigits[octet >> 4];
				result ~= hexDigits[octet & 0x0F];
			}
		}
	}
	return result.data;
}
678 
///
unittest {
	// Unreserved characters come back unchanged.
	foreach (untouched; ["IDontNeedNoPercentEncoding", "~~--..__", "0123456789"]) {
		assert(percentEncode(untouched) == untouched);
	}

	// A non-ASCII character becomes one escape per UTF-8 octet.
	immutable snowman = percentEncode("☃");
	assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}
690 
/**
	* Percent-decode a string.
	*
	* The input is decoded with percentDecodeRaw and the resulting bytes are then checked, so the
	* returned string is always valid UTF-8.
	*
	* Params:
	*   encoded = the percent-encoded text
	*
	* Returns: the decoded text.
	*
	* Throws:
	*   URLException if the decoded bytes are not valid UTF-8, or if percentDecodeRaw rejects the
	*   input.
	*/
string percentDecode(string encoded) {
	auto decoded = cast(string) percentDecodeRaw(encoded);
	if (decoded.isValid) {
		return decoded;
	}
	throw new URLException("input contains invalid UTF data");
}
709 
///
unittest {
	// Text without any escapes comes back unchanged.
	foreach (untouched; ["IDontNeedNoPercentDecoding", "~~--..__", "0123456789"]) {
		assert(percentDecode(untouched) == untouched);
	}

	// Three escaped octets combine into a single non-ASCII character.
	immutable snowman = percentDecode("%E2%98%83");
	assert(snowman == "☃", "expected a snowman but got" ~ snowman);
}
721 
/**
	* Percent-decode a string into a ubyte array.
	*
	* Every "%XX" sequence is replaced by the byte with hexadecimal value XX; both uppercase and
	* lowercase hex digits are accepted. All other characters are copied through as they are.
	*
	* This yields a ubyte array and will not perform validation on the output, which therefore
	* need not be valid UTF-8.
	*
	* Params:
	*   encoded = the percent-encoded text
	*
	* Returns: the decoded bytes.
	*
	* Throws:
	*   URLException if a percent symbol is not followed by two hexadecimal digits.
	*/
ubyte[] percentDecodeRaw(string encoded) {
	// Value of one hexadecimal digit, or -1 if the character is not a hex digit.
	static int hexValue(char c) {
		if (c >= '0' && c <= '9') {
			return c - '0';
		}
		if (c >= 'A' && c <= 'F') {
			return c - 'A' + 10;
		}
		if (c >= 'a' && c <= 'f') {
			return c - 'a' + 10;
		}
		return -1;
	}

	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	Appender!(ubyte[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= cast(ubyte) encoded[i];
			continue;
		}
		// Compare remaining length instead of computing length - 2, which wraps around for
		// inputs shorter than two characters.
		if (encoded.length - i < 3) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		immutable high = hexValue(encoded[i + 1]);
		immutable low = hexValue(encoded[i + 2]);
		if (high < 0 || low < 0) {
			throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
					"digits after percent symbol. Error at index " ~ i.to!string);
		}
		app ~= cast(ubyte)((high << 4) | low);
		i += 2;
	}
	return app.data;
}
751 
752 /++
753 string toAscii(string unicodeHostname) {
754 	bool mustEncode = false;
755 	foreach (i, dchar d; unicodeHostname) {
756 		auto c = cast(uint) d;
757 		if (c > 0x80) {
758 			mustEncode = true;
759 			break;
760 		}
761 		if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
762 			throw new URLException(
763 					format(
764 						"domain name '%s' contains illegal character '%s' at position %s",
765 						unicodeHostname, d, i));
766 		}
767 	}
768 	if (!mustEncode) {
769 		return unicodeHostname;
770 	}
771 	auto parts = unicodeHostname.split('.');
772 	char[] result;
773 	foreach (part; parts) {
774 		result ~= punyEncode(part);
775 	}
776 	return cast(string)result;
777 }
778 
779 string punyEncode(string item, string delimiter = null, string marker = null) {
780 	// Puny state machine initial variables.
781 	auto base = 36;
782 	auto tmin = 1;
783 	auto tmax = 26;
784 	auto skew = 38;
785 	auto damp = 700;
786 	auto initialBias = 72;
787 	long b = 0;
788 
789 	bool needToEncode = false;
790 	Appender!(char[]) app;
791 	app ~= marker;
792 	foreach (dchar d; item) {
793 		if (d > '~') {  // Max printable ASCII. The DEL char isn't allowed in hostnames.
794 			needToEncode = true;
795 		} else {
796 			app ~= d;
797 			b++;
798 		}
799 	}
800 	if (!needToEncode) {
801 		return item;
802 	}
803 	app ~= delimiter;
804 
805 	// The puny algorithm.
806 	// We use 64-bit arithmetic to avoid overflow issues -- unicode only defines up to 0x10FFFF,
807 	// and we won't be encoding gigabytes of data, but just to be safe.
808 	// Also we use signed values just to make things easier.
809 	long delta = 0;
810 	long bias = initialBias;
811 	long h = b;
812 	long lastIndex = 0;
813 
814 	dchar digitToBasic(ulong digit) {
815 		if (digit < 26) {
816 			return 'a' + cast(dchar)digit;
817 		}
818 		return cast(dchar)('0' + (digit - 26));
819 	}
820 
821 	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
822 		auto k = 0;
823 		delta = firstTime ? (delta / damp) : delta >> 1;
824 		delta += (delta / numPoints);
825 		for (; delta > (base - tmin) * tmax >> 1; k += base) {
826 			delta = (delta / (base - tmin));
827 		}
828 		return k + (base - tmin + 1) * delta / (delta + skew);
829 	}
830 
831 	auto f = filter!(x => x >= cast(dchar)128)(item).array;
832 	auto uniqueChars = uniq(std.algorithm.sorting.sort(f));
833 	foreach (dchar n; uniqueChars) {
834 		foreach (dchar c; item) {
835 			if (c < n) {
836 				delta++;
837 			} else if (c == n) {
838 				auto q = delta;
839 				for (ulong k = 0; k < cast(ulong)uint.max; k += base) {
840 					auto t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
841 					if (q < t) {
842 						break;
843 					}
844 					app ~= digitToBasic(t + ((q - t) % (base - t)));
845 					q = (q - t) / (base - t);
846 				}
847 				app ~= digitToBasic(q);
848 				bias = adapt(delta, h + 1, h == b);
849 				h++;
850 			}
851 		}
852 		delta++;
853 	}
854 	return cast(string)app.data;
855 }
856 
857 unittest {
858 	import std.stdio;
859 	auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
860 		~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
861 	writeln(a);
862 	writeln(punyEncode(a));
863 	assert(punyEncode(a) == "egbpdaj6bu4bxfgehfvwxn");
864 }
865 
866 struct URL {
867 	Host host;
868 }
869 ++/