1 /**
2 	* A URL handling library.
3 	*
	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
5 	* elements like port, path, username, and password.
6 	*
7 	* This module aims to make it simple to muck about with them.
8 	*
9 	* Example usage:
10 	* ---
11 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
12 	* auto files = system("ssh", url.toString, "ls").splitLines;
13 	* foreach (file; files) {
14 	*		auto fileURL = url;
15 	*		fileURL.path = file;
16 	*		system("scp", fileURL.toString, ".");
17 	* }
18 	* ---
19 	*/
20 module url;
21 
22 import std.algorithm;
23 import std.array;
24 import std.conv;
25 import std.encoding;
26 import std.string;
27 import std.utf;
28 
/// An exception thrown when something bad happens with URLs.
class URLException : Exception {
	/**
		* Create a URLException.
		*
		* Params:
		*   msg = a description of what went wrong
		*   file = source file of the throw site (filled in automatically)
		*   line = source line of the throw site (filled in automatically)
		*   next = an earlier exception this one is chained to, if any
		*/
	this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
			@safe pure nothrow {
		// Forwarding file and line makes the exception report where it was thrown rather than
		// the location of this constructor.
		super(msg, file, line, next);
	}
}
33 
/**
	* A mapping from schemes to their default ports.
	*
	* This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
	* use even if they use ports. Entries here should be treated as best guesses.
	*/
ushort[string] schemeToDefaultPort;

// Fills schemeToDefaultPort from a fixed table of (scheme, port) pairs.
static this() {
	static struct Entry {
		string scheme;
		ushort port;
	}

	static immutable Entry[] knownDefaults = [
		Entry("aaa", 3868),
		Entry("aaas", 5658),
		Entry("acap", 674),
		Entry("cap", 1026),
		Entry("coap", 5683),
		Entry("coaps", 5684),
		Entry("dav", 443),
		Entry("dict", 2628),
		Entry("ftp", 21),
		Entry("git", 9418),
		Entry("go", 1096),
		Entry("gopher", 70),
		Entry("http", 80),
		Entry("https", 443),
		Entry("iac", 4569),
		Entry("icap", 1344),
		Entry("imap", 143),
		Entry("ipp", 631),
		Entry("ipps", 631),  // yes, they're both mapped to port 631
		Entry("irc", 6667),  // De facto default port, not the IANA reserved port.
		Entry("ircs", 6697),
		Entry("iris", 702),  // defaults to iris.beep
		Entry("iris.beep", 702),
		Entry("iris.lwz", 715),
		Entry("iris.xpc", 713),
		Entry("iris.xpcs", 714),
		Entry("jabber", 5222),  // client-to-server
		Entry("ldap", 389),
		Entry("ldaps", 636),
		Entry("msrp", 2855),
		Entry("msrps", 2855),
		Entry("mtqp", 1038),
		Entry("mupdate", 3905),
		Entry("news", 119),
		Entry("nfs", 2049),
		Entry("pop", 110),
		Entry("redis", 6379),
		Entry("reload", 6084),
		Entry("rsync", 873),
		Entry("rtmfp", 1935),
		Entry("rtsp", 554),
		Entry("shttp", 80),
		Entry("sieve", 4190),
		Entry("sip", 5060),
		Entry("sips", 5061),
		Entry("smb", 445),
		Entry("smtp", 25),
		Entry("snews", 563),
		Entry("snmp", 161),
		Entry("soap.beep", 605),
		Entry("ssh", 22),
		Entry("stun", 3478),
		Entry("stuns", 5349),
		Entry("svn", 3690),
		Entry("teamspeak", 9987),
		Entry("telnet", 23),
		Entry("tftp", 69),
		Entry("tip", 3372),
	];

	foreach (entry; knownDefaults) {
		schemeToDefaultPort[entry.scheme] = entry.port;
	}
}
104 
/**
	* A Uniform Resource Locator.
	*
	* The syntax for URLs is scheme:[//[user:password@]host[:port]][/]path[?query][#fragment].
	*/
struct URL {
	/// The URL scheme. For instance, ssh, ftp, or https.
	string scheme;

	/// The username in this URL. Usually absent. If present, there will also be a password.
	string user;

	/// The password in this URL. Usually absent.
	string pass;

	/// The hostname.
	string host;

	/// The port.
	/// This is inferred from the scheme if it isn't present in the URL itself.
	/// If the scheme is not known and the port is not present, the port will be given as 0.
	/// For some schemes, port will not be sensible -- for instance, file or chrome-extension.
	///
	/// If you explicitly need to detect whether the user provided a port, check the providedPort
	/// field.
	@property ushort port() const {
		if (providedPort != 0) {
			return providedPort;
		}
		if (auto known = scheme in schemeToDefaultPort) {
			return *known;
		}
		return 0;
	}

	/// Set the port.
	/// This is a shortcut for convenience because you probably don't care about the difference
	/// between port and providedPort.
	@property ushort port(ushort value) {
		return providedPort = value;
	}

	/// The port that was explicitly provided in the URL, or 0 if none was provided.
	ushort providedPort;

	/// The path. This excludes the query string.
	/// For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
	/// "/news/story/17774".
	string path;

	/// The query string elements.
	/// For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
	/// elements will be ["visited": "false"].
	/// Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
	/// ["item": ""].
	///
	/// This field is mutable. (There is no alternative in this case.) So be cautious.
	string[string] query;

	/// The fragment. In web documents, this typically refers to an anchor element.
	/// For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
	string fragment;

	/**
		* Convert this URL to a string.
		*
		* The user, password, path segments, query keys and values, and fragment are
		* percent-encoded. The host is written as-is. The port is written only when one was
		* explicitly provided and it differs from the scheme's default port.
		*
		* Components are considered absent when they are empty, not merely when they are null:
		* an empty user writes no credentials, an empty query value writes the key without '=',
		* and an empty query table or fragment writes no '?' or '#'.
		*
		* Returns: the formatted URL, usable for, eg, a web request.
		*/
	string toString() const {
		Appender!string s;
		s ~= scheme;
		s ~= "://";
		if (user.length) {
			s ~= user.percentEncode;
			s ~= ":";
			s ~= pass.percentEncode;
			s ~= "@";
		}
		s ~= host;
		if (providedPort) {
			auto known = scheme in schemeToDefaultPort;
			if (known is null || *known != providedPort) {
				s ~= ":";
				s ~= providedPort.to!string;
			}
		}

		// Drop one leading slash so that splitting yields exactly the path segments.
		string p = path;
		if (p.length && p[0] == '/') {
			p = p[1 .. $];
		}
		if (p.length == 0) {
			// Covers both an empty path and a path of just "/". Splitting an empty string
			// yields no segments, so without this branch "/" would lose its slash.
			s ~= '/';
		} else {
			foreach (part; p.split('/')) {
				s ~= '/';
				s ~= part.percentEncode;
			}
		}

		if (query.length) {
			s ~= '?';
			bool first = true;
			foreach (k, v; query) {
				if (!first) {
					s ~= '&';
				}
				first = false;
				s ~= k.percentEncode;
				if (v.length) {
					s ~= '=';
					s ~= v.percentEncode;
				}
			}
		}
		if (fragment.length) {
			s ~= '#';
			s ~= fragment.percentEncode;
		}
		return s.data;
	}

	/**
		* The append operator (~).
		*
		* The append operator for URLs returns a new URL with the given string appended as a path
		* element to the URL's path. It only adds new path elements (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Query elements are copied, so modifying the result's query does not affect this URL.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* auto randInt = random ~ "int";
		* writeln(randInt);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opBinary(string op : "~")(string subsequentPath) {
		URL other = this;
		if (query.length) {
			// A plain struct copy shares the associative array; give the result its own.
			other.query = query.dup;
		}
		other ~= subsequentPath;
		return other;
	}

	/**
		* The append-in-place operator (~=).
		*
		* The append operator for URLs adds a path element to this URL. It only adds new path elements
		* (or sequences of path elements).
		*
		* Don't worry about path separators; whether you include them or not, it will just work.
		*
		* Returns: a copy of this URL after the append.
		*
		* Examples:
		* ---
		* auto random = "http://testdata.org/random".parseURL;
		* random ~= "int";
		* writeln(random);  // prints "http://testdata.org/random/int"
		* ---
		*/
	URL opOpAssign(string op : "~")(string subsequentPath) {
		immutable pathEndsWithSlash = path.endsWith("/");
		immutable extraStartsWithSlash = subsequentPath.startsWith("/");
		if (pathEndsWithSlash && extraStartsWithSlash) {
			// Both sides supply a separator; keep only one.
			path ~= subsequentPath[1 .. $];
		} else if (pathEndsWithSlash || extraStartsWithSlash) {
			path ~= subsequentPath;
		} else {
			// Neither side supplies a separator; add one.
			path ~= '/';
			path ~= subsequentPath;
		}
		return this;
	}
}
278 
/**
	* Parse a URL from a string.
	*
	* This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
	* may be made. However, any URL in a correct format will be parsed correctly.
	*
	* The expected shape is scheme://[user[:password]@]host[:port][/path][?query][#fragment].
	* The scheme is optional; "http" is assumed when it is missing. The user, password, path, query
	* keys and values, and fragment are percent-decoded. The host may be a bracketed IPv6 literal
	* such as "[::1]".
	*
	* Punycode is not supported.
	*
	* Params:
	*   value = the text to parse
	*   url = receives the parsed URL; it is reset to URL.init when parsing fails
	*
	* Returns: true if the string was parsed successfully, false otherwise.
	*/
bool tryParseURL(string value, out URL url) {
	try {
		if (parseURLParts(value, url)) {
			return true;
		}
	} catch (URLException) {
		// Invalid percent-encoding or invalid UTF-8 in one of the components.
	} catch (ConvException) {
		// The port was not a number that fits in a ushort.
	}
	// Don't hand back a half-populated URL.
	url = URL.init;
	return false;
}

// Does the actual work for tryParseURL. Returns false or throws URLException/ConvException when
// the input is malformed.
private bool parseURLParts(string value, ref URL url) {
	// Scheme is optional in common use. We infer 'http' if it's not given.
	// "://" only marks a scheme when nothing that ends an authority comes before it; otherwise
	// it belongs to the path, query or fragment (e.g. "example.org/?next=http://other").
	auto schemeEnd = value.indexOf("://");
	if (schemeEnd > -1 && value[0 .. schemeEnd].indexOfAny("/?#") == -1) {
		url.scheme = value[0 .. schemeEnd];
		value = value[schemeEnd + 3 .. $];
	} else {
		url.scheme = "http";
	}

	// The authority ([user[:password]@]host[:port]) runs up to the first '/', '?' or '#'.
	auto authorityEnd = value.indexOfAny("/?#");
	string authority = authorityEnd == -1 ? value : value[0 .. authorityEnd];
	value = authorityEnd == -1 ? null : value[authorityEnd .. $];

	// Credentials, if any, come before the last '@' of the authority.
	auto at = authority.lastIndexOf('@');
	if (at > -1) {
		auto userInfo = authority[0 .. at];
		authority = authority[at + 1 .. $];
		auto colon = userInfo.indexOf(':');
		if (colon == -1) {
			url.user = userInfo.percentDecode;
		} else {
			url.user = userInfo[0 .. colon].percentDecode;
			url.pass = userInfo[colon + 1 .. $].percentDecode;
		}
	}

	// Find the colon separating host from port. Colons inside a bracketed IPv6 literal are
	// part of the host.
	ptrdiff_t portColon;
	if (authority.length && authority[0] == '[') {
		auto close = authority.indexOf(']');
		if (close == -1) {
			return false;
		}
		auto afterHost = authority[close + 1 .. $];
		if (afterHost.length == 0) {
			portColon = -1;
		} else if (afterHost[0] == ':') {
			portColon = close + 1;
		} else {
			return false;
		}
	} else {
		portColon = authority.indexOf(':');
	}
	if (portColon == -1) {
		url.host = authority;
	} else {
		url.host = authority[0 .. portColon];
		url.port = authority[portColon + 1 .. $].to!ushort;
	}

	if (value.length == 0) {
		// Nothing but an authority.
		return true;
	}

	// [/path][?query][#fragment]
	auto pathEnd = value.indexOfAny("?#");
	if (pathEnd == -1) {
		url.path = value.percentDecode;
		return true;
	}
	url.path = value[0 .. pathEnd].percentDecode;
	auto delimiter = value[pathEnd];
	value = value[pathEnd + 1 .. $];

	if (delimiter == '?') {
		auto queryEnd = value.indexOf('#');
		string queryText;
		if (queryEnd == -1) {
			queryText = value;
			value = null;
		} else {
			queryText = value[0 .. queryEnd];
			value = value[queryEnd + 1 .. $];
		}
		foreach (element; queryText.split('&')) {
			if (element.length == 0) {
				// "a=1&&b=2" or a trailing '&': there is no element here.
				continue;
			}
			auto eq = element.indexOf('=');
			if (eq == -1) {
				url.query[element.percentDecode] = "";
			} else {
				url.query[element[0 .. eq].percentDecode] = element[eq + 1 .. $].percentDecode;
			}
		}
	}

	// Whatever is left is the fragment (possibly empty).
	url.fragment = value.percentDecode;
	return true;
}
390 
///
unittest {
	// A plain URL with a path, two query elements and a fragment.
	{
		URL plain;
		plain.scheme = "https";
		plain.host = "example.org";
		plain.path = "/foo/bar";
		plain.query["hello"] = "world";
		plain.query["gibe"] = "clay";
		plain.fragment = "frag";

		// Associative arrays have no defined order, so accept either ordering of the query.
		immutable helloFirst =
				plain.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag";
		immutable gibeFirst =
				plain.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag";
		assert(helloFirst || gibeFirst, plain.toString);
	}

	// Characters outside the unreserved set get percent-encoded.
	{
		URL fancy;
		fancy.scheme = "https";
		fancy.host = "example.org";
		fancy.path = "/f☃o";
		fancy.query["❄"] = "❀";
		fancy.query["["] = "]";
		fancy.fragment = "ş";

		// Again, either ordering of the query is fine.
		immutable snowflakeFirst = fancy.toString ==
				"https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F";
		immutable bracketFirst = fancy.toString ==
				"https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F";
		assert(snowflakeFirst || bracketFirst, fancy.toString);
	}

	// Credentials and a non-default port.
	{
		URL secured;
		secured.scheme = "https";
		secured.host = "example.org";
		secured.user = "dhasenan";
		secured.pass = "itsasecret";
		secured.port = 17;
		assert(secured.toString == "https://dhasenan:itsasecret@example.org:17/", secured.toString);
	}

	// A query but no path: the path is written as a single slash.
	{
		URL queryOnly;
		queryOnly.scheme = "https";
		queryOnly.host = "example.org";
		queryOnly.query["hi"] = "bye";
		assert(queryOnly.toString == "https://example.org/?hi=bye", queryOnly.toString);
	}
}
454 
///
unittest {
	enum underFoo = "http://example.org/foo/bar";
	enum underRoot = "http://example.org/bar";

	// Starting from a path with no trailing slash: a separator is supplied when the appended
	// text lacks one, and not duplicated when it has one.
	auto target = parseURL("http://example.org/foo");
	assert((target ~ "bar").toString == underFoo);
	assert((target ~ "/bar").toString == underFoo);
	target ~= "bar";
	assert(target.toString == underFoo);

	// Starting from a path that already ends with a slash: never a second slash, whether or
	// not the appended text brings its own.
	target = parseURL("http://example.org/foo/");
	assert((target ~ "bar").toString == underFoo);
	assert((target ~ "/bar").toString == underFoo);
	target ~= "/bar";
	assert(target.toString == underFoo);

	// Starting from no path at all.
	target = parseURL("http://example.org");
	assert((target ~ "bar").toString == underRoot);
	assert((target ~ "/bar").toString == underRoot);
	target ~= "bar";
	assert(target.toString == underRoot);

	// Starting from a path that is only a slash.
	target = parseURL("http://example.org/");
	assert((target ~ "bar").toString == underRoot);
	assert((target ~ "/bar").toString == underRoot);
	target ~= "bar";
	assert(target.toString == underRoot, target.toString);
}
488 
/**
	* Parse the input string as a URL, throwing on failure.
	*
	* This is the throwing counterpart of tryParseURL.
	*
	* Params:
	*   value = the text to parse
	*
	* Returns: the parsed URL.
	*
	* Throws:
	*   URLException if the string was in an incorrect format.
	*/
URL parseURL(string value) {
	URL parsed;
	if (!tryParseURL(value, parsed)) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
502 
///
unittest {
	// No scheme given: http is assumed, and the port follows from it.
	{
		auto parsed = parseURL("example.org");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "http");
		assert(parsed.providedPort == 0);
		assert(parsed.port == 80);
		assert(parsed.path == "");
		assert(parsed.fragment == "");
	}

	// Scheme and host only.
	{
		auto parsed = parseURL("https://example.org");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.providedPort == 0);
		assert(parsed.port == 443);
		assert(parsed.path == "");
	}

	// Scheme, host and path.
	{
		auto parsed = parseURL("https://example.org/foo/bar");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.providedPort == 0);
		assert(parsed.port == 443);
		assert(parsed.path == "/foo/bar", "expected /foo/bar but got " ~ parsed.path);
	}

	// An explicit port overrides the scheme's default.
	{
		auto parsed = parseURL("https://example.org:1021/foo/bar");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.providedPort == 1021);
		assert(parsed.port == 1021);
		assert(parsed.path == "/foo/bar", "expected /foo/bar but got " ~ parsed.path);
	}

	// Credentials in front of the host.
	{
		auto parsed = parseURL("https://bob:secret@example.org/foo/bar");
		assert(parsed.user == "bob");
		assert(parsed.pass == "secret");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.port == 443);
		assert(parsed.path == "/foo/bar");
	}

	// Percent-encoded credentials are decoded.
	{
		auto parsed = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
		assert(parsed.user == "bob!");
		assert(parsed.pass == "secret!?");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.port == 443);
		assert(parsed.path == "/foo/bar");
	}

	// Credentials, explicit port and path together.
	{
		auto parsed = parseURL("https://bob:secret@example.org:2210/foo/bar");
		assert(parsed.user == "bob");
		assert(parsed.pass == "secret");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.port == 2210);
		assert(parsed.path == "/foo/bar");
		assert(parsed.fragment == "");
	}

	// A query string and no fragment.
	{
		auto parsed = parseURL("https://example.org/?login=true");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.path == "/", "expected path: / actual path: " ~ parsed.path);
		assert(parsed.query["login"] == "true");
		assert(parsed.fragment == "");
	}

	// A query string followed by a fragment.
	{
		auto parsed = parseURL("https://example.org/?login=true#justkidding");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.path == "/", "expected path: / actual path: " ~ parsed.path);
		assert(parsed.query["login"] == "true");
		assert(parsed.fragment == "justkidding");
	}

	// Percent-encoded path, query and fragment are decoded.
	{
		auto parsed = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
		assert(parsed.host == "example.org");
		assert(parsed.scheme == "https");
		assert(parsed.path == "/☃", "expected path: /☃ actual path: " ~ parsed.path);
		assert(parsed.query["❄"] == "=");
		assert(parsed.fragment == "^");
	}
}
601 
unittest {
	// Default port versus explicit port.
	auto implicitPort = parseURL("http://example.org");
	assert(implicitPort.port == 80);
	auto explicitPort = parseURL("http://example.org:5326");
	assert(explicitPort.port == 5326);

	// A URL carrying every component: scheme and credentials are picked out.
	auto everything =
			parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
	assert(everything.pass == "password");
	assert(everything.user == "admin");
	assert(everything.scheme == "redis");

	// Round trips: an inferred scheme is written out, a port equal to the scheme's default is
	// dropped, and any other port is kept.
	auto inferred = parseURL("example.org");
	assert(inferred.toString == "http://example.org/");
	auto defaultPort = parseURL("http://example.org:80");
	assert(defaultPort.toString == "http://example.org/");
	auto customPort = parseURL("localhost:8070");
	assert(customPort.toString == "http://localhost:8070/");
}
616 
/**
	* Percent-encode a string.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* ASCII letters, digits, and the characters -._~ are copied unchanged. Every other byte of the
	* input (which is normally UTF-8) is written as '%' followed by two uppercase hexadecimal
	* digits. Input that is not valid UTF-8 is encoded byte by byte rather than rejected.
	*
	* Params:
	*   raw = the text to encode
	*
	* Returns: the percent-encoded text.
	*/
string percentEncode(string raw) {
	// We *must* encode these characters: :/?#[]@!$&'()*+,;="
	// We *can* encode any other characters.
	// We *should not* encode alpha, numeric, or -._~.
	static immutable hexDigits = "0123456789ABCDEF";

	Appender!string app;
	app.reserve(raw.length);
	// Walk the UTF-8 code units directly. Unreserved characters are all single-byte ASCII, and
	// everything else is encoded octet by octet, so there is no need to decode to code points
	// and encode back.
	foreach (char unit; raw) {
		immutable unreserved =
				('a' <= unit && unit <= 'z') ||
				('A' <= unit && unit <= 'Z') ||
				('0' <= unit && unit <= '9') ||
				unit == '-' || unit == '.' || unit == '_' || unit == '~';
		if (unreserved) {
			app ~= unit;
		} else {
			app ~= '%';
			app ~= hexDigits[unit >> 4];
			app ~= hexDigits[unit & 0x0F];
		}
	}
	return app.data;
}
651 
///
unittest {
	// Unreserved characters pass through untouched.
	assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
	assert(percentEncode("~~--..__") == "~~--..__");
	assert(percentEncode("0123456789") == "0123456789");

	// Reserved ASCII characters are encoded as a single octet.
	assert(percentEncode("a b&c=d") == "a%20b%26c%3Dd");

	// Non-ASCII characters are encoded as each octet of their UTF-8 form.
	auto snowman = percentEncode("☃");
	assert(snowman == "%E2%98%83", "expected %E2%98%83 but got " ~ snowman);
}
663 
/**
	* Percent-decode a string.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* The input is decoded with percentDecodeRaw and the resulting bytes are then checked, so the
	* result is always a valid UTF-8 string.
	*
	* Params:
	*   encoded = the percent-encoded text
	*
	* Returns: the decoded text.
	*
	* Throws:
	*   URLException if the decoded bytes are not valid UTF-8. Exceptions from percentDecodeRaw
	*   propagate unchanged.
	*/
string percentDecode(string encoded) {
	auto decoded = cast(string) percentDecodeRaw(encoded);
	if (decoded.isValid) {
		return decoded;
	}
	throw new URLException("input contains invalid UTF data");
}
682 
///
unittest {
	// Text without percent signs comes back unchanged.
	assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
	assert(percentDecode("~~--..__") == "~~--..__");
	assert(percentDecode("0123456789") == "0123456789");

	// Consecutive encoded octets are reassembled into one UTF-8 character.
	auto snowman = percentDecode("%E2%98%83");
	assert(snowman == "☃", "expected a snowman but got " ~ snowman);

	// Malformed input is reported as a URLException.
	import std.exception : assertThrown;
	assertThrown!URLException(percentDecode("%4"));   // truncated escape
	assertThrown!URLException(percentDecode("%FF"));  // decodes to invalid UTF-8
}
694 
/**
	* Percent-decode a string into a ubyte array.
	*
	* URL components cannot contain non-ASCII characters, and there are very few characters that are
	* safe to include as URL components. Domain names using Unicode values use Punycode. For
	* everything else, there is percent encoding.
	*
	* This yields a ubyte array and will not perform validation on the output. However, an improperly
	* formatted input string will result in a URLException.
	*
	* Hexadecimal digits are accepted in either case ("%e2" and "%E2" are equivalent).
	*
	* Params:
	*   encoded = the percent-encoded text
	*
	* Returns: the decoded bytes. Bytes that were not percent-encoded are copied through as-is.
	*
	* Throws:
	*   URLException if a percent symbol is not followed by two hexadecimal digits.
	*/
ubyte[] percentDecodeRaw(string encoded) {
	// Value of a hexadecimal digit, or -1 if the character is not one.
	static int hexValue(char digit) {
		if (digit >= '0' && digit <= '9') {
			return digit - '0';
		}
		if (digit >= 'A' && digit <= 'F') {
			return digit - 'A' + 10;
		}
		if (digit >= 'a' && digit <= 'f') {
			return digit - 'a' + 10;
		}
		return -1;
	}

	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	Appender!(ubyte[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= cast(ubyte) encoded[i];
			continue;
		}
		// i < encoded.length here, so this subtraction cannot wrap around.
		if (encoded.length - i < 3) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		immutable high = hexValue(encoded[i + 1]);
		immutable low = hexValue(encoded[i + 2]);
		if (high < 0 || low < 0) {
			throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
					"digits after percent symbol. Error at index " ~ i.to!string);
		}
		app ~= cast(ubyte)((high << 4) | low);
		// Skip the two digits just consumed; the loop increment skips the percent symbol.
		i += 2;
	}
	return app.data;
}
724 
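/++
NOTE: Everything in this comment block is a disabled draft of Punycode hostname encoding. It is
commented out and is not compiled.
NOTE(review): in toAscii below, the bound in `(c >= 0x3A && c <= 40)` looks like it was meant to be
0x40 -- confirm before re-enabling this code.
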
726 string toAscii(string unicodeHostname) {
727 	bool mustEncode = false;
728 	foreach (i, dchar d; unicodeHostname) {
729 		auto c = cast(uint) d;
730 		if (c > 0x80) {
731 			mustEncode = true;
732 			break;
733 		}
734 		if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
735 			throw new URLException(
736 					format(
737 						"domain name '%s' contains illegal character '%s' at position %s",
738 						unicodeHostname, d, i));
739 		}
740 	}
741 	if (!mustEncode) {
742 		return unicodeHostname;
743 	}
744 	auto parts = unicodeHostname.split('.');
745 	char[] result;
746 	foreach (part; parts) {
747 		result ~= punyEncode(part);
748 	}
749 	return cast(string)result;
750 }
751 
752 string punyEncode(string item, string delimiter = null, string marker = null) {
753 	// Puny state machine initial variables.
754 	auto base = 36;
755 	auto tmin = 1;
756 	auto tmax = 26;
757 	auto skew = 38;
758 	auto damp = 700;
759 	auto initialBias = 72;
760 	long b = 0;
761 
762 	bool needToEncode = false;
763 	Appender!(char[]) app;
764 	app ~= marker;
765 	foreach (dchar d; item) {
766 		if (d > '~') {  // Max printable ASCII. The DEL char isn't allowed in hostnames.
767 			needToEncode = true;
768 		} else {
769 			app ~= d;
770 			b++;
771 		}
772 	}
773 	if (!needToEncode) {
774 		return item;
775 	}
776 	app ~= delimiter;
777 
778 	// The puny algorithm.
779 	// We use 64-bit arithmetic to avoid overflow issues -- unicode only defines up to 0x10FFFF,
780 	// and we won't be encoding gigabytes of data, but just to be safe.
781 	// Also we use signed values just to make things easier.
782 	long delta = 0;
783 	long bias = initialBias;
784 	long h = b;
785 	long lastIndex = 0;
786 
787 	dchar digitToBasic(ulong digit) {
788 		if (digit < 26) {
789 			return 'a' + cast(dchar)digit;
790 		}
791 		return cast(dchar)('0' + (digit - 26));
792 	}
793 
794 	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
795 		auto k = 0;
796 		delta = firstTime ? (delta / damp) : delta >> 1;
797 		delta += (delta / numPoints);
798 		for (; delta > (base - tmin) * tmax >> 1; k += base) {
799 			delta = (delta / (base - tmin));
800 		}
801 		return k + (base - tmin + 1) * delta / (delta + skew);
802 	}
803 
804 	auto f = filter!(x => x >= cast(dchar)128)(item).array;
805 	auto uniqueChars = uniq(std.algorithm.sorting.sort(f));
806 	foreach (dchar n; uniqueChars) {
807 		foreach (dchar c; item) {
808 			if (c < n) {
809 				delta++;
810 			} else if (c == n) {
811 				auto q = delta;
812 				for (ulong k = 0; k < cast(ulong)uint.max; k += base) {
813 					auto t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
814 					if (q < t) {
815 						break;
816 					}
817 					app ~= digitToBasic(t + ((q - t) % (base - t)));
818 					q = (q - t) / (base - t);
819 				}
820 				app ~= digitToBasic(q);
821 				bias = adapt(delta, h + 1, h == b);
822 				h++;
823 			}
824 		}
825 		delta++;
826 	}
827 	return cast(string)app.data;
828 }
829 
830 unittest {
831 	import std.stdio;
832 	auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
833 		~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
834 	writeln(a);
835 	writeln(punyEncode(a));
836 	assert(punyEncode(a) == "egbpdaj6bu4bxfgehfvwxn");
837 }
838 
839 struct URL {
840 	Host host;
841 }
842 ++/