url source code

1 /**
2 	* A URL handling library.
3 	*
	* URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
5 	* elements like port, path, username, and password.
6 	*
7 	* This module aims to make it simple to muck about with them.
8 	*
9 	* Example usage:
10 	* ---
11 	* auto url = "ssh://me:password@192.168.0.8/".parseURL;
12 	* auto files = system("ssh", url.toString, "ls").splitLines;
13 	* foreach (file; files) {
14 	*		system("scp", url ~ file, ".");
15 	* }
16 	* ---
17 	*
18 	* License: The MIT license.
19 	*/
20 module url;
21 
22 import std.conv;
23 import std.string;
24 
25 pure:
26 @safe:
27 
/// The exception type used by this module to signal URL-related failures
/// (for example, the parser catches it around percent-decoding calls).
class URLException : Exception
{
    /// Construct the exception carrying the description `msg`.
    this(string msg) pure
    {
        super(msg);
    }
}
33 
/**
 * Default ports, keyed by URL scheme.
 *
 * The table is deliberately incomplete: some schemes have no notion of a port, and some
 * that do have one do not map to a single well-defined port. Treat every entry as a best
 * guess rather than as an authoritative assignment.
 */
enum ushort[string] schemeToDefaultPort = [
    // a - d
    "aaa":        3868,
    "aaas":       5658,
    "acap":       674,
    "amqp":       5672,
    "cap":        1026,
    "coap":       5683,
    "coaps":      5684,
    "dav":        443,
    "dict":       2628,

    // f - h (plus the websocket schemes, which share the HTTP ports)
    "ftp":        21,
    "git":        9418,
    "go":         1096,
    "gopher":     70,
    "http":       80,
    "https":      443,
    "ws":         80,
    "wss":        443,

    // i - j
    "iac":        4569,
    "icap":       1344,
    "imap":       143,
    "ipp":        631,
    "ipps":       631,   // yes, ipp and ipps both use port 631
    "irc":        6667,  // de facto default port, not the IANA reserved port
    "ircs":       6697,
    "iris":       702,   // defaults to iris.beep
    "iris.beep":  702,
    "iris.lwz":   715,
    "iris.xpc":   713,
    "iris.xpcs":  714,
    "jabber":     5222,  // client-to-server

    // l - p
    "ldap":       389,
    "ldaps":      636,
    "msrp":       2855,
    "msrps":      2855,
    "mtqp":       1038,
    "mupdate":    3905,
    "news":       119,
    "nfs":        2049,
    "pop":        110,

    // r - s
    "redis":      6379,
    "reload":     6084,
    "rsync":      873,
    "rtmfp":      1935,
    "rtsp":       554,
    "shttp":      80,
    "sieve":      4190,
    "sip":        5060,
    "sips":       5061,
    "smb":        445,
    "smtp":       25,
    "snews":      563,
    "snmp":       161,
    "soap.beep":  605,
    "ssh":        22,
    "stun":       3478,
    "stuns":      5349,
    "svn":        3690,

    // t
    "teamspeak":  9987,
    "telnet":     23,
    "tftp":       69,
    "tip":        3372,
];
103 
/**
 * An ordered collection of query parameters.
 *
 * This is effectively a multimap of string -> strings: a key may occur several times, and
 * parameters are kept (and serialized) in the order in which they were added.
 */
struct QueryParams
{
    /// Hash of the stored key/value pairs. Sits above the struct's `pure:` label.
    hash_t toHash() const nothrow @safe
    {
        return typeid(params).getHash(&params);
    }

pure:
    import std.typecons;

    /// A single key/value pair.
    alias Tuple!(string, "key", string, "value") Param;

    /// The parameters, in insertion order.
    Param[] params;

    /// The number of key/value pairs stored (duplicate keys count separately).
    @property size_t length() const {
        return params.length;
    }

    /**
     * Get a range over the query parameter values for the given key.
     *
     * Only values whose key equals `key` are produced, in insertion order. The range is
     * empty when the key is absent.
     */
    auto opIndex(string key) const
    {
        import std.algorithm.iteration : filter, map;
        // `filter`, not `find`: `find` would return every parameter from the first match
        // onward, including ones stored under other keys.
        return params[].filter!(x => x.key == key).map!(x => x.value);
    }

    /// Add a query parameter with the given key and value.
    /// If one already exists, there will now be two query parameters with the given name.
    void add(string key, string value) {
        params ~= Param(key, value);
    }

    /**
     * Add a query parameter with the given key and value.
     *
     * Every existing parameter with the same key is removed first; the remaining
     * parameters keep their relative order and the new one is appended at the end.
     */
    void overwrite(string key, string value) {
        // Build a fresh array instead of editing in place: in-place removal by swapping
        // skipped the swapped-in element and also wrote into storage that copies of this
        // struct may still share.
        Param[] kept;
        foreach (existing; params) {
            if (existing.key != key) {
                kept ~= existing;
            }
        }
        kept ~= Param(key, value);
        params = kept;
    }

    /// Input range over a slice of parameters; yields one `Param` per element.
    private struct QueryParamRange
    {
pure:
        size_t i;
        const(Param)[] params;
        bool empty() { return i >= params.length; }
        void popFront() { i++; }
        Param front() { return params[i]; }
    }

    /**
     * A range over the query parameters.
     *
     * Usage:
     * ---
     * foreach (key, value; url.queryParams) {}
     * ---
     */
    auto range() const
    {
        return QueryParamRange(0, this.params);
    }
    /// ditto
    alias range this;

    /**
     * Convert this set of query parameters into a query string.
     *
     * Pairs are joined with '&'. Keys and values are percent-encoded; a pair whose value
     * is empty is written as the bare key, without '='.
     */
    string toString() const {
        import std.array : Appender;
        Appender!string s;
        foreach (index, ref pair; params) {
            if (index > 0) {
                s ~= '&';
            }
            s ~= pair.key.percentEncode;
            if (pair.value.length > 0) {
                s ~= '=';
                s ~= pair.value.percentEncode;
            }
        }
        return s.data;
    }

    /// Clone this set of query parameters; the copy owns its own parameter array.
    QueryParams dup()
    {
        QueryParams other = this;
        other.params = params.dup;
        return other;
    }

    /**
     * Order two parameter sets.
     *
     * Pairs are compared position by position, key first and then value; if one set is a
     * prefix of the other, the shorter one sorts first.
     */
    int opCmp(const ref QueryParams other) const
    {
        for (size_t i = 0; i < params.length && i < other.params.length; i++)
        {
            auto c = cmp(params[i].key, other.params[i].key);
            if (c != 0) return c;
            c = cmp(params[i].value, other.params[i].value);
            if (c != 0) return c;
        }
        if (params.length > other.params.length) return 1;
        if (params.length < other.params.length) return -1;
        return 0;
    }
}
217 
/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL
{
    /// Hash computed over the same fields that equality compares (see asTuple).
    hash_t toHash() const @safe nothrow
    {
        return asTuple().toHash();
    }

pure:
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. When a user is set, the string form
    /// always writes "user:pass@", even if the password is empty.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
     * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the
     * providedPort field.
     */
    @property ushort port() const nothrow
    {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /**
     * Set the port.
     *
     * This sets the providedPort field and is provided for convenience.
     */
    @property ushort port(ushort value) nothrow
    {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL (0 when none was given).
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query parameters associated with this URL.
     */
    QueryParams queryParams;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is
     * "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     */
    string toString() const
    {
        return toString(false);
    }

    /**
     * Convert this URL to a string.
     *
     * The string is intended to be human-readable rather than machine-readable.
     */
    string toHumanReadableString() const
    {
        return toString(true);
    }

    ///
    unittest
    {
        auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
        assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
        assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toHumanReadableString);
    }

    unittest
    {
        assert("http://example.org/some_path".parseURL.toHumanReadableString ==
                "http://example.org/some_path");
    }

    /**
     * Convert the path and query string of this URL to a string.
     *
     * The '?' is only written when there is at least one query parameter.
     */
    string toPathAndQueryString() const
    {
        if (queryParams.length > 0)
        {
            return path ~ '?' ~ queryParams.toString;
        }
        return path;
    }

    ///
    unittest
    {
        auto u = "http://example.org/index?page=12".parseURL;
        auto pathAndQuery = u.toPathAndQueryString();
        assert(pathAndQuery == "/index?page=12", pathAndQuery);
    }

    /**
     * Shared implementation of toString and toHumanReadableString.
     *
     * With `humanReadable` false, user, password, path segments and fragment are
     * percent-encoded and the host goes through toPuny; with it true, user, password, host
     * and path are written as stored. The query string and fragment are encoded either way.
     */
    private string toString(bool humanReadable) const
    {
        import std.array : Appender;
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Null test, not a length test: an unset user is omitted entirely.
        if (user) {
            s ~= humanReadable ? user : user.percentEncode;
            s ~= ":";
            s ~= humanReadable ? pass : pass.percentEncode;
            s ~= "@";
        }
        s ~= humanReadable ? host : host.toPuny;
        if (providedPort) {
            // Leave the port out when it is just the scheme's default. Look the scheme up
            // once; each use of the enum table builds a new associative array.
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length == 0 || p == "/") {
            s ~= '/';
        } else if (humanReadable) {
            // Keep host and path apart even if the stored path lacks its leading slash.
            if (p[0] != '/') {
                s ~= '/';
            }
            s ~= p;
        } else {
            if (p[0] == '/') {
                p = p[1..$];
            }
            // Encode segment by segment so the separators themselves stay literal.
            foreach (part; p.split('/')) {
                s ~= '/';
                s ~= part.percentEncode;
            }
        }
        if (queryParams.length) {
            s ~= '?';
            s ~= queryParams.toString;
        }
        if (fragment) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
      Compare two URLs.

      I tried to make the comparison produce a sort order that seems natural, so it's not
      identical to sorting based on .toString(). For instance, username/password have lower
      priority than host. The scheme has higher priority than port but lower than host.

      While the output of this is guaranteed to provide a total ordering, and I've attempted
      to make it human-friendly, it isn't guaranteed to be consistent between versions. The
      implementation and its results can change without a minor version increase.
    */
    int opCmp(const URL other) const
    {
        return asTuple.opCmp(other.asTuple);
    }

    /// The fields used for comparison, equality and hashing, in priority order.
    /// The fragment is not part of this tuple.
    private auto asTuple() const nothrow
    {
        import std.typecons : tuple;
        return tuple(host, scheme, port, user, pass, path, queryParams);
    }

    /// Equality checks. A string is parsed first; a string that fails to parse is unequal.
    bool opEquals(string other) const
    {
        URL o;
        if (!tryParseURL(other, o))
        {
            return false;
        }
        return asTuple() == o.asTuple();
    }

    /// Ditto
    bool opEquals(ref const URL other) const
    {
        return asTuple() == other.asTuple();
    }

    /// Ditto
    bool opEquals(const URL other) const
    {
        return asTuple() == other.asTuple();
    }

    unittest
    {
        import std.algorithm, std.array, std.format;
        assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL);
        alias sorted = std.algorithm.sort;
        auto parsedURLs =
        [
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "http://example.org/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
            "http://me:secret@blog.ikeran.org/wp_admin",
        ].map!(x => x.parseURL).array;
        auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array;
        auto expected =
        [
            "http://me:secret@blog.ikeran.org/wp_admin",
            "http://example.org/other_path",
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
        ];
        assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls));
    }

    unittest
    {
        auto a = "http://x.org/a?b=c".parseURL;
        auto b = "http://x.org/a?d=e".parseURL;
        auto c = "http://x.org/a?b=a".parseURL;
        assert(a < b);
        assert(c < b);
        assert(c < a);
    }

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a
     * path element to the URL's path. It only adds new path elements (or sequences of path
     * elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        // Give the result its own parameter array so later edits don't affect this URL.
        other.queryParams = queryParams.dup;
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path
     * elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        if (path.endsWith("/")) {
            // The path already supplies the separator; drop a duplicate from the argument.
            if (subsequentPath.startsWith("/")) {
                path ~= subsequentPath[1..$];
            } else {
                path ~= subsequentPath;
            }
        } else {
            if (!subsequentPath.startsWith("/")) {
                path ~= '/';
            }
            path ~= subsequentPath;
        }
        return this;
    }

    /**
     * Convert a relative URL to an absolute URL.
     *
     * This is designed so that you can scrape a webpage and quickly convert links within
     * the page to URLs you can actually work with, but you're clever; I'm sure you'll find
     * more uses for it.
     *
     * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same
     * scheme, different everything else", which might not be desirable for all schemes.
     *
     * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org',
     * for instance, this will give you our best attempt to parse it as a URL.
     *
     * The query parameters and fragment of the result come from `other` only; those of
     * this URL are not carried over. "." and ".." path segments are collapsed.
     *
     * Throws: URLException if the resolved path, query or fragment cannot be parsed.
     *
     * Examples:
     * ---
     * auto base = "https://example.org/passworddb?secure=false".parseURL;
     *
     * // Download https://example.org/passworddb/by-username/dhasenan
     * download(base.resolve("by-username/dhasenan"));
     *
     * // Download https://example.org/static/style.css
     * download(base.resolve("/static/style.css"));
     *
     * // Download https://cdn.example.net/jquery.js
     * download(base.resolve("https://cdn.example.net/jquery.js"));
     * ---
     */
    URL resolve(string other)
    {
        if (other.length == 0) return this;
        if (other[0] == '/')
        {
            if (other.length > 1 && other[1] == '/')
            {
                // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL"
                return parseURL(this.scheme ~ ':' ~ other);
            }
        }
        else
        {
            // "scheme://" counts only when it appears before any other slash.
            auto schemeSep = other.indexOf("://");
            if (schemeSep >= 0 && schemeSep < other.indexOf("/"))
            {
                // separate URL
                return other.parseURL;
            }
        }

        URL ret = this;
        ret.path = "";
        ret.queryParams = ret.queryParams.init;
        // The base URL's fragment must not leak into the resolved URL.
        ret.fragment = null;
        if (other[0] != '/')
        {
            // relative to something
            if (!this.path.length)
            {
                // nothing to be relative to
                other = "/" ~ other;
            }
            else if (this.path[$-1] == '/')
            {
                // directory-style path for the current thing
                // resolve relative to this directory
                other = this.path ~ other;
            }
            else
            {
                // this is a file-like thing
                // find the 'directory' and relative to that
                other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other;
            }
        }

        // Collapse dot segments in the path portion only; a "/../" inside the query string
        // or fragment is data and must be left alone.
        auto pathEnd = other.indexOfAny("?#");
        if (pathEnd == -1)
        {
            pathEnd = other.length;
        }
        other = removeDotSegments(other[0 .. pathEnd]) ~ other[pathEnd .. $];

        if (!parsePathAndQuery(ret, other))
        {
            throw new URLException("failed to resolve relative URL: " ~ other);
        }
        return ret;
    }

    /**
     * Collapse "." and ".." segments of a path.
     *
     * A path without such segments is returned unchanged. Otherwise the result starts with
     * '/', empty interior segments are dropped, and a trailing slash (or a trailing dot
     * segment, which denotes a directory) is kept as a trailing slash.
     */
    private static string removeDotSegments(string path)
    {
        import std.algorithm.searching : canFind;
        import std.array : join, split;

        auto segments = path.split('/');
        if (!segments.canFind!(s => s == "." || s == ".."))
        {
            return path;
        }

        string[] kept;
        foreach (idx, segment; segments)
        {
            immutable isLast = idx + 1 == segments.length;
            if (segment == "." || segment == "..")
            {
                if (segment == ".." && kept.length > 0)
                {
                    kept = kept[0 .. $ - 1];
                }
                if (isLast)
                {
                    // "/a/b/.." names the directory "/a/", so keep the trailing slash.
                    kept ~= "";
                }
            }
            else if (segment.length > 0 || isLast)
            {
                kept ~= segment;
            }
        }
        return "/" ~ kept.join("/");
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseURL;
        auto b = a.resolve("contacts/index.html");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html");
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html?a=b".parseURL;
        auto b = a.resolve("contacts/index.html?foo=bar");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar");
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseURL;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/index.html", b.toString);
    }

    unittest
    {
        auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseURL;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString);
    }

    unittest
    {
        // The base fragment is dropped and a trailing slash survives ".." collapsing.
        auto a = "http://example.org/a/b/c#top".parseURL;
        auto b = a.resolve("../d/");
        assert(b.toString == "http://example.org/a/d/", b.toString);
    }
}
662 
/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some
 * mistakes may be made. However, any URL in a correct format will be parsed correctly.
 *
 * Params:
 *   value = the text to parse, in the general shape
 *           scheme:[//[user[:password]@]host[:port]][/]path[?query][#fragment]
 *   url   = receives the parsed URL; reset to URL.init before parsing starts
 *
 * Returns: true on success; false if the input is malformed (bad percent-encoding, an
 *          unterminated IPv6 literal, or a port that is not a valid ushort).
 */
bool tryParseURL(string value, out URL url)
{
	url = URL.init;
	// Scheme is optional in common use. We infer 'http' if it's not given.
	// A "//" only marks the end of a scheme when it opens the string or directly follows
	// "scheme:" with no path/query/fragment delimiter before it. Otherwise it is part of
	// the path or query (e.g. "example.org/r?to=http://other") and must not be consumed.
	auto i = value.indexOf("//");
	immutable bool hasAuthorityMarker = i == 0
		|| (i > 0 && value[i - 1] == ':' && value[0 .. i - 1].indexOfAny("/?#") == -1);
	if (hasAuthorityMarker) {
		if (i > 1) {
			url.scheme = value[0 .. i - 1];
		}
		value = value[i + 2 .. $];
	} else {
		url.scheme = "http";
	}

	// [user[:password]@]host[:port][/path][?query][#fragment]
	// User info can only live in the authority, i.e. before the first '/', '?' or '#'.
	auto authorityEnd = value.indexOfAny("/?#");
	if (authorityEnd == -1) {
		authorityEnd = value.length;
	}
	auto at = value[0 .. authorityEnd].indexOf('@');
	if (at > -1) {
		auto userInfo = value[0 .. at];
		auto colon = userInfo.indexOf(':');
		try {
			if (colon == -1) {
				// "user@host": a user name without a password.
				url.user = userInfo.percentDecode;
			} else {
				url.user = userInfo[0 .. colon].percentDecode;
				url.pass = userInfo[colon + 1 .. $].percentDecode;
			}
		} catch (URLException) {
			return false;
		}
		value = value[at + 1 .. $];
	}

	// What remains starts with the host.
	i = value.indexOfAny(":/[?#");
	if (i == -1) {
		// Just a hostname.
		url.host = value.fromPuny;
		return true;
	}

	// Find the hostname. It's either an ipv6 address (which has special rules) or not (which
	// doesn't have special rules). -- The main sticking point is that ipv6 addresses have
	// colons, which we handle specially, and are offset with square brackets.
	if (value[i] == '[') {
		auto j = value[i .. $].indexOf(']');
		if (j < 0) {
			// unterminated ipv6 addr
			return false;
		}
		// includes square brackets
		url.host = value[i .. i + j + 1];
		value = value[i + j + 1 .. $];
		if (value.length == 0) {
			// read to end of string; we finished parse
			return true;
		}
		if (value[0] != ':' && value[0] != '?' && value[0] != '/' && value[0] != '#') {
			return false;
		}
	} else {
		// Normal host.
		url.host = value[0 .. i].fromPuny;
		value = value[i .. $];
	}

	if (value[0] == ':') {
		// The port runs up to the start of the path, query or fragment.
		auto end = value.indexOfAny("/?#");
		if (end == -1) {
			end = value.length;
		}
		try {
			url.port = value[1 .. end].to!ushort;
		} catch (ConvException) {
			return false;
		}
		value = value[end .. $];
		if (value.length == 0) {
			return true;
		}
	}

	// A "try" function reports malformed input through its return value, so make sure a
	// decoding failure in the remainder cannot escape as an exception.
	try {
		return parsePathAndQuery(url, value);
	} catch (URLException) {
		return false;
	}
}
755 
/**
 * Parse the "path[?query][#fragment]" tail of a URL into `url`.
 *
 * Sets url.path, appends each "key[=value]" pair of the query (split on '&') to
 * url.queryParams, and sets url.fragment. All components are percent-decoded.
 *
 * Returns: true on success, false if any component fails to percent-decode. Fields
 *          assigned before the failure keep their new values.
 */
private bool parsePathAndQuery(ref URL url, string value)
{
    // One handler for the whole function: every decoding failure, including one in a
    // path with no query or fragment, is reported as `false` rather than thrown.
    try
    {
        auto i = value.indexOfAny("?#");
        if (i == -1)
        {
            url.path = value.percentDecode;
            return true;
        }

        url.path = value[0 .. i].percentDecode;

        immutable delimiter = value[i];
        value = value[i + 1 .. $];
        if (delimiter == '?')
        {
            // Separate the query from an optional trailing fragment.
            string query;
            auto hashAt = value.indexOf('#');
            if (hashAt < 0)
            {
                query = value;
                value = null;
            }
            else
            {
                query = value[0 .. hashAt];
                value = value[hashAt + 1 .. $];
            }

            foreach (q; query.split('&'))
            {
                // A pair without '=' is a key with an empty value.
                auto eq = q.indexOf('=');
                string key = eq < 0 ? q : q[0 .. eq];
                string val = eq < 0 ? null : q[eq + 1 .. $];
                key = key.percentDecode;
                val = val.percentDecode;
                url.queryParams.add(key, val);
            }
        }

        // Whatever is left (possibly nothing) is the fragment.
        url.fragment = value.percentDecode;
        return true;
    }
    catch (URLException)
    {
        return false;
    }
}
828 
// Serialization of hand-built URLs. Query parameters are stored in an array and written
// in the order they were added, so each case has exactly one expected string.
unittest {
	{
		// Basic.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/foo/bar";
			queryParams.add("hello", "world");
			queryParams.add("gibe", "clay");
			fragment = "frag";
		}
		assert(
				url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag",
				url.toString);
	}
	{
		// Percent encoded.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/f☃o";
			queryParams.add("❄", "❀");
			queryParams.add("[", "]");
			fragment = "ş";
		}
		assert(
				url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F",
				url.toString);
	}
	{
		// Port, user, pass.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			user = "dhasenan";
			pass = "itsasecret";
			port = 17;
		}
		assert(
				url.toString == "https://dhasenan:itsasecret@example.org:17/",
				url.toString);
	}
	{
		// Query with no path.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			queryParams.add("hi", "bye");
		}
		assert(
				url.toString == "https://example.org/?hi=bye",
				url.toString);
	}
}
891 
// A scheme-relative URL: the text after "//" is the host, the rest is the path.
unittest
{
	auto parsed = parseURL("//foo/bar");
	assert(parsed.host == "foo", "expected host foo, got " ~ parsed.host);
	assert(parsed.path == "/bar");
}
898 
// A file URL has an empty host, no port, and round-trips through toString.
unittest
{
    auto url = "file:///foo/bar".parseURL;
    assert(url.host == null);
    assert(url.port == 0);
    assert(url.scheme == "file");
    assert(url.path == "/foo/bar");
    assert(url.toString == "file:///foo/bar");
    assert(url.queryParams.empty);
    assert(url.fragment == null);
}
911 
// IPv6 literals as hosts; the square brackets are kept as part of the host.
unittest
{
	// Every authority component present at once.
	{
		auto full = "https://bob:secret@[::1]:2771/foo/bar".parseURL;
		assert(full.scheme == "https", full.scheme);
		assert(full.user == "bob", full.user);
		assert(full.pass == "secret", full.pass);
		assert(full.host == "[::1]", full.host);
		assert(full.port == 2771, full.port.to!string);
		assert(full.path == "/foo/bar", full.path);
	}

	// Nothing but the address.
	{
		auto bare = "[::1]".parseURL;
		assert(bare.host == "[::1]", bare.host);
	}

	// Scheme, address and path.
	{
		auto withPath = "http://[::1]/foo".parseURL;
		assert(withPath.scheme == "http", withPath.scheme);
		assert(withPath.host == "[::1]", withPath.host);
		assert(withPath.path == "/foo", withPath.path);
	}

	// A long address followed by a query and a fragment.
	{
		auto longAddr = "https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding".parseURL;
		assert(longAddr.scheme == "https");
		assert(longAddr.host == "[2001:0db8:0:0:0:0:1428:57ab]");
		assert(longAddr.path == "/");
		assert(longAddr.fragment == "justkidding");
	}
}
948 
// Appending path elements to a scheme-less "host:port" URL.
unittest
{
	auto server = parseURL("localhost:5984");
	auto database = server ~ "db1";
	assert(database.toString == "http://localhost:5984/db1", database.toString);
	auto allDocs = database ~ "_all_docs";
	assert(allDocs.toString == "http://localhost:5984/db1/_all_docs", allDocs.toString);
}
957 
/// Building URLs field by field. Query parameters are written in the order they were added.
unittest {
	{
		// Basic.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/foo/bar";
			queryParams.add("hello", "world");
			queryParams.add("gibe", "clay");
			fragment = "frag";
		}
		assert(
				url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag",
				url.toString);
	}
	{
		// The same key added twice yields two parameters.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/foo/bar";
			queryParams.add("hello", "world");
			queryParams.add("hello", "aether");
			fragment = "frag";
		}
		assert(
				url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag",
				url.toString);
	}
	{
		// Percent encoded.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			path = "/f☃o";
			queryParams.add("❄", "❀");
			queryParams.add("[", "]");
			fragment = "ş";
		}
		assert(
				url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F",
				url.toString);
	}
	{
		// Port, user, pass.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			user = "dhasenan";
			pass = "itsasecret";
			port = 17;
		}
		assert(
				url.toString == "https://dhasenan:itsasecret@example.org:17/",
				url.toString);
	}
	{
		// Query with no path.
		URL url;
		with (url) {
			scheme = "https";
			host = "example.org";
			queryParams.add("hi", "bye");
		}
		assert(
				url.toString == "https://example.org/?hi=bye",
				url.toString);
	}
}
1038 
// Percent decoding of every component, followed by a round trip back to text.
unittest {
	// Decodes to user "#", password "!:", path "/{/}", and so on.
	auto encoded = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD";
	auto decoded = parseURL(encoded);
	assert(decoded.user == "#");
	assert(decoded.pass == "!:");
	assert(decoded.host == "example.org");
	assert(decoded.path == "/{/}");
	assert(decoded.queryParams[";"].front == "");
	assert(decoded.queryParams["&"].front == "=");
	assert(decoded.fragment == "#hash�");

	// Parsing and re-serializing must reproduce the input, once and twice over.
	assert(encoded == encoded.parseURL.toString, encoded.parseURL.toString);
	assert(encoded == encoded.parseURL.toString.parseURL.toString);
}
1057 
// A punycode host is stored in its Unicode form after parsing.
unittest {
	auto parsed = parseURL("https://xn--m3h.xn--n3h.org/?hi=bye");
	assert(parsed.host == "☂.☃.org", parsed.host);
}
1062 
// A Unicode host is written back out in punycode by toString.
unittest {
	auto parsed = parseURL("https://☂.☃.org/?hi=bye");
	assert(parsed.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}
1067 
///
unittest {
	// Case 1: the URL already has a path without a trailing slash.
	auto target = parseURL("http://example.org/foo");
	URL joined;
	// Suffix has no leading slash, so one is inserted.
	assert((target ~ "bar").toString == "http://example.org/foo/bar");
	// Suffix has a leading slash, so no second one is added.
	joined = target ~ "/bar";
	assert(joined.toString == "http://example.org/foo/bar", joined.toString);
	target ~= "bar";
	assert(target.toString == "http://example.org/foo/bar");

	// Case 2: the existing path ends with a slash; it is never doubled,
	// whether or not the suffix starts with one.
	target = parseURL("http://example.org/foo/");
	assert((target ~ "bar").toString == "http://example.org/foo/bar");
	assert((target ~ "/bar").toString == "http://example.org/foo/bar");
	target ~= "/bar";
	assert(target.toString == "http://example.org/foo/bar");

	// Case 3: the URL has no path at all.
	target = parseURL("http://example.org");
	assert((target ~ "bar").toString == "http://example.org/bar");
	assert((target ~ "/bar").toString == "http://example.org/bar");
	target ~= "bar";
	assert(target.toString == "http://example.org/bar");

	// Case 4: the path is a lone slash.
	target = parseURL("http://example.org/");
	assert((target ~ "bar").toString == "http://example.org/bar");
	assert((target ~ "/bar").toString == "http://example.org/bar");
	target ~= "bar";
	assert(target.toString == "http://example.org/bar", target.toString);

	// Case 5: no path, only a fragment.
	target = parseURL("ircs://irc.freenode.com/#d");
	assert(target.toString == "ircs://irc.freenode.com/#d", target.toString);
}
unittest
{
	// resolve() against a base whose path ends in a slash:
	// relative, absolute-path, and scheme-relative references.
	{
		auto root = parseURL("https://example.org/this/");
		assert(root.resolve("that") == "https://example.org/this/that");
		assert(root.resolve("/that") == "https://example.org/that");
		assert(root.resolve("//example.net/that") == "https://example.net/that");
	}

	// The base's query parameters must not leak into the resolved URL.
	{
		auto root = parseURL("https://example.org/this?query=value&other=value2");
		assert(root.resolve("that") == "https://example.org/that");
		assert(root.resolve("/that") == "https://example.org/that");
		assert(root.resolve("tother/that") == "https://example.org/tother/that");
		assert(root.resolve("//example.net/that") == "https://example.net/that");
	}
}
1126 
1127 
unittest
{
	// A URL value must be accepted directly by std.net.curl.get;
	// only the type check matters, no request is made.
	import std.net.curl;
	auto target = parseURL("http://example.org");
	assert(is(typeof(std.net.curl.get(target))));
}
1134 
/**
	* Parse a string into a URL, failing loudly on bad input.
	*
	* This is the throwing counterpart of tryParseURL: the work is delegated to it and a
	* rejected input is turned into an exception.
	*
	* Params:
	*   value = the text to parse.
	*
	* Returns:
	*   The parsed URL.
	*
	* Throws:
	*   URLException if the string was in an incorrect format.
	*/
URL parseURL(string value) {
	URL parsed;
	if (!tryParseURL(value, parsed)) {
		throw new URLException("failed to parse URL " ~ value);
	}
	return parsed;
}
1148 
///
unittest {
	{
		// No scheme supplied: it is inferred.
		auto bare = parseURL("example.org");
		assert(bare.scheme == "http");
		assert(bare.host == "example.org");
		assert(bare.path == "");
		assert(bare.port == 80);
		assert(bare.providedPort == 0);
		assert(bare.fragment == "");
	}
	{
		// Scheme and host only.
		auto plain = parseURL("https://example.org");
		assert(plain.scheme == "https");
		assert(plain.host == "example.org");
		assert(plain.path == "");
		assert(plain.port == 443);
		assert(plain.providedPort == 0);
	}
	{
		// Scheme, host and path.
		auto withPath = parseURL("https://example.org/foo/bar");
		assert(withPath.scheme == "https");
		assert(withPath.host == "example.org");
		assert(withPath.path == "/foo/bar", "expected /foo/bar but got " ~ withPath.path);
		assert(withPath.port == 443);
		assert(withPath.providedPort == 0);
	}
	{
		// An explicit port overrides the scheme default and is remembered as provided.
		auto withPort = parseURL("https://example.org:1021/foo/bar");
		assert(withPort.scheme == "https");
		assert(withPort.host == "example.org");
		assert(withPort.path == "/foo/bar", "expected /foo/bar but got " ~ withPort.path);
		assert(withPort.port == 1021);
		assert(withPort.providedPort == 1021);
	}
	{
		// User name and password.
		auto withLogin = parseURL("https://bob:secret@example.org/foo/bar");
		assert(withLogin.scheme == "https");
		assert(withLogin.host == "example.org");
		assert(withLogin.path == "/foo/bar");
		assert(withLogin.port == 443);
		assert(withLogin.user == "bob");
		assert(withLogin.pass == "secret");
	}
	{
		// Percent-encoded user name and password are decoded.
		auto encodedLogin = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
		assert(encodedLogin.scheme == "https");
		assert(encodedLogin.host == "example.org");
		assert(encodedLogin.path == "/foo/bar");
		assert(encodedLogin.port == 443);
		assert(encodedLogin.user == "bob!");
		assert(encodedLogin.pass == "secret!?");
	}
	{
		// User, password, port and path together.
		auto everything = parseURL("https://bob:secret@example.org:2210/foo/bar");
		assert(everything.scheme == "https");
		assert(everything.host == "example.org");
		assert(everything.path == "/foo/bar");
		assert(everything.port == 2210);
		assert(everything.user == "bob");
		assert(everything.pass == "secret");
		assert(everything.fragment == "");
	}
	{
		// Query string without a fragment.
		auto withQuery = parseURL("https://example.org/?login=true");
		assert(withQuery.scheme == "https");
		assert(withQuery.host == "example.org");
		assert(withQuery.path == "/", "expected path: / actual path: " ~ withQuery.path);
		assert(withQuery.queryParams["login"].front == "true");
		assert(withQuery.fragment == "");
	}
	{
		// Query string followed by a fragment.
		auto withFragment = parseURL("https://example.org/?login=true#justkidding");
		assert(withFragment.scheme == "https");
		assert(withFragment.host == "example.org");
		assert(withFragment.path == "/", "expected path: / actual path: " ~ withFragment.path);
		assert(withFragment.queryParams["login"].front == "true");
		assert(withFragment.fragment == "justkidding");
	}
	{
		// Percent-encoded path, query and fragment are all decoded.
		auto escaped = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
		assert(escaped.scheme == "https");
		assert(escaped.host == "example.org");
		assert(escaped.path == "/☃", "expected path: /☃ actual path: " ~ escaped.path);
		assert(escaped.queryParams["❄"].front == "=");
		assert(escaped.fragment == "^");
	}
}
1247 
unittest {
	// Default port versus explicitly supplied port.
	assert(parseURL("http://example.org").port == 80);
	assert(parseURL("http://example.org:5326").port == 5326);

	// A scheme that is not http(s), with credentials.
	auto redis = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
	assert(redis.scheme == "redis");
	assert(redis.user == "admin");
	assert(redis.pass == "password");

	// Serialisation adds the inferred scheme and omits a port equal to the scheme default.
	assert(parseURL("example.org").toString == "http://example.org/");
	assert(parseURL("http://example.org:80").toString == "http://example.org/");

	// "host:port" without a scheme keeps the port.
	assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}
1262 
/**
	* Percent-encode a string.
	*
	* URL components cannot contain non-ASCII characters, and only a handful of ASCII characters
	* are safe to include verbatim. Domain names with Unicode use Punycode; everything else uses
	* percent encoding.
	*
	* ASCII letters, ASCII digits and the four characters `-._~` are copied through unchanged.
	* Every other code point is converted to UTF-8 and each resulting octet is written as `%XX`
	* with upper-case hex digits.
	*
	* Params:
	*   raw = the text to encode.
	*
	* Returns:
	*   The percent-encoded text.
	*/
string percentEncode(string raw) {
	import std.array : Appender;
	import std.utf : encode;

	immutable hexDigits = "0123456789ABCDEF";
	Appender!string result;
	foreach (dchar codePoint; raw) {
		immutable bool unreserved =
			(codePoint >= 'a' && codePoint <= 'z') ||
			(codePoint >= 'A' && codePoint <= 'Z') ||
			(codePoint >= '0' && codePoint <= '9') ||
			codePoint == '-' || codePoint == '.' || codePoint == '_' || codePoint == '~';
		if (unreserved) {
			result ~= codePoint;
			continue;
		}
		// Anything else, whether ASCII punctuation or a multi-byte character, is
		// escaped octet by octet from its UTF-8 form.
		char[4] utf8;
		immutable octetCount = encode(utf8, codePoint);
		foreach (octet; utf8[0 .. octetCount]) {
			result ~= '%';
			result ~= hexDigits[octet >> 4];
			result ~= hexDigits[octet & 0x0F];
		}
	}
	return result.data;
}
1299 
///
unittest {
	// Unreserved characters pass through untouched.
	assert(percentEncode("0123456789") == "0123456789");
	assert(percentEncode("~~--..__") == "~~--..__");
	assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");

	// A non-ASCII character becomes one escape per UTF-8 octet.
	string snowman = percentEncode("☃");
	assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}
1311 
/**
	* Percent-decode a string.
	*
	* The escapes are first expanded to raw octets by percentDecodeRaw; the octets are then
	* checked to be well-formed UTF-8, so the returned string is always valid text.
	*
	* Params:
	*   encoded = text that may contain `%XX` escapes.
	*
	* Returns:
	*   The decoded text.
	*
	* Throws:
	*   URLException if the decoded octets are not valid UTF-8. Errors raised by
	*   percentDecodeRaw for malformed escapes propagate unchanged.
	*/
string percentDecode(string encoded)
{
	import std.utf : UTFException, validate;

	auto decoded = cast(string) percentDecodeRaw(encoded);
	try
	{
		validate(decoded);
	}
	catch (UTFException)
	{
		// Report the problem in terms of the URL library rather than std.utf.
		throw new URLException(
				"The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
	}
	return decoded;
}
1337 
///
unittest {
	// Text without escapes is returned as-is.
	assert(percentDecode("0123456789") == "0123456789");
	assert(percentDecode("~~--..__") == "~~--..__");
	assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");

	// Hex digits are accepted in either case.
	string upper = percentDecode("%E2%98%83");
	assert(upper == "☃", "expected a snowman but got" ~ upper);

	string lower = percentDecode("%e2%98%83");
	assert(lower == "☃", "expected a snowman but got" ~ lower);

	// %ES is an invalid percent sequence: 'S' is not a hex digit.
	bool rejectedBadDigit = false;
	try {
		percentDecode("%es");
	} catch (URLException) {
		rejectedBadDigit = true;
	}
	assert(rejectedBadDigit, "expected exception not thrown");

	// A percent sign followed by a single character is truncated.
	bool rejectedShort = false;
	try {
		percentDecode("%e");
	} catch (URLException) {
		rejectedShort = true;
	}
	assert(rejectedShort, "expected exception not thrown");
}
1365 
/**
	* Percent-decode a string into a ubyte array.
	*
	* Every `%XX` escape is replaced by the octet whose hexadecimal value is `XX` (either case);
	* every other input byte is copied through unchanged.
	*
	* The result is raw octets and is NOT validated as UTF-8; use percentDecode when text is
	* wanted.
	*
	* Params:
	*   encoded = text that may contain `%XX` escapes.
	*
	* Returns:
	*   The decoded octets.
	*
	* Throws:
	*   URLException if a percent sign is not followed by exactly two hex digits, including
	*   when the input ends right after the percent sign.
	*/
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
	// We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
	import std.array : Appender;
	Appender!(immutable(ubyte)[]) app;
	for (size_t i = 0; i < encoded.length; i++) {
		if (encoded[i] != '%') {
			app ~= encoded[i];
			continue;
		}
		// Written as an addition on purpose: `encoded.length - 2` is unsigned and wraps
		// around for inputs shorter than two bytes (e.g. "%"), which would skip this guard
		// and index past the end of the string.
		if (i + 2 >= encoded.length) {
			throw new URLException("Invalid percent encoded value: expected two characters after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		if (!isHex(encoded[i + 1]) || !isHex(encoded[i + 2])) {
			throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
					"percent symbol. Error at index " ~ i.to!string);
		}
		auto high = fromHex(encoded[i + 1]);
		auto low = fromHex(encoded[i + 2]);
		app ~= cast(ubyte)((high << 4) | low);
		// Skip the two hex digits; the loop increment steps over the '%' itself.
		i += 2;
	}
	return app.data;
}
1402 
/// True when `c` is an ASCII hexadecimal digit (0-9, a-f or A-F).
private bool isHex(char c) {
	switch (c) {
		case '0': .. case '9':
		case 'a': .. case 'f':
		case 'A': .. case 'F':
			return true;
		default:
			return false;
	}
}
1408 
/**
	* Numeric value (0-15) of a single hexadecimal digit, in either case.
	*
	* Callers are expected to check isHex first. For any other character the result is 255.
	*/
private ubyte fromHex(char s) {
	if (s >= '0' && s <= '9') {
		return cast(ubyte)(s - '0');
	}
	if (s >= 'a' && s <= 'f') {
		return cast(ubyte)(s - 'a' + 10);
	}
	if (s >= 'A' && s <= 'F') {
		return cast(ubyte)(s - 'A' + 10);
	}
	// Not a hex digit.
	return ubyte.max;
}
1416 
/**
	* Convert a host name to the ASCII form used when a URL is written out.
	*
	* An empty name yields an empty string. A name starting with '[' is treated as an IPv6
	* literal and returned untouched. A name made only of permitted ASCII characters is returned
	* as-is. Otherwise the name is split on '.' and every label is run through punyEncode.
	*
	* Validation stops at the first code point above 0x80: once encoding is known to be needed,
	* later characters are not checked here.
	*
	* Throws:
	*   URLException if a character outside the accepted ASCII ranges (0x2C-0x39, 0x41-0x5A,
	*   0x61-0x7A) is met before any code point above 0x80.
	*/
private string toPuny(string unicodeHostname)
{
	if (unicodeHostname.length == 0) return "";
	if (unicodeHostname[0] == '[')
	{
		// It's an ipv6 name.
		return unicodeHostname;
	}
	bool mustEncode = false;
	foreach (i, dchar d; unicodeHostname) {
		auto c = cast(uint) d;
		if (c > 0x80) {
			mustEncode = true;
			break;
		}
		// The second range is 0x3A-0x40 (':' through '@'). It used to be written with a
		// decimal 40 as its upper bound, which made the range empty and let those
		// characters through.
		if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
			throw new URLException(
					format(
						"domain name '%s' contains illegal character '%s' at position %s",
						unicodeHostname, d, i));
		}
	}
	if (!mustEncode) {
		return unicodeHostname;
	}
	import std.algorithm.iteration : map;
	return unicodeHostname.split('.').map!punyEncode.join(".");
}
1445 
/// Decode a host name label by label: split on '.', punyDecode each label, rejoin with '.'.
private string fromPuny(string hostname)
{
	string[] labels = hostname.split('.');
	foreach (ref label; labels) {
		label = punyDecode(label);
	}
	return labels.join(".");
}
1451 
private {
	// Parameters of the Punycode algorithm shared by punyEncode and punyDecode.
	enum delimiter = '-';      // separates the literal ASCII part from the encoded part
	enum marker = "xn--";      // prefix identifying an encoded label
	enum ulong damp = 700;
	enum ulong tmin = 1;
	enum ulong tmax = 26;
	enum ulong skew = 38;
	enum ulong base = 36;
	enum ulong initialBias = 72;
	enum dchar initialN = cast(dchar)128;

	/**
		* Bias adaptation step of Punycode.
		*
		* Params:
		*   delta = the delta that was just encoded or decoded.
		*   numPoints = number of code points handled so far, including the current one.
		*   firstTime = true for the very first delta, which is damped more strongly.
		*
		* Returns:
		*   The bias to use for the next delta.
		*/
	ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
		enum ulong span = base - tmin;
		enum ulong threshold = (span * tmax) / 2;

		delta = firstTime ? delta / damp : delta / 2;
		delta += delta / numPoints;

		ulong k = 0;
		for (; delta > threshold; k += base) {
			delta /= span;
		}
		return k + (((span + 1) * delta) / (delta + skew));
	}
}
1478 
/**
	* Encode the input string using the Punycode algorithm.
	*
	* Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
	* with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
	* in Punycode, you will get "xn--m3h.xn--n3h.com".
	*
	* In order to puny-encode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "☂.☃.com";
	* auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
	* ---
	*
	* Params:
	*   input = a single label (no dots).
	*
	* Returns:
	*   `input` unchanged when it contains only code points below 128; otherwise the "xn--"
	*   prefixed Punycode form.
	*
	* Throws:
	*   URLException if the delta exceeds the 32-bit range or no next code point can be found.
	*/
string punyEncode(string input)
{
	import std.array : Appender;
	ulong delta = 0;
	dchar n = initialN;
	auto bias = initialBias;
	Appender!string output;
	output ~= marker;
	// `pushed` counts code points already written; `codePoints` is the input's total.
	size_t pushed = 0;
	size_t codePoints = 0;
	foreach (dchar c; input) {
		codePoints++;
		// Basic code points are strictly below 128. U+0080 itself must be encoded;
		// treating it as basic would copy it raw and then encode it a second time.
		if (c < initialN) {
			output ~= c;
			pushed++;
		}
	}
	if (pushed == codePoints) {
		// No encoding to do.
		return input;
	}
	if (pushed > 0) {
		output ~= delimiter;
	}
	bool first = true;
	while (pushed < codePoints) {
		// Find the smallest code point not handled yet.
		auto best = dchar.max;
		foreach (dchar c; input) {
			if (n <= c && c < best) {
				best = c;
			}
		}
		if (best == dchar.max) {
			throw new URLException("failed to find a new codepoint to process during punyencode");
		}
		// Widen before multiplying: in 32-bit arithmetic the product could wrap around and
		// slip past the overflow check below.
		delta += cast(ulong)(best - n) * (pushed + 1);
		if (delta > uint.max) {
			// TODO better error message
			throw new URLException("overflow during punyencode");
		}
		n = best;
		foreach (dchar c; input) {
			if (c < n) {
				delta++;
			}
			if (c == n) {
				// Emit delta as a variable-length integer in base 36.
				ulong q = delta;
				auto k = base;
				while (true) {
					ulong t;
					if (k <= bias) {
						t = tmin;
					} else if (k >= bias + tmax) {
						t = tmax;
					} else {
						t = k - bias;
					}
					if (q < t) {
						break;
					}
					output ~= digitToBasic(t + ((q - t) % (base - t)));
					q = (q - t) / (base - t);
					k += base;
				}
				output ~= digitToBasic(q);
				pushed++;
				bias = adapt(delta, pushed, first);
				first = false;
				delta = 0;
			}
		}
		delta++;
		n++;
	}
	return output.data;
}
1571 
/**
	* Decode the input string using the Punycode algorithm.
	*
	* Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
	* with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
	* in Punycode, you will get "xn--m3h.xn--n3h.com".
	*
	* In order to puny-decode a domain name, you must split it into its components. The following will
	* typically suffice:
	* ---
	* auto domain = "xn--m3h.xn--n3h.com";
	* auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
	* ---
	*
	* Params:
	*   input = a single label (no dots).
	*
	* Returns:
	*   `input` unchanged when it does not start with "xn--"; otherwise the decoded label.
	*
	* Throws:
	*   URLException if the encoded part ends in the middle of a number, contains a character
	*   that is not a Punycode digit, or decodes to a value outside the Unicode range.
	*/
string punyDecode(string input) {
	if (!input.startsWith(marker)) {
		return input;
	}
	input = input[marker.length..$];

	// let n = initial_n
	dchar n = initialN;

	// let i = 0
	// let bias = initial_bias
	// let output = an empty string indexed from 0
	size_t i = 0;
	auto bias = initialBias;
	dchar[] output;
	// This reserves a bit more than necessary, but it should be more efficient overall than just
	// appending and inserting one element at a time.
	output.reserve(input.length);

	// consume all code points before the last delimiter (if there is one)
	//   and copy them to output
	// if more than zero code points were consumed then consume one more
	//   (which will be the last delimiter)
	auto end = input.lastIndexOf(delimiter);
	if (end > -1) {
		foreach (dchar c; input[0..end]) {
			output ~= c;
		}
		input = input[end+1 .. $];
	}

	// while the input is not exhausted do begin
	size_t pos = 0;
	while (pos < input.length) {
		//   let oldi = i
		//   let w = 1
		auto oldi = i;
		// 64-bit so the weight cannot silently wrap on long digit runs.
		ulong w = 1;
		//   for k = base to infinity in steps of base do begin
		for (ulong k = base; k < uint.max; k += base) {
			//     consume a code point, or fail if there was none to consume
			if (pos >= input.length) {
				throw new URLException(
						"invalid punycode: input ended in the middle of an encoded code point");
			}
			// Note that the input is all ASCII, so we can simply index the input string bytewise.
			auto c = input[pos];
			pos++;
			//     let digit = the code point's digit-value, fail if it has none
			auto digit = basicToDigit(c);
			if (digit >= base) {
				throw new URLException(
						"invalid punycode: unexpected character at index " ~ (pos - 1).to!string ~
						" of the encoded part");
			}
			//     let i = i + digit * w
			i += digit * w;
			//     let t = tmin if k <= bias {+ tmin}, or
			//             tmax if k >= bias + tmax, or k - bias otherwise
			ulong t;
			if (k <= bias) {
				t = tmin;
			} else if (k >= bias + tmax) {
				t = tmax;
			} else {
				t = k - bias;
			}
			//     if digit < t then break
			if (digit < t) {
				break;
			}
			//     let w = w * (base - t)
			w *= (base - t);
			//   end
		}
		//   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
		bias = adapt(i - oldi, output.length + 1, oldi == 0);
		//   let n = n + i div (length(output) + 1), fail on overflow
		immutable ulong next = cast(ulong) n + i / (output.length + 1);
		if (next > dchar.max) {
			throw new URLException("invalid punycode: decoded value is not a Unicode code point");
		}
		n = cast(dchar) next;
		//   let i = i mod (length(output) + 1)
		i %= (output.length + 1);
		//   {if n is a basic code point then fail}
		// (We aren't actually going to fail here; it's clear what this means.)
		//   insert n into output at position i
		import std.array : insertInPlace;
		(() @trusted { output.insertInPlace(i, cast(dchar)n); })();  // should be @safe but isn't marked
		//   increment i
		i++;
		// end
	}
	return output.to!string;
}
1669 
// Lifted from punycode.js.
// Maps a Punycode digit value to its character: 0-25 become 'a'-'z', 26-35 become '0'-'9'.
private dchar digitToBasic(ulong digit) {
	// 97 is 'a'; 22 is '0' (48) minus the 26 letters that come first.
	immutable ulong offset = (digit < 26) ? 97 : 22;
	return cast(dchar)(digit + offset);
}
1674 
// Lifted from punycode.js.
// Maps a character to its Punycode digit value: '0'-'9' give 26-35, letters of either case
// give 0-25. Any other character yields `base`, which is not a valid digit value.
private uint basicToDigit(char c) {
	if (c >= '0' && c <= '9') {
		return c - '0' + 26;
	}
	if (c >= 'A' && c <= 'Z') {
		return c - 'A';
	}
	if (c >= 'a' && c <= 'z') {
		return c - 'a';
	}
	return base;
}
1689 
unittest {
	// Known Punycode encodings, checked label by label.
	{
		// One non-ASCII character.
		auto label = "b\u00FCcher";
		assert(punyEncode(label) == "xn--bcher-kva");
	}
	{
		// The same non-ASCII character twice.
		auto label = "b\u00FCc\u00FCher";
		assert(punyEncode(label) == "xn--bcher-kvab");
	}
	{
		// Two different non-ASCII characters.
		auto label = "ýbücher";
		auto encoded = punyEncode(label);
		assert(encoded == "xn--bcher-kvaf", encoded);
	}

	{
		auto label = "mañana";
		assert(punyEncode(label) == "xn--maana-pta");
	}

	{
		// No ASCII characters at all, so the output has no literal prefix.
		auto label = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
			~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
		auto encoded = punyEncode(label);
		assert(encoded == "xn--egbpdaj6bu4bxfgehfvwxn", encoded);
	}
}
1718 
unittest {
	// Known Punycode decodings.
	{
		// Encoded part only, no literal ASCII prefix.
		auto decoded = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
		assert(decoded == "ليهمابتكلموشعربي؟", decoded);
	}
	{
		// Literal ASCII prefix followed by the encoded part.
		assert(punyDecode("xn--maana-pta") == "mañana");
	}
}
1728 
unittest {
	// Whole domain names are handled one label at a time.
	import std.string, std.algorithm, std.array, std.range;
	{
		// Decoding: labels without the "xn--" marker (here "com") pass through.
		auto encodedDomain = "xn--m3h.xn--n3h.com";
		auto unicodeDomain = encodedDomain.splitter(".").map!(punyDecode).join(".");
		assert(unicodeDomain == "☂.☃.com", unicodeDomain);
	}
	{
		// Encoding: pure-ASCII labels (here "com") pass through.
		auto unicodeDomain = "☂.☃.com";
		auto encodedDomain = unicodeDomain.splitter(".").map!(punyEncode).join(".");
		assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
	}
}
1742