1 /** 2 * A URL handling library. 3 * 4 * URLs are Unique Resource Locators. They consist of a scheme and a host, with some optional 5 * elements like port, path, username, and password. 6 * 7 * This module aims to make it simple to muck about with them. 8 * 9 * Example usage: 10 * --- 11 * auto url = "ssh://me:password@192.168.0.8/".parseURL; 12 * auto files = system("ssh", url.toString, "ls").splitLines; 13 * foreach (file; files) { 14 * system("scp", url ~ file, "."); 15 * } 16 * --- 17 * 18 * License: The MIT license. 19 */ 20 module url; 21 22 import std.conv; 23 import std.string; 24 25 pure: 26 @safe: 27 28 /// An exception thrown when something bad happens with URLs. 29 class URLException : Exception 30 { 31 this(string msg) pure { super(msg); } 32 } 33 34 /** 35 * A mapping from schemes to their default ports. 36 * 37 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to 38 * use even if they use ports. Entries here should be treated as best guesses. 39 */ 40 enum ushort[string] schemeToDefaultPort = [ 41 "aaa": 3868, 42 "aaas": 5658, 43 "acap": 674, 44 "amqp": 5672, 45 "cap": 1026, 46 "coap": 5683, 47 "coaps": 5684, 48 "dav": 443, 49 "dict": 2628, 50 "ftp": 21, 51 "git": 9418, 52 "go": 1096, 53 "gopher": 70, 54 "http": 80, 55 "https": 443, 56 "ws": 80, 57 "wss": 443, 58 "iac": 4569, 59 "icap": 1344, 60 "imap": 143, 61 "ipp": 631, 62 "ipps": 631, // yes, they're both mapped to port 631 63 "irc": 6667, // De facto default port, not the IANA reserved port. 
64 "ircs": 6697, 65 "iris": 702, // defaults to iris.beep 66 "iris.beep": 702, 67 "iris.lwz": 715, 68 "iris.xpc": 713, 69 "iris.xpcs": 714, 70 "jabber": 5222, // client-to-server 71 "ldap": 389, 72 "ldaps": 636, 73 "msrp": 2855, 74 "msrps": 2855, 75 "mtqp": 1038, 76 "mupdate": 3905, 77 "news": 119, 78 "nfs": 2049, 79 "pop": 110, 80 "redis": 6379, 81 "reload": 6084, 82 "rsync": 873, 83 "rtmfp": 1935, 84 "rtsp": 554, 85 "shttp": 80, 86 "sieve": 4190, 87 "sip": 5060, 88 "sips": 5061, 89 "smb": 445, 90 "smtp": 25, 91 "snews": 563, 92 "snmp": 161, 93 "soap.beep": 605, 94 "ssh": 22, 95 "stun": 3478, 96 "stuns": 5349, 97 "svn": 3690, 98 "teamspeak": 9987, 99 "telnet": 23, 100 "tftp": 69, 101 "tip": 3372, 102 ]; 103 104 /** 105 * A collection of query parameters. 106 * 107 * This is effectively a multimap of string -> strings. 108 */ 109 struct QueryParams 110 { 111 hash_t toHash() const nothrow @safe 112 { 113 return typeid(params).getHash(¶ms); 114 } 115 116 pure: 117 import std.typecons; 118 alias Tuple!(string, "key", string, "value") Param; 119 Param[] params; 120 121 @property size_t length() const { 122 return params.length; 123 } 124 125 /// Get a range over the query parameter values for the given key. 126 auto opIndex(string key) const 127 { 128 import std.algorithm.searching : find; 129 import std.algorithm.iteration : map; 130 return params.find!(x => x.key == key).map!(x => x.value); 131 } 132 133 /// Add a query parameter with the given key and value. 134 /// If one already exists, there will now be two query parameters with the given name. 135 void add(string key, string value) { 136 params ~= Param(key, value); 137 } 138 139 /// Add a query parameter with the given key and value. 140 /// If there are any existing parameters with the same key, they are removed and overwritten. 
141 void overwrite(string key, string value) { 142 for (int i = 0; i < params.length; i++) { 143 if (params[i].key == key) { 144 params[i] = params[$-1]; 145 params.length--; 146 } 147 } 148 params ~= Param(key, value); 149 } 150 151 private struct QueryParamRange 152 { 153 pure: 154 size_t i; 155 const(Param)[] params; 156 bool empty() { return i >= params.length; } 157 void popFront() { i++; } 158 Param front() { return params[i]; } 159 } 160 161 /** 162 * A range over the query parameters. 163 * 164 * Usage: 165 * --- 166 * foreach (key, value; url.queryParams) {} 167 * --- 168 */ 169 auto range() const 170 { 171 return QueryParamRange(0, this.params); 172 } 173 /// ditto 174 alias range this; 175 176 /// Convert this set of query parameters into a query string. 177 string toString() const { 178 import std.array : Appender; 179 Appender!string s; 180 bool first = true; 181 foreach (tuple; this) { 182 if (!first) { 183 s ~= '&'; 184 } 185 first = false; 186 s ~= tuple.key.percentEncode; 187 if (tuple.value.length > 0) { 188 s ~= '='; 189 s ~= tuple.value.percentEncode; 190 } 191 } 192 return s.data; 193 } 194 195 /// Clone this set of query parameters. 196 QueryParams dup() 197 { 198 QueryParams other = this; 199 other.params = params.dup; 200 return other; 201 } 202 203 int opCmp(const ref QueryParams other) const 204 { 205 for (int i = 0; i < params.length && i < other.params.length; i++) 206 { 207 auto c = cmp(params[i].key, other.params[i].key); 208 if (c != 0) return c; 209 c = cmp(params[i].value, other.params[i].value); 210 if (c != 0) return c; 211 } 212 if (params.length > other.params.length) return 1; 213 if (params.length < other.params.length) return -1; 214 return 0; 215 } 216 } 217 218 /** 219 * A Unique Resource Locator. 220 * 221 * URLs can be parsed (see parseURL) and implicitly convert to strings. 222 */ 223 struct URL 224 { 225 hash_t toHash() const @safe nothrow 226 { 227 return asTuple().toHash(); 228 } 229 230 pure: 231 /// The URL scheme. 
For instance, ssh, ftp, or https. 232 string scheme; 233 234 /// The username in this URL. Usually absent. If present, there will also be a password. 235 string user; 236 237 /// The password in this URL. Usually absent. 238 string pass; 239 240 /// The hostname. 241 string host; 242 243 /** 244 * The port. 245 * 246 * This is inferred from the scheme if it isn't present in the URL itself. 247 * If the scheme is not known and the port is not present, the port will be given as 0. 248 * For some schemes, port will not be sensible -- for instance, file or chrome-extension. 249 * 250 * If you explicitly need to detect whether the user provided a port, check the providedPort 251 * field. 252 */ 253 @property ushort port() const nothrow 254 { 255 if (providedPort != 0) { 256 return providedPort; 257 } 258 if (auto p = scheme in schemeToDefaultPort) { 259 return *p; 260 } 261 return 0; 262 } 263 264 /** 265 * Set the port. 266 * 267 * This sets the providedPort field and is provided for convenience. 268 */ 269 @property ushort port(ushort value) nothrow 270 { 271 return providedPort = value; 272 } 273 274 /// The port that was explicitly provided in the URL. 275 ushort providedPort; 276 277 /** 278 * The path. 279 * 280 * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is 281 * "/news/story/17774". 282 */ 283 string path; 284 285 /** 286 * The query parameters associated with this URL. 287 */ 288 QueryParams queryParams; 289 290 /** 291 * The fragment. In web documents, this typically refers to an anchor element. 292 * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2". 293 */ 294 string fragment; 295 296 /** 297 * Convert this URL to a string. 298 * The string is properly formatted and usable for, eg, a web request. 299 */ 300 string toString() const 301 { 302 return toString(false); 303 } 304 305 /** 306 * Convert this URL to a string. 
307 * 308 * The string is intended to be human-readable rather than machine-readable. 309 */ 310 string toHumanReadableString() const 311 { 312 return toString(true); 313 } 314 315 /// 316 unittest 317 { 318 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL; 319 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString); 320 assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString); 321 } 322 323 unittest 324 { 325 assert("http://example.org/some_path".parseURL.toHumanReadableString == 326 "http://example.org/some_path"); 327 } 328 329 /** 330 * Convert the path and query string of this URL to a string. 331 */ 332 string toPathAndQueryString() const 333 { 334 if (queryParams.length > 0) 335 { 336 return path ~ '?' ~ queryParams.toString; 337 } 338 return path; 339 } 340 341 /// 342 unittest 343 { 344 auto u = "http://example.org/index?page=12".parseURL; 345 auto pathAndQuery = u.toPathAndQueryString(); 346 assert(pathAndQuery == "/index?page=12", pathAndQuery); 347 } 348 349 private string toString(bool humanReadable) const 350 { 351 import std.array : Appender; 352 Appender!string s; 353 s ~= scheme; 354 s ~= "://"; 355 if (user) { 356 s ~= humanReadable ? user : user.percentEncode; 357 s ~= ":"; 358 s ~= humanReadable ? pass : pass.percentEncode; 359 s ~= "@"; 360 } 361 s ~= humanReadable ? 
host : host.toPuny; 362 if (providedPort) { 363 if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) { 364 s ~= ":"; 365 s ~= providedPort.to!string; 366 } 367 } 368 string p = path; 369 if (p.length == 0 || p == "/") { 370 s ~= '/'; 371 } else { 372 if (humanReadable) { 373 s ~= p; 374 } else { 375 if (p[0] == '/') { 376 p = p[1..$]; 377 } 378 foreach (part; p.split('/')) { 379 s ~= '/'; 380 s ~= part.percentEncode; 381 } 382 } 383 } 384 if (queryParams.length) { 385 s ~= '?'; 386 s ~= queryParams.toString; 387 } if (fragment) { 388 s ~= '#'; 389 s ~= fragment.percentEncode; 390 } 391 return s.data; 392 } 393 394 /// Implicitly convert URLs to strings. 395 alias toString this; 396 397 /** 398 Compare two URLs. 399 400 I tried to make the comparison produce a sort order that seems natural, so it's not identical 401 to sorting based on .toString(). For instance, username/password have lower priority than 402 host. The scheme has higher priority than port but lower than host. 403 404 While the output of this is guaranteed to provide a total ordering, and I've attempted to make 405 it human-friendly, it isn't guaranteed to be consistent between versions. The implementation 406 and its results can change without a minor version increase. 407 */ 408 int opCmp(const URL other) const 409 { 410 return asTuple.opCmp(other.asTuple); 411 } 412 413 private auto asTuple() const nothrow 414 { 415 import std.typecons : tuple; 416 return tuple(host, scheme, port, user, pass, path, queryParams); 417 } 418 419 /// Equality checks. 
420 bool opEquals(string other) const 421 { 422 URL o; 423 if (!tryParseURL(other, o)) 424 { 425 return false; 426 } 427 return asTuple() == o.asTuple(); 428 } 429 430 /// Ditto 431 bool opEquals(ref const URL other) const 432 { 433 return asTuple() == other.asTuple(); 434 } 435 436 /// Ditto 437 bool opEquals(const URL other) const 438 { 439 return asTuple() == other.asTuple(); 440 } 441 442 unittest 443 { 444 import std.algorithm, std.array, std.format; 445 assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL); 446 alias sorted = std.algorithm.sort; 447 auto parsedURLs = 448 [ 449 "http://example.org/some_path", 450 "http://example.org:81/other_path", 451 "http://example.org/other_path", 452 "https://example.org/first_path", 453 "http://example.xyz/other_other_path", 454 "http://me:secret@blog.ikeran.org/wp_admin", 455 ].map!(x => x.parseURL).array; 456 auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array; 457 auto expected = 458 [ 459 "http://me:secret@blog.ikeran.org/wp_admin", 460 "http://example.org/other_path", 461 "http://example.org/some_path", 462 "http://example.org:81/other_path", 463 "https://example.org/first_path", 464 "http://example.xyz/other_other_path", 465 ]; 466 assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls)); 467 } 468 469 unittest 470 { 471 auto a = "http://x.org/a?b=c".parseURL; 472 auto b = "http://x.org/a?d=e".parseURL; 473 auto c = "http://x.org/a?b=a".parseURL; 474 assert(a < b); 475 assert(c < b); 476 assert(c < a); 477 } 478 479 /** 480 * The append operator (~). 481 * 482 * The append operator for URLs returns a new URL with the given string appended as a path 483 * element to the URL's path. It only adds new path elements (or sequences of path elements). 484 * 485 * Don't worry about path separators; whether you include them or not, it will just work. 486 * 487 * Query elements are copied. 
488 * 489 * Examples: 490 * --- 491 * auto random = "http://testdata.org/random".parseURL; 492 * auto randInt = random ~ "int"; 493 * writeln(randInt); // prints "http://testdata.org/random/int" 494 * --- 495 */ 496 URL opBinary(string op : "~")(string subsequentPath) { 497 URL other = this; 498 other ~= subsequentPath; 499 other.queryParams = queryParams.dup; 500 return other; 501 } 502 503 /** 504 * The append-in-place operator (~=). 505 * 506 * The append operator for URLs adds a path element to this URL. It only adds new path elements 507 * (or sequences of path elements). 508 * 509 * Don't worry about path separators; whether you include them or not, it will just work. 510 * 511 * Examples: 512 * --- 513 * auto random = "http://testdata.org/random".parseURL; 514 * random ~= "int"; 515 * writeln(random); // prints "http://testdata.org/random/int" 516 * --- 517 */ 518 URL opOpAssign(string op : "~")(string subsequentPath) { 519 if (path.endsWith("/")) { 520 if (subsequentPath.startsWith("/")) { 521 path ~= subsequentPath[1..$]; 522 } else { 523 path ~= subsequentPath; 524 } 525 } else { 526 if (!subsequentPath.startsWith("/")) { 527 path ~= '/'; 528 } 529 path ~= subsequentPath; 530 } 531 return this; 532 } 533 534 /** 535 * Convert a relative URL to an absolute URL. 536 * 537 * This is designed so that you can scrape a webpage and quickly convert links within the 538 * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses 539 * for it. 540 * 541 * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme, 542 * different everything else", which might not be desirable for all schemes. 543 * 544 * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for 545 * instance, this will give you our best attempt to parse it as a URL. 
546 * 547 * Examples: 548 * --- 549 * auto base = "https://example.org/passworddb?secure=false".parseURL; 550 * 551 * // Download https://example.org/passworddb/by-username/dhasenan 552 * download(base.resolve("by-username/dhasenan")); 553 * 554 * // Download https://example.org/static/style.css 555 * download(base.resolve("/static/style.css")); 556 * 557 * // Download https://cdn.example.net/jquery.js 558 * download(base.resolve("https://cdn.example.net/jquery.js")); 559 * --- 560 */ 561 URL resolve(string other) 562 { 563 if (other.length == 0) return this; 564 if (other[0] == '/') 565 { 566 if (other.length > 1 && other[1] == '/') 567 { 568 // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL" 569 return parseURL(this.scheme ~ ':' ~ other); 570 } 571 } 572 else 573 { 574 auto schemeSep = other.indexOf("://"); 575 if (schemeSep >= 0 && schemeSep < other.indexOf("/")) 576 // separate URL 577 { 578 return other.parseURL; 579 } 580 } 581 582 URL ret = this; 583 ret.path = ""; 584 ret.queryParams = ret.queryParams.init; 585 if (other[0] != '/') 586 { 587 // relative to something 588 if (!this.path.length) 589 { 590 // nothing to be relative to 591 other = "/" ~ other; 592 } 593 else if (this.path[$-1] == '/') 594 { 595 // directory-style path for the current thing 596 // resolve relative to this directory 597 other = this.path ~ other; 598 } 599 else 600 { 601 // this is a file-like thing 602 // find the 'directory' and relative to that 603 other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other; 604 } 605 } 606 // collapse /foo/../ to / 607 if (other.indexOf("/../") >= 0) 608 { 609 import std.array : Appender, array; 610 import std.string : split; 611 import std.algorithm.iteration : joiner, filter; 612 string[] parts = other.split('/'); 613 for (int i = 0; i < parts.length; i++) 614 { 615 if (parts[i] == "..") 616 { 617 for (int j = i - 1; j >= 0; j--) 618 { 619 if (parts[j] != null) 620 { 621 parts[j] = null; 622 parts[i] = null; 
623 break; 624 } 625 } 626 } 627 } 628 other = "/" ~ parts.filter!(x => x != null).joiner("/").to!string; 629 } 630 parsePathAndQuery(ret, other); 631 return ret; 632 } 633 634 unittest 635 { 636 auto a = "http://alcyius.com/dndtools/index.html".parseURL; 637 auto b = a.resolve("contacts/index.html"); 638 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html"); 639 } 640 641 unittest 642 { 643 auto a = "http://alcyius.com/dndtools/index.html?a=b".parseURL; 644 auto b = a.resolve("contacts/index.html?foo=bar"); 645 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar"); 646 } 647 648 unittest 649 { 650 auto a = "http://alcyius.com/dndtools/index.html".parseURL; 651 auto b = a.resolve("../index.html"); 652 assert(b.toString == "http://alcyius.com/index.html", b.toString); 653 } 654 655 unittest 656 { 657 auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseURL; 658 auto b = a.resolve("../index.html"); 659 assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString); 660 } 661 } 662 663 /** 664 * Parse a URL from a string. 665 * 666 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes 667 * may be made. However, any URL in a correct format will be parsed correctly. 668 */ 669 bool tryParseURL(string value, out URL url) 670 { 671 url = URL.init; 672 // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment] 673 // Scheme is optional in common use. We infer 'http' if it's not given. 674 auto i = value.indexOf("//"); 675 if (i > -1) { 676 if (i > 1) { 677 url.scheme = value[0..i-1]; 678 } 679 value = value[i+2 .. $]; 680 } else { 681 url.scheme = "http"; 682 } 683 // Check for an ipv6 hostname. 684 // [user:password@]host[:port]][/]path[?query][#fragment 685 i = value.indexOfAny([':', '/', '[']); 686 if (i == -1) { 687 // Just a hostname. 
688 url.host = value.fromPuny; 689 return true; 690 } 691 692 if (value[i] == ':') { 693 // This could be between username and password, or it could be between host and port. 694 auto j = value.indexOfAny(['@', '/']); 695 if (j > -1 && value[j] == '@') { 696 try { 697 url.user = value[0..i].percentDecode; 698 url.pass = value[i+1 .. j].percentDecode; 699 } catch (URLException) { 700 return false; 701 } 702 value = value[j+1 .. $]; 703 } 704 } 705 706 // It's trying to be a host/port, not a user/pass. 707 i = value.indexOfAny([':', '/', '[']); 708 if (i == -1) { 709 url.host = value.fromPuny; 710 return true; 711 } 712 713 // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't 714 // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we 715 // handle specially, and are offset with square brackets. 716 if (value[i] == '[') { 717 auto j = value[i..$].indexOf(']'); 718 if (j < 0) { 719 // unterminated ipv6 addr 720 return false; 721 } 722 // includes square brackets 723 url.host = value[i .. i+j+1]; 724 value = value[i+j+1 .. $]; 725 if (value.length == 0) { 726 // read to end of string; we finished parse 727 return true; 728 } 729 if (value[0] != ':' && value[0] != '?' && value[0] != '/') { 730 return false; 731 } 732 } else { 733 // Normal host. 734 url.host = value[0..i].fromPuny; 735 value = value[i .. $]; 736 } 737 738 if (value[0] == ':') { 739 auto end = value.indexOf('/'); 740 if (end == -1) { 741 end = value.length; 742 } 743 try { 744 url.port = value[1 .. end].to!ushort; 745 } catch (ConvException) { 746 return false; 747 } 748 value = value[end .. 
$]; 749 if (value.length == 0) { 750 return true; 751 } 752 } 753 return parsePathAndQuery(url, value); 754 } 755 756 private bool parsePathAndQuery(ref URL url, string value) 757 { 758 auto i = value.indexOfAny("?#"); 759 if (i == -1) 760 { 761 url.path = value.percentDecode; 762 return true; 763 } 764 765 try 766 { 767 url.path = value[0..i].percentDecode; 768 } 769 catch (URLException) 770 { 771 return false; 772 } 773 774 auto c = value[i]; 775 value = value[i + 1 .. $]; 776 if (c == '?') 777 { 778 i = value.indexOf('#'); 779 string query; 780 if (i < 0) 781 { 782 query = value; 783 value = null; 784 } 785 else 786 { 787 query = value[0..i]; 788 value = value[i + 1 .. $]; 789 } 790 auto queries = query.split('&'); 791 foreach (q; queries) 792 { 793 auto j = q.indexOf('='); 794 string key, val; 795 if (j < 0) 796 { 797 key = q; 798 } 799 else 800 { 801 key = q[0..j]; 802 val = q[j + 1 .. $]; 803 } 804 try 805 { 806 key = key.percentDecode; 807 val = val.percentDecode; 808 } 809 catch (URLException) 810 { 811 return false; 812 } 813 url.queryParams.add(key, val); 814 } 815 } 816 817 try 818 { 819 url.fragment = value.percentDecode; 820 } 821 catch (URLException) 822 { 823 return false; 824 } 825 826 return true; 827 } 828 829 unittest { 830 { 831 // Basic. 832 URL url; 833 with (url) { 834 scheme = "https"; 835 host = "example.org"; 836 path = "/foo/bar"; 837 queryParams.add("hello", "world"); 838 queryParams.add("gibe", "clay"); 839 fragment = "frag"; 840 } 841 assert( 842 // Not sure what order it'll come out in. 843 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 844 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 845 url.toString); 846 } 847 { 848 // Percent encoded. 849 URL url; 850 with (url) { 851 scheme = "https"; 852 host = "example.org"; 853 path = "/f☃o"; 854 queryParams.add("❄", "❀"); 855 queryParams.add("[", "]"); 856 fragment = "ş"; 857 } 858 assert( 859 // Not sure what order it'll come out in. 
860 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 861 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 862 url.toString); 863 } 864 { 865 // Port, user, pass. 866 URL url; 867 with (url) { 868 scheme = "https"; 869 host = "example.org"; 870 user = "dhasenan"; 871 pass = "itsasecret"; 872 port = 17; 873 } 874 assert( 875 url.toString == "https://dhasenan:itsasecret@example.org:17/", 876 url.toString); 877 } 878 { 879 // Query with no path. 880 URL url; 881 with (url) { 882 scheme = "https"; 883 host = "example.org"; 884 queryParams.add("hi", "bye"); 885 } 886 assert( 887 url.toString == "https://example.org/?hi=bye", 888 url.toString); 889 } 890 } 891 892 unittest 893 { 894 auto url = "//foo/bar".parseURL; 895 assert(url.host == "foo", "expected host foo, got " ~ url.host); 896 assert(url.path == "/bar"); 897 } 898 899 unittest 900 { 901 import std.stdio : writeln; 902 auto url = "file:///foo/bar".parseURL; 903 assert(url.host == null); 904 assert(url.port == 0); 905 assert(url.scheme == "file"); 906 assert(url.path == "/foo/bar"); 907 assert(url.toString == "file:///foo/bar"); 908 assert(url.queryParams.empty); 909 assert(url.fragment == null); 910 } 911 912 unittest 913 { 914 // ipv6 hostnames! 
915 { 916 // full range of data 917 auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar"); 918 assert(url.scheme == "https", url.scheme); 919 assert(url.user == "bob", url.user); 920 assert(url.pass == "secret", url.pass); 921 assert(url.host == "[::1]", url.host); 922 assert(url.port == 2771, url.port.to!string); 923 assert(url.path == "/foo/bar", url.path); 924 } 925 926 // minimal 927 { 928 auto url = parseURL("[::1]"); 929 assert(url.host == "[::1]", url.host); 930 } 931 932 // some random bits 933 { 934 auto url = parseURL("http://[::1]/foo"); 935 assert(url.scheme == "http", url.scheme); 936 assert(url.host == "[::1]", url.host); 937 assert(url.path == "/foo", url.path); 938 } 939 940 { 941 auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding"); 942 assert(url.scheme == "https"); 943 assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]"); 944 assert(url.path == "/"); 945 assert(url.fragment == "justkidding"); 946 } 947 } 948 949 unittest 950 { 951 auto url = "localhost:5984".parseURL; 952 auto url2 = url ~ "db1"; 953 assert(url2.toString == "http://localhost:5984/db1", url2.toString); 954 auto url3 = url2 ~ "_all_docs"; 955 assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString); 956 } 957 958 /// 959 unittest { 960 { 961 // Basic. 962 URL url; 963 with (url) { 964 scheme = "https"; 965 host = "example.org"; 966 path = "/foo/bar"; 967 queryParams.add("hello", "world"); 968 queryParams.add("gibe", "clay"); 969 fragment = "frag"; 970 } 971 assert( 972 // Not sure what order it'll come out in. 973 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 974 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 975 url.toString); 976 } 977 { 978 // Passing an array of query values. 
979 URL url; 980 with (url) { 981 scheme = "https"; 982 host = "example.org"; 983 path = "/foo/bar"; 984 queryParams.add("hello", "world"); 985 queryParams.add("hello", "aether"); 986 fragment = "frag"; 987 } 988 assert( 989 // Not sure what order it'll come out in. 990 url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" || 991 url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag", 992 url.toString); 993 } 994 { 995 // Percent encoded. 996 URL url; 997 with (url) { 998 scheme = "https"; 999 host = "example.org"; 1000 path = "/f☃o"; 1001 queryParams.add("❄", "❀"); 1002 queryParams.add("[", "]"); 1003 fragment = "ş"; 1004 } 1005 assert( 1006 // Not sure what order it'll come out in. 1007 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 1008 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 1009 url.toString); 1010 } 1011 { 1012 // Port, user, pass. 1013 URL url; 1014 with (url) { 1015 scheme = "https"; 1016 host = "example.org"; 1017 user = "dhasenan"; 1018 pass = "itsasecret"; 1019 port = 17; 1020 } 1021 assert( 1022 url.toString == "https://dhasenan:itsasecret@example.org:17/", 1023 url.toString); 1024 } 1025 { 1026 // Query with no path. 1027 URL url; 1028 with (url) { 1029 scheme = "https"; 1030 host = "example.org"; 1031 queryParams.add("hi", "bye"); 1032 } 1033 assert( 1034 url.toString == "https://example.org/?hi=bye", 1035 url.toString); 1036 } 1037 } 1038 1039 unittest { 1040 // Percent decoding. 1041 1042 // http://#:!:@ 1043 auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD"; 1044 auto url = urlString.parseURL; 1045 assert(url.user == "#"); 1046 assert(url.pass == "!:"); 1047 assert(url.host == "example.org"); 1048 assert(url.path == "/{/}"); 1049 assert(url.queryParams[";"].front == ""); 1050 assert(url.queryParams["&"].front == "="); 1051 assert(url.fragment == "#hash�"); 1052 1053 // Round trip. 
1054 assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString); 1055 assert(urlString == urlString.parseURL.toString.parseURL.toString); 1056 } 1057 1058 unittest { 1059 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL; 1060 assert(url.host == "☂.☃.org", url.host); 1061 } 1062 1063 unittest { 1064 auto url = "https://☂.☃.org/?hi=bye".parseURL; 1065 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye"); 1066 } 1067 1068 /// 1069 unittest { 1070 // There's an existing path. 1071 auto url = parseURL("http://example.org/foo"); 1072 URL url2; 1073 // No slash? Assume it needs a slash. 1074 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 1075 // With slash? Don't add another. 1076 url2 = url ~ "/bar"; 1077 assert(url2.toString == "http://example.org/foo/bar", url2.toString); 1078 url ~= "bar"; 1079 assert(url.toString == "http://example.org/foo/bar"); 1080 1081 // Path already ends with a slash; don't add another. 1082 url = parseURL("http://example.org/foo/"); 1083 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 1084 // Still don't add one even if you're appending with a slash. 1085 assert((url ~ "/bar").toString == "http://example.org/foo/bar"); 1086 url ~= "/bar"; 1087 assert(url.toString == "http://example.org/foo/bar"); 1088 1089 // No path. 1090 url = parseURL("http://example.org"); 1091 assert((url ~ "bar").toString == "http://example.org/bar"); 1092 assert((url ~ "/bar").toString == "http://example.org/bar"); 1093 url ~= "bar"; 1094 assert(url.toString == "http://example.org/bar"); 1095 1096 // Path is just a slash. 1097 url = parseURL("http://example.org/"); 1098 assert((url ~ "bar").toString == "http://example.org/bar"); 1099 assert((url ~ "/bar").toString == "http://example.org/bar"); 1100 url ~= "bar"; 1101 assert(url.toString == "http://example.org/bar", url.toString); 1102 1103 // No path, just fragment. 
1104 url = "ircs://irc.freenode.com/#d".parseURL; 1105 assert(url.toString == "ircs://irc.freenode.com/#d", url.toString); 1106 } 1107 unittest 1108 { 1109 // basic resolve() 1110 { 1111 auto base = "https://example.org/this/".parseURL; 1112 assert(base.resolve("that") == "https://example.org/this/that"); 1113 assert(base.resolve("/that") == "https://example.org/that"); 1114 assert(base.resolve("//example.net/that") == "https://example.net/that"); 1115 } 1116 1117 // ensure we don't preserve query params 1118 { 1119 auto base = "https://example.org/this?query=value&other=value2".parseURL; 1120 assert(base.resolve("that") == "https://example.org/that"); 1121 assert(base.resolve("/that") == "https://example.org/that"); 1122 assert(base.resolve("tother/that") == "https://example.org/tother/that"); 1123 assert(base.resolve("//example.net/that") == "https://example.net/that"); 1124 } 1125 } 1126 1127 1128 unittest 1129 { 1130 import std.net.curl; 1131 auto url = "http://example.org".parseURL; 1132 assert(is(typeof(std.net.curl.get(url)))); 1133 } 1134 1135 /** 1136 * Parse the input string as a URL. 1137 * 1138 * Throws: 1139 * URLException if the string was in an incorrect format. 
/**
 * Parse a URL from a string.
 *
 * This is the throwing counterpart of tryParseURL.
 *
 * Params:
 *   value = the text to parse
 * Returns: the URL that tryParseURL filled in for value.
 * Throws: URLException if tryParseURL reports that value could not be parsed.
 */
URL parseURL(string value) {
    URL parsed;
    if (!tryParseURL(value, parsed)) {
        throw new URLException("failed to parse URL " ~ value);
    }
    return parsed;
}

///
unittest {
    {
        // The scheme is inferred when it is left out.
        auto inferred = parseURL("example.org");
        assert(inferred.scheme == "http");
        assert(inferred.host == "example.org");
        assert(inferred.path == "");
        assert(inferred.port == 80);
        assert(inferred.providedPort == 0);
        assert(inferred.fragment == "");
    }
    {
        // Scheme and host only.
        auto simple = parseURL("https://example.org");
        assert(simple.scheme == "https");
        assert(simple.host == "example.org");
        assert(simple.path == "");
        assert(simple.port == 443);
        assert(simple.providedPort == 0);
    }
    {
        // Scheme, host and path.
        auto withPath = parseURL("https://example.org/foo/bar");
        assert(withPath.scheme == "https");
        assert(withPath.host == "example.org");
        assert(withPath.path == "/foo/bar", "expected /foo/bar but got " ~ withPath.path);
        assert(withPath.port == 443);
        assert(withPath.providedPort == 0);
    }
    {
        // An explicit port overrides the scheme's default.
        auto withPort = parseURL("https://example.org:1021/foo/bar");
        assert(withPort.scheme == "https");
        assert(withPort.host == "example.org");
        assert(withPort.path == "/foo/bar", "expected /foo/bar but got " ~ withPort.path);
        assert(withPort.port == 1021);
        assert(withPort.providedPort == 1021);
    }
    {
        // User name and password.
        auto withUser = parseURL("https://bob:secret@example.org/foo/bar");
        assert(withUser.scheme == "https");
        assert(withUser.host == "example.org");
        assert(withUser.path == "/foo/bar");
        assert(withUser.port == 443);
        assert(withUser.user == "bob");
        assert(withUser.pass == "secret");
    }
    {
        // User name and password, percent-encoded.
        auto encodedUser = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(encodedUser.scheme == "https");
        assert(encodedUser.host == "example.org");
        assert(encodedUser.path == "/foo/bar");
        assert(encodedUser.port == 443);
        assert(encodedUser.user == "bob!");
        assert(encodedUser.pass == "secret!?");
    }
    {
        // User, port and path together.
        auto everything = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(everything.scheme == "https");
        assert(everything.host == "example.org");
        assert(everything.path == "/foo/bar");
        assert(everything.port == 2210);
        assert(everything.user == "bob");
        assert(everything.pass == "secret");
        assert(everything.fragment == "");
    }
    {
        // Query string.
        auto withQuery = parseURL("https://example.org/?login=true");
        assert(withQuery.scheme == "https");
        assert(withQuery.host == "example.org");
        assert(withQuery.path == "/", "expected path: / actual path: " ~ withQuery.path);
        assert(withQuery.queryParams["login"].front == "true");
        assert(withQuery.fragment == "");
    }
    {
        // Query string followed by a fragment.
        auto withFragment = parseURL("https://example.org/?login=true#justkidding");
        assert(withFragment.scheme == "https");
        assert(withFragment.host == "example.org");
        assert(withFragment.path == "/", "expected path: / actual path: " ~ withFragment.path);
        assert(withFragment.queryParams["login"].front == "true");
        assert(withFragment.fragment == "justkidding");
    }
    {
        // Percent-encoded path, query and fragment.
        auto encodedParts = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(encodedParts.scheme == "https");
        assert(encodedParts.host == "example.org");
        assert(encodedParts.path == "/☃", "expected path: /☃ actual path: " ~ encodedParts.path);
        assert(encodedParts.queryParams["❄"].front == "=");
        assert(encodedParts.fragment == "^");
    }
}

unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto redisUrl = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(redisUrl.scheme == "redis");
    assert(redisUrl.user == "admin");
    assert(redisUrl.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * ASCII letters, digits and the characters `-._~` are copied through unchanged. Every other code
 * point is replaced by the percent-encoded octets of its UTF-8 representation, written with
 * upper-case hexadecimal digits.
 *
 * Params:
 *   raw = the text to encode
 * Returns: the percent-encoded text.
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    import std.array : Appender;
    import std.utf : encode;

    static immutable string upperHex = "0123456789ABCDEF";
    Appender!string encoded;
    foreach (dchar point; raw) {
        immutable bool unreserved =
            (point >= 'a' && point <= 'z') ||
            (point >= 'A' && point <= 'Z') ||
            (point >= '0' && point <= '9') ||
            point == '-' || point == '.' || point == '_' || point == '~';
        if (unreserved) {
            encoded ~= point;
            continue;
        }
        // Anything else, whether it is 7-bit ASCII such as a space or a code point outside
        // ASCII, is written as one %XX triplet per octet of its UTF-8 encoding (and we hope the
        // server understands UTF-8).
        char[4] utf8;
        immutable count = encode(utf8, point);
        foreach (char unit; utf8[0 .. count]) {
            encoded ~= '%';
            encoded ~= upperHex[unit >> 4];
            encoded ~= upperHex[unit & 0x0F];
        }
    }
    return encoded.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    immutable snowman = percentEncode("☃");
    assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}
/**
 * Percent-decode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Params:
 *   encoded = the percent-encoded text
 * Returns: the decoded text.
 * Throws: URLException if the decoded octets are not valid UTF-8. Exceptions thrown by
 *         percentDecodeRaw are not caught here.
 */
string percentDecode(string encoded)
{
    import std.utf : UTFException, validate;
    auto decoded = cast(string) percentDecodeRaw(encoded);
    try
    {
        validate(decoded);
    }
    catch (UTFException)
    {
        throw new URLException(
            "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
    return decoded;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string snowman;

    // Upper-case hex digits.
    snowman = percentDecode("%E2%98%83");
    assert(snowman == "☃", "expected a snowman but got" ~ snowman);

    // Lower-case hex digits decode to the same thing.
    snowman = percentDecode("%e2%98%83");
    assert(snowman == "☃", "expected a snowman but got" ~ snowman);

    try {
        // %ES is an invalid percent sequence: 'S' is not a hex digit.
        percentDecode("%es");
        assert(false, "expected exception not thrown");
    } catch (URLException) {
    }

    try {
        // Only one character follows the percent sign.
        percentDecode("%e");
        assert(false, "expected exception not thrown");
    } catch (URLException) {
    }
}
/**
 * Percent-decode a string into a ubyte array.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Params:
 *   encoded = the percent-encoded text
 * Returns: the decoded octets; characters other than `%` are copied through unchanged.
 * Throws: URLException if a `%` is not followed by exactly two hexadecimal digits, including when
 *         the input ends right after the `%`.
 */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= encoded[i];
            continue;
        }
        // Phrased as an addition on purpose: `encoded.length - 2` is unsigned and wraps around
        // for inputs shorter than two characters (for example "%"), which used to let the
        // indexing below run past the end of the string.
        if (i + 2 >= encoded.length) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (!isHex(encoded[i + 1]) || !isHex(encoded[i + 2])) {
            throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        immutable high = fromHex(encoded[i + 1]);
        immutable low = fromHex(encoded[i + 2]);
        app ~= cast(ubyte)((high << 4) | low);
        // Skip the two digits that were just consumed; the loop increment skips the '%'.
        i += 2;
    }
    return app.data;
}

// True when c is an ASCII hexadecimal digit of either case.
private bool isHex(char c) {
    return ('0' <= c && '9' >= c) ||
        ('a' <= c && 'f' >= c) ||
        ('A' <= c && 'F' >= c);
}

// Numeric value (0-15) of an ASCII hexadecimal digit of either case.
// Any other character yields ubyte.max; callers check isHex first.
private ubyte fromHex(char s) {
    if (s >= '0' && s <= '9') {
        return cast(ubyte)(s - '0');
    }
    if (s >= 'a' && s <= 'f') {
        return cast(ubyte)(s - 'a' + 10);
    }
    if (s >= 'A' && s <= 'F') {
        return cast(ubyte)(s - 'A' + 10);
    }
    return ubyte.max;
}
/*
 * Convert a host name to the form used on the wire.
 *
 * Empty names and names starting with '[' (IPv6 literals) are returned unchanged. A name made up
 * only of permitted ASCII characters is returned unchanged as well. Otherwise the name is split
 * on '.' and each label is passed through punyEncode.
 *
 * Throws: URLException if the name contains an ASCII character outside `,-./`, digits and
 *         letters, or the code point U+0080.
 */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0) return "";
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            // Keep scanning instead of stopping here, so that illegal ASCII characters that
            // follow the first non-ASCII one are still rejected.
            mustEncode = true;
            continue;
        }
        // Rejected ranges: everything below ',', ':' through '@', '[' through '`', and '{' upwards.
        // The upper bound of the second range is 0x40 ('@'); it was previously written as
        // decimal 40, which made that range empty.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}

/*
 * Convert a host name from its wire form: the name is split on '.' and each label is passed
 * through punyDecode.
 */
private string fromPuny(string hostname)
{
    import std.algorithm.iteration : map;
    return hostname.split('.').map!punyDecode.join(".");
}

private {
    // Parameters of the Punycode algorithm as used by punyEncode and punyDecode.
    enum delimiter = '-';
    enum marker = "xn--";
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /*
     * Bias adaptation step shared by the encoder and the decoder.
     *
     * Params:
     *   delta = the delta that was just encoded or decoded
     *   numPoints = the number of code points handled so far, including the current one;
     *               callers always pass at least 1
     *   firstTime = true for the first delta of a label, which is damped more strongly
     * Returns: the bias to use for the next delta.
     */
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        if (firstTime) {
            delta /= damp;
        } else {
            delta /= 2;
        }
        delta += delta / numPoints;
        ulong k = 0;
        while (delta > ((base - tmin) * tmax) / 2) {
            delta /= (base - tmin);
            k += base;
        }
        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }
}
/**
 * Encode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-encode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "☂.☃.com";
 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: the input itself if it contains only ASCII code points, otherwise the encoded segment
 *          prefixed with "xn--".
 * Throws: URLException if the delta computation overflows 32 bits.
 */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;
    // Both counters are 64-bit so that the delta arithmetic below is carried out in 64 bits and
    // the overflow check can actually observe a value above uint.max.
    ulong pushed = 0;
    ulong codePoints = 0;
    foreach (dchar c; input) {
        codePoints++;
        // Basic code points are strictly below initialN (0x80). U+0080 itself is not basic and
        // must go through the encoding loop below.
        if (c < initialN) {
            output ~= c;
            pushed++;
        }
    }
    if (pushed == codePoints) {
        // No encoding to do.
        return input;
    }
    if (pushed > 0) {
        output ~= delimiter;
    }
    bool first = true;
    while (pushed < codePoints) {
        // Smallest code point in the input that is >= n. uint.max is used as the "none found"
        // marker because every dchar value, including dchar.max, can legitimately occur.
        uint best = uint.max;
        foreach (dchar c; input) {
            if (n <= c && c < best) {
                best = c;
            }
        }
        if (best == uint.max) {
            throw new URLException("failed to find a new codepoint to process during punyencode");
        }
        delta += (best - n) * (pushed + 1);
        if (delta > uint.max) {
            // TODO better error message
            throw new URLException("overflow during punyencode");
        }
        n = cast(dchar) best;
        foreach (dchar c; input) {
            if (c < n) {
                delta++;
            }
            if (c == n) {
                // Emit delta as a generalized variable-length integer.
                ulong q = delta;
                auto k = base;
                while (true) {
                    ulong t;
                    if (k <= bias) {
                        t = tmin;
                    } else if (k >= bias + tmax) {
                        t = tmax;
                    } else {
                        t = k - bias;
                    }
                    if (q < t) {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}
/**
 * Decode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to decode
 * "xn--m3h.xn--n3h.com" from Punycode, you will get "☂.☃.com".
 *
 * In order to puny-decode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "xn--m3h.xn--n3h.com";
 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: the input itself if it does not start with "xn--", otherwise the decoded segment.
 * Throws: URLException if the encoded part ends in the middle of a value, contains a character
 *         that is not an ASCII letter or digit, or decodes to a value that is out of range.
 */
string punyDecode(string input) {
    if (!input.startsWith(marker)) {
        return input;
    }
    input = input[marker.length..$];

    // let n = initial_n
    dchar n = initialN;

    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    size_t i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than just
    // appending and inserting willy-nilly.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    // and copy them to output, fail on any non-basic code point
    // if more than zero code points were consumed then consume one more
    // (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1) {
        foreach (dchar c; input[0..end]) {
            output ~= c;
        }
        input = input[end+1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length) {
        // let oldi = i
        // let w = 1
        auto oldi = i;
        // 64-bit so that the running weight cannot overflow before the range check on i fires.
        ulong w = 1;
        // for k = base to infinity in steps of base do begin
        for (ulong k = base; k < uint.max; k += base) {
            // consume a code point, or fail if there was none to consume
            if (pos >= input.length) {
                throw new URLException(
                        "invalid punycode: input ended in the middle of an encoded value");
            }
            // Note that the input is all ASCII, so we can simply index the input string bytewise.
            auto c = input[pos];
            pos++;
            // let digit = the code point's digit-value, fail if it has none
            auto digit = basicToDigit(c);
            if (digit >= base) {
                throw new URLException(
                        "invalid punycode: encoded part contains a character that is not a letter or digit");
            }
            // let i = i + digit * w, fail on overflow
            immutable grown = cast(ulong) i + digit * w;
            if (grown > uint.max) {
                throw new URLException("overflow during punydecode");
            }
            i = cast(size_t) grown;
            // let t = tmin if k <= bias {+ tmin}, or
            //     tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias) {
                t = tmin;
            } else if (k >= bias + tmax) {
                t = tmax;
            } else {
                t = k - bias;
            }
            // if digit < t then break
            if (digit < t) {
                break;
            }
            // let w = w * (base - t), fail on overflow
            // (w never exceeds i here, and i was just checked against uint.max, so this
            // multiplication stays well inside 64 bits.)
            w *= (base - t);
            // end
        }
        // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, output.length + 1, oldi == 0);
        // let n = n + i div (length(output) + 1), fail on overflow
        immutable next = cast(ulong) n + i / (output.length + 1);
        if (next > dchar.max) {
            throw new URLException("invalid punycode: decoded code point is out of range");
        }
        n = cast(dchar) next;
        // let i = i mod (length(output) + 1)
        i %= (output.length + 1);
        // {if n is a basic code point then fail}
        // (We aren't actually going to fail here; it's clear what this means.)
        // insert n into output at position i
        import std.array : insertInPlace;
        (() @trusted { output.insertInPlace(i, cast(dchar)n); })();  // should be @safe but isn't marked
        // increment i
        i++;
        // end
    }
    return output.to!string;
}

// Lifted from punycode.js.
// Maps a digit value to its basic code point: 0-25 become 'a'-'z', 26-35 become '0'-'9'.
private dchar digitToBasic(ulong digit) {
    if (digit < 26) {
        return cast(dchar)(digit + 'a');
    }
    // '0' is 48, so digit 26 maps to 26 + 22.
    return cast(dchar)(digit + 22);
}
// Lifted from punycode.js.
// Maps a basic code point to its digit value: '0'-'9' give 26-35, letters of either case give
// 0-25. Any other character yields `base`, which is one past the largest digit value.
private uint basicToDigit(char c) {
    immutable uint code = c;
    if (code >= '0' && code <= '9') {
        return code - 22;
    }
    if (code >= 'A' && code <= 'Z') {
        return code - 'A';
    }
    if (code >= 'a' && code <= 'z') {
        return code - 'a';
    }
    return base;
}

unittest {
    // A single non-ASCII code point.
    assert(punyEncode("b\u00FCcher") == "xn--bcher-kva");

    // The same non-ASCII code point twice.
    assert(punyEncode("b\u00FCc\u00FCher") == "xn--bcher-kvab");

    {
        // Two different non-ASCII code points.
        auto encoded = punyEncode("ýbücher");
        assert(encoded == "xn--bcher-kvaf", encoded);
    }

    assert(punyEncode("mañana") == "xn--maana-pta");

    {
        // A label without any ASCII code points.
        auto arabic = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
        auto encoded = punyEncode(arabic);
        assert(encoded == "xn--egbpdaj6bu4bxfgehfvwxn", encoded);
    }
}

unittest {
    {
        auto decoded = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
        assert(decoded == "ليهمابتكلموشعربي؟", decoded);
    }
    assert(punyDecode("xn--maana-pta") == "mañana");
}

unittest {
    import std.string, std.algorithm, std.array, std.range;
    {
        // Decode a whole domain name label by label.
        auto encodedDomain = "xn--m3h.xn--n3h.com";
        auto decodedDomain = encodedDomain.splitter(".").map!(punyDecode).join(".");
        assert(decodedDomain == "☂.☃.com", decodedDomain);
    }
    {
        // Encode a whole domain name label by label.
        auto unicodeDomain = "☂.☃.com";
        auto encodedDomain = unicodeDomain.splitter(".").map!(punyEncode).join(".");
        assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
    }
}