1 /** 2 * A URL handling library. 3 * 4 * URLs are Unique Resource Locators. They consist of a scheme and a host, with some optional 5 * elements like port, path, username, and password. 6 * 7 * This module aims to make it simple to muck about with them. 8 * 9 * Example usage: 10 * --- 11 * auto url = "ssh://me:password@192.168.0.8/".parseURL; 12 * auto files = system("ssh", url.toString, "ls").splitLines; 13 * foreach (file; files) { 14 * system("scp", url ~ file, "."); 15 * } 16 * --- 17 * 18 * License: The MIT license. 19 */ 20 module url; 21 22 import std.conv; 23 import std.string; 24 25 pure: 26 @safe: 27 28 /// An exception thrown when something bad happens with URLs. 29 class URLException : Exception 30 { 31 this(string msg) pure { super(msg); } 32 } 33 34 /** 35 * A mapping from schemes to their default ports. 36 * 37 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to 38 * use even if they use ports. Entries here should be treated as best guesses. 39 */ 40 enum ushort[string] schemeToDefaultPort = [ 41 "aaa": 3868, 42 "aaas": 5658, 43 "acap": 674, 44 "amqp": 5672, 45 "cap": 1026, 46 "coap": 5683, 47 "coaps": 5684, 48 "dav": 443, 49 "dict": 2628, 50 "ftp": 21, 51 "git": 9418, 52 "go": 1096, 53 "gopher": 70, 54 "http": 80, 55 "https": 443, 56 "ws": 80, 57 "wss": 443, 58 "iac": 4569, 59 "icap": 1344, 60 "imap": 143, 61 "ipp": 631, 62 "ipps": 631, // yes, they're both mapped to port 631 63 "irc": 6667, // De facto default port, not the IANA reserved port. 
64 "ircs": 6697, 65 "iris": 702, // defaults to iris.beep 66 "iris.beep": 702, 67 "iris.lwz": 715, 68 "iris.xpc": 713, 69 "iris.xpcs": 714, 70 "jabber": 5222, // client-to-server 71 "ldap": 389, 72 "ldaps": 636, 73 "msrp": 2855, 74 "msrps": 2855, 75 "mtqp": 1038, 76 "mupdate": 3905, 77 "news": 119, 78 "nfs": 2049, 79 "pop": 110, 80 "redis": 6379, 81 "reload": 6084, 82 "rsync": 873, 83 "rtmfp": 1935, 84 "rtsp": 554, 85 "shttp": 80, 86 "sieve": 4190, 87 "sip": 5060, 88 "sips": 5061, 89 "smb": 445, 90 "smtp": 25, 91 "snews": 563, 92 "snmp": 161, 93 "soap.beep": 605, 94 "ssh": 22, 95 "stun": 3478, 96 "stuns": 5349, 97 "svn": 3690, 98 "teamspeak": 9987, 99 "telnet": 23, 100 "tftp": 69, 101 "tip": 3372, 102 ]; 103 104 /** 105 * A collection of query parameters. 106 * 107 * This is effectively a multimap of string -> strings. 108 */ 109 struct QueryParams 110 { 111 pure: 112 import std.typecons; 113 alias Tuple!(string, "key", string, "value") Param; 114 Param[] params; 115 116 @property size_t length() const { 117 return params.length; 118 } 119 120 /// Get a range over the query parameter values for the given key. 121 auto opIndex(string key) const 122 { 123 import std.algorithm.searching : find; 124 import std.algorithm.iteration : map; 125 return params.find!(x => x.key == key).map!(x => x.value); 126 } 127 128 /// Add a query parameter with the given key and value. 129 /// If one already exists, there will now be two query parameters with the given name. 130 void add(string key, string value) { 131 params ~= Param(key, value); 132 } 133 134 /// Add a query parameter with the given key and value. 135 /// If there are any existing parameters with the same key, they are removed and overwritten. 
136 void overwrite(string key, string value) { 137 for (int i = 0; i < params.length; i++) { 138 if (params[i].key == key) { 139 params[i] = params[$-1]; 140 params.length--; 141 } 142 } 143 params ~= Param(key, value); 144 } 145 146 private struct QueryParamRange 147 { 148 pure: 149 size_t i; 150 const(Param)[] params; 151 bool empty() { return i >= params.length; } 152 void popFront() { i++; } 153 Param front() { return params[i]; } 154 } 155 156 /** 157 * A range over the query parameters. 158 * 159 * Usage: 160 * --- 161 * foreach (key, value; url.queryParams) {} 162 * --- 163 */ 164 auto range() const 165 { 166 return QueryParamRange(0, this.params); 167 } 168 /// ditto 169 alias range this; 170 171 /// Convert this set of query parameters into a query string. 172 string toString() const { 173 import std.array : Appender; 174 Appender!string s; 175 bool first = true; 176 foreach (tuple; this) { 177 if (!first) { 178 s ~= '&'; 179 } 180 first = false; 181 s ~= tuple.key.percentEncode; 182 if (tuple.value.length > 0) { 183 s ~= '='; 184 s ~= tuple.value.percentEncode; 185 } 186 } 187 return s.data; 188 } 189 190 /// Clone this set of query parameters. 191 QueryParams dup() { 192 QueryParams other = this; 193 other.params = params.dup; 194 return other; 195 } 196 } 197 198 /** 199 * A Unique Resource Locator. 200 * 201 * URLs can be parsed (see parseURL) and implicitly convert to strings. 202 */ 203 struct URL 204 { 205 pure: 206 /// The URL scheme. For instance, ssh, ftp, or https. 207 string scheme; 208 209 /// The username in this URL. Usually absent. If present, there will also be a password. 210 string user; 211 212 /// The password in this URL. Usually absent. 213 string pass; 214 215 /// The hostname. 216 string host; 217 218 /** 219 * The port. 220 * 221 * This is inferred from the scheme if it isn't present in the URL itself. 222 * If the scheme is not known and the port is not present, the port will be given as 0. 
223 * For some schemes, port will not be sensible -- for instance, file or chrome-extension. 224 * 225 * If you explicitly need to detect whether the user provided a port, check the providedPort 226 * field. 227 */ 228 @property ushort port() const 229 { 230 if (providedPort != 0) { 231 return providedPort; 232 } 233 if (auto p = scheme in schemeToDefaultPort) { 234 return *p; 235 } 236 return 0; 237 } 238 239 /** 240 * Set the port. 241 * 242 * This sets the providedPort field and is provided for convenience. 243 */ 244 @property ushort port(ushort value) 245 { 246 return providedPort = value; 247 } 248 249 /// The port that was explicitly provided in the URL. 250 ushort providedPort; 251 252 /** 253 * The path. 254 * 255 * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is 256 * "/news/story/17774". 257 */ 258 string path; 259 260 /** 261 * The query parameters associated with this URL. 262 */ 263 QueryParams queryParams; 264 265 /** 266 * The fragment. In web documents, this typically refers to an anchor element. 267 * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2". 268 */ 269 string fragment; 270 271 /** 272 * Convert this URL to a string. 273 * The string is properly formatted and usable for, eg, a web request. 274 */ 275 string toString() const 276 { 277 return toString(false); 278 } 279 280 /** 281 * Convert this URL to a string. 282 * 283 * The string is intended to be human-readable rather than machine-readable. 
284 */ 285 string toHumanReadableString() const 286 { 287 return toString(true); 288 } 289 290 /// 291 unittest 292 { 293 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL; 294 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString); 295 assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString); 296 } 297 298 unittest 299 { 300 assert("http://example.org/some_path".parseURL.toHumanReadableString == 301 "http://example.org/some_path"); 302 } 303 304 private string toString(bool humanReadable) const 305 { 306 import std.array : Appender; 307 Appender!string s; 308 s ~= scheme; 309 s ~= "://"; 310 if (user) { 311 s ~= humanReadable ? user : user.percentEncode; 312 s ~= ":"; 313 s ~= humanReadable ? pass : pass.percentEncode; 314 s ~= "@"; 315 } 316 s ~= humanReadable ? host : host.toPuny; 317 if (providedPort) { 318 if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) { 319 s ~= ":"; 320 s ~= providedPort.to!string; 321 } 322 } 323 string p = path; 324 if (p.length == 0 || p == "/") { 325 s ~= '/'; 326 } else { 327 if (humanReadable) { 328 s ~= p; 329 } else { 330 if (p[0] == '/') { 331 p = p[1..$]; 332 } 333 foreach (part; p.split('/')) { 334 s ~= '/'; 335 s ~= part.percentEncode; 336 } 337 } 338 } 339 if (queryParams.length) { 340 s ~= '?'; 341 s ~= queryParams.toString; 342 } if (fragment) { 343 s ~= '#'; 344 s ~= fragment.percentEncode; 345 } 346 return s.data; 347 } 348 349 /// Implicitly convert URLs to strings. 350 alias toString this; 351 352 /** 353 Compare two URLs. 354 355 I tried to make the comparison produce a sort order that seems natural, so it's not identical 356 to sorting based on .toString(). For instance, username/password have lower priority than 357 host. The scheme has higher priority than port but lower than host. 
358 359 While the output of this is guaranteed to provide a total ordering, and I've attempted to make 360 it human-friendly, it isn't guaranteed to be consistent between versions. The implementation 361 and its results can change without a minor version increase. 362 */ 363 int opCmp(const URL other) const 364 { 365 return asTuple.opCmp(other.asTuple); 366 } 367 368 private auto asTuple() const 369 { 370 import std.typecons : tuple; 371 return tuple(host, scheme, port, user, pass, path); 372 } 373 374 bool opEquals(string other) const 375 { 376 URL o; 377 if (!tryParseURL(other, o)) 378 { 379 return false; 380 } 381 return asTuple() == o.asTuple(); 382 } 383 384 bool opEquals(ref const URL other) const 385 { 386 return asTuple() == other.asTuple(); 387 } 388 389 bool opEquals(const URL other) const 390 { 391 return asTuple() == other.asTuple(); 392 } 393 394 unittest 395 { 396 import std.algorithm, std.array, std.format; 397 assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL); 398 alias sorted = std.algorithm.sort; 399 auto parsedURLs = 400 [ 401 "http://example.org/some_path", 402 "http://example.org:81/other_path", 403 "http://example.org/other_path", 404 "https://example.org/first_path", 405 "http://example.xyz/other_other_path", 406 "http://me:secret@blog.ikeran.org/wp_admin", 407 ].map!(x => x.parseURL).array; 408 auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array; 409 auto expected = 410 [ 411 "http://me:secret@blog.ikeran.org/wp_admin", 412 "http://example.org/other_path", 413 "http://example.org/some_path", 414 "http://example.org:81/other_path", 415 "https://example.org/first_path", 416 "http://example.xyz/other_other_path", 417 ]; 418 assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls)); 419 } 420 421 /** 422 * The append operator (~). 423 * 424 * The append operator for URLs returns a new URL with the given string appended as a path 425 * element to the URL's path. 
It only adds new path elements (or sequences of path elements). 426 * 427 * Don't worry about path separators; whether you include them or not, it will just work. 428 * 429 * Query elements are copied. 430 * 431 * Examples: 432 * --- 433 * auto random = "http://testdata.org/random".parseURL; 434 * auto randInt = random ~ "int"; 435 * writeln(randInt); // prints "http://testdata.org/random/int" 436 * --- 437 */ 438 URL opBinary(string op : "~")(string subsequentPath) { 439 URL other = this; 440 other ~= subsequentPath; 441 other.queryParams = queryParams.dup; 442 return other; 443 } 444 445 /** 446 * The append-in-place operator (~=). 447 * 448 * The append operator for URLs adds a path element to this URL. It only adds new path elements 449 * (or sequences of path elements). 450 * 451 * Don't worry about path separators; whether you include them or not, it will just work. 452 * 453 * Examples: 454 * --- 455 * auto random = "http://testdata.org/random".parseURL; 456 * random ~= "int"; 457 * writeln(random); // prints "http://testdata.org/random/int" 458 * --- 459 */ 460 URL opOpAssign(string op : "~")(string subsequentPath) { 461 if (path.endsWith("/")) { 462 if (subsequentPath.startsWith("/")) { 463 path ~= subsequentPath[1..$]; 464 } else { 465 path ~= subsequentPath; 466 } 467 } else { 468 if (!subsequentPath.startsWith("/")) { 469 path ~= '/'; 470 } 471 path ~= subsequentPath; 472 } 473 return this; 474 } 475 476 /** 477 * Convert a relative URL to an absolute URL. 478 * 479 * This is designed so that you can scrape a webpage and quickly convert links within the 480 * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses 481 * for it. 482 * 483 * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme, 484 * different everything else", which might not be desirable for all schemes. 
485 * 486 * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for 487 * instance, this will give you our best attempt to parse it as a URL. 488 * 489 * Examples: 490 * --- 491 * auto base = "https://example.org/passworddb?secure=false".parseURL; 492 * 493 * // Download https://example.org/passworddb/by-username/dhasenan 494 * download(base.resolve("by-username/dhasenan")); 495 * 496 * // Download https://example.org/static/style.css 497 * download(base.resolve("/static/style.css")); 498 * 499 * // Download https://cdn.example.net/jquery.js 500 * download(base.resolve("https://cdn.example.net/jquery.js")); 501 * --- 502 */ 503 URL resolve(string other) 504 { 505 if (other.length == 0) return this; 506 if (other[0] == '/') 507 { 508 if (other.length > 1 && other[1] == '/') 509 { 510 // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL" 511 return parseURL(this.scheme ~ ':' ~ other); 512 } 513 } 514 else 515 { 516 auto schemeSep = other.indexOf("://"); 517 if (schemeSep >= 0 && schemeSep < other.indexOf("/")) 518 // separate URL 519 { 520 return other.parseURL; 521 } 522 } 523 524 URL ret = this; 525 ret.path = ""; 526 ret.queryParams = ret.queryParams.init; 527 if (other[0] != '/') 528 { 529 // relative to something 530 if (!this.path.length) 531 { 532 // nothing to be relative to 533 other = "/" ~ other; 534 } 535 else if (this.path[$-1] == '/') 536 { 537 // directory-style path for the current thing 538 // resolve relative to this directory 539 other = this.path ~ other; 540 } 541 else 542 { 543 // this is a file-like thing 544 // find the 'directory' and relative to that 545 other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other; 546 } 547 } 548 parsePathAndQuery(ret, other); 549 return ret; 550 } 551 552 unittest 553 { 554 auto a = "http://alcyius.com/dndtools/index.html".parseURL; 555 auto b = a.resolve("contacts/index.html"); 556 assert(b.toString == 
"http://alcyius.com/dndtools/contacts/index.html"); 557 } 558 } 559 560 /** 561 * Parse a URL from a string. 562 * 563 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes 564 * may be made. However, any URL in a correct format will be parsed correctly. 565 */ 566 bool tryParseURL(string value, out URL url) 567 { 568 url = URL.init; 569 // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment] 570 // Scheme is optional in common use. We infer 'http' if it's not given. 571 auto i = value.indexOf("//"); 572 if (i > -1) { 573 if (i > 1) { 574 url.scheme = value[0..i-1]; 575 } 576 value = value[i+2 .. $]; 577 } else { 578 url.scheme = "http"; 579 } 580 // Check for an ipv6 hostname. 581 // [user:password@]host[:port]][/]path[?query][#fragment 582 i = value.indexOfAny([':', '/', '[']); 583 if (i == -1) { 584 // Just a hostname. 585 url.host = value.fromPuny; 586 return true; 587 } 588 589 if (value[i] == ':') { 590 // This could be between username and password, or it could be between host and port. 591 auto j = value.indexOfAny(['@', '/']); 592 if (j > -1 && value[j] == '@') { 593 try { 594 url.user = value[0..i].percentDecode; 595 url.pass = value[i+1 .. j].percentDecode; 596 } catch (URLException) { 597 return false; 598 } 599 value = value[j+1 .. $]; 600 } 601 } 602 603 // It's trying to be a host/port, not a user/pass. 604 i = value.indexOfAny([':', '/', '[']); 605 if (i == -1) { 606 url.host = value.fromPuny; 607 return true; 608 } 609 610 // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't 611 // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we 612 // handle specially, and are offset with square brackets. 613 if (value[i] == '[') { 614 auto j = value[i..$].indexOf(']'); 615 if (j < 0) { 616 // unterminated ipv6 addr 617 return false; 618 } 619 // includes square brackets 620 url.host = value[i .. 
i+j+1]; 621 value = value[i+j+1 .. $]; 622 if (value.length == 0) { 623 // read to end of string; we finished parse 624 return true; 625 } 626 if (value[0] != ':' && value[0] != '?' && value[0] != '/') { 627 return false; 628 } 629 } else { 630 // Normal host. 631 url.host = value[0..i].fromPuny; 632 value = value[i .. $]; 633 } 634 635 if (value[0] == ':') { 636 auto end = value.indexOf('/'); 637 if (end == -1) { 638 end = value.length; 639 } 640 try { 641 url.port = value[1 .. end].to!ushort; 642 } catch (ConvException) { 643 return false; 644 } 645 value = value[end .. $]; 646 if (value.length == 0) { 647 return true; 648 } 649 } 650 return parsePathAndQuery(url, value); 651 } 652 653 private bool parsePathAndQuery(ref URL url, string value) 654 { 655 auto i = value.indexOfAny("?#"); 656 if (i == -1) 657 { 658 url.path = value.percentDecode; 659 return true; 660 } 661 662 try 663 { 664 url.path = value[0..i].percentDecode; 665 } 666 catch (URLException) 667 { 668 return false; 669 } 670 671 auto c = value[i]; 672 value = value[i + 1 .. $]; 673 if (c == '?') 674 { 675 i = value.indexOf('#'); 676 string query; 677 if (i < 0) 678 { 679 query = value; 680 value = null; 681 } 682 else 683 { 684 query = value[0..i]; 685 value = value[i + 1 .. $]; 686 } 687 auto queries = query.split('&'); 688 foreach (q; queries) 689 { 690 auto j = q.indexOf('='); 691 string key, val; 692 if (j < 0) 693 { 694 key = q; 695 } 696 else 697 { 698 key = q[0..j]; 699 val = q[j + 1 .. $]; 700 } 701 try 702 { 703 key = key.percentDecode; 704 val = val.percentDecode; 705 } 706 catch (URLException) 707 { 708 return false; 709 } 710 url.queryParams.add(key, val); 711 } 712 } 713 714 try 715 { 716 url.fragment = value.percentDecode; 717 } 718 catch (URLException) 719 { 720 return false; 721 } 722 723 return true; 724 } 725 726 unittest { 727 { 728 // Basic. 
729 URL url; 730 with (url) { 731 scheme = "https"; 732 host = "example.org"; 733 path = "/foo/bar"; 734 queryParams.add("hello", "world"); 735 queryParams.add("gibe", "clay"); 736 fragment = "frag"; 737 } 738 assert( 739 // Not sure what order it'll come out in. 740 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 741 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 742 url.toString); 743 } 744 { 745 // Percent encoded. 746 URL url; 747 with (url) { 748 scheme = "https"; 749 host = "example.org"; 750 path = "/f☃o"; 751 queryParams.add("❄", "❀"); 752 queryParams.add("[", "]"); 753 fragment = "ş"; 754 } 755 assert( 756 // Not sure what order it'll come out in. 757 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 758 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 759 url.toString); 760 } 761 { 762 // Port, user, pass. 763 URL url; 764 with (url) { 765 scheme = "https"; 766 host = "example.org"; 767 user = "dhasenan"; 768 pass = "itsasecret"; 769 port = 17; 770 } 771 assert( 772 url.toString == "https://dhasenan:itsasecret@example.org:17/", 773 url.toString); 774 } 775 { 776 // Query with no path. 777 URL url; 778 with (url) { 779 scheme = "https"; 780 host = "example.org"; 781 queryParams.add("hi", "bye"); 782 } 783 assert( 784 url.toString == "https://example.org/?hi=bye", 785 url.toString); 786 } 787 } 788 789 unittest 790 { 791 auto url = "//foo/bar".parseURL; 792 assert(url.host == "foo", "expected host foo, got " ~ url.host); 793 assert(url.path == "/bar"); 794 } 795 796 unittest 797 { 798 // ipv6 hostnames! 
799 { 800 // full range of data 801 auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar"); 802 assert(url.scheme == "https", url.scheme); 803 assert(url.user == "bob", url.user); 804 assert(url.pass == "secret", url.pass); 805 assert(url.host == "[::1]", url.host); 806 assert(url.port == 2771, url.port.to!string); 807 assert(url.path == "/foo/bar", url.path); 808 } 809 810 // minimal 811 { 812 auto url = parseURL("[::1]"); 813 assert(url.host == "[::1]", url.host); 814 } 815 816 // some random bits 817 { 818 auto url = parseURL("http://[::1]/foo"); 819 assert(url.scheme == "http", url.scheme); 820 assert(url.host == "[::1]", url.host); 821 assert(url.path == "/foo", url.path); 822 } 823 824 { 825 auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding"); 826 assert(url.scheme == "https"); 827 assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]"); 828 assert(url.path == "/"); 829 assert(url.fragment == "justkidding"); 830 } 831 } 832 833 unittest 834 { 835 auto url = "localhost:5984".parseURL; 836 auto url2 = url ~ "db1"; 837 assert(url2.toString == "http://localhost:5984/db1", url2.toString); 838 auto url3 = url2 ~ "_all_docs"; 839 assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString); 840 } 841 842 /// 843 unittest { 844 { 845 // Basic. 846 URL url; 847 with (url) { 848 scheme = "https"; 849 host = "example.org"; 850 path = "/foo/bar"; 851 queryParams.add("hello", "world"); 852 queryParams.add("gibe", "clay"); 853 fragment = "frag"; 854 } 855 assert( 856 // Not sure what order it'll come out in. 857 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 858 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 859 url.toString); 860 } 861 { 862 // Passing an array of query values. 
863 URL url; 864 with (url) { 865 scheme = "https"; 866 host = "example.org"; 867 path = "/foo/bar"; 868 queryParams.add("hello", "world"); 869 queryParams.add("hello", "aether"); 870 fragment = "frag"; 871 } 872 assert( 873 // Not sure what order it'll come out in. 874 url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" || 875 url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag", 876 url.toString); 877 } 878 { 879 // Percent encoded. 880 URL url; 881 with (url) { 882 scheme = "https"; 883 host = "example.org"; 884 path = "/f☃o"; 885 queryParams.add("❄", "❀"); 886 queryParams.add("[", "]"); 887 fragment = "ş"; 888 } 889 assert( 890 // Not sure what order it'll come out in. 891 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 892 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 893 url.toString); 894 } 895 { 896 // Port, user, pass. 897 URL url; 898 with (url) { 899 scheme = "https"; 900 host = "example.org"; 901 user = "dhasenan"; 902 pass = "itsasecret"; 903 port = 17; 904 } 905 assert( 906 url.toString == "https://dhasenan:itsasecret@example.org:17/", 907 url.toString); 908 } 909 { 910 // Query with no path. 911 URL url; 912 with (url) { 913 scheme = "https"; 914 host = "example.org"; 915 queryParams.add("hi", "bye"); 916 } 917 assert( 918 url.toString == "https://example.org/?hi=bye", 919 url.toString); 920 } 921 } 922 923 unittest { 924 // Percent decoding. 925 926 // http://#:!:@ 927 auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash"; 928 auto url = urlString.parseURL; 929 assert(url.user == "#"); 930 assert(url.pass == "!:"); 931 assert(url.host == "example.org"); 932 assert(url.path == "/{/}"); 933 assert(url.queryParams[";"].front == ""); 934 assert(url.queryParams["&"].front == "="); 935 assert(url.fragment == "#hash"); 936 937 // Round trip. 
938 assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString); 939 assert(urlString == urlString.parseURL.toString.parseURL.toString); 940 } 941 942 unittest { 943 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL; 944 assert(url.host == "☂.☃.org", url.host); 945 } 946 947 unittest { 948 auto url = "https://☂.☃.org/?hi=bye".parseURL; 949 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye"); 950 } 951 952 /// 953 unittest { 954 // There's an existing path. 955 auto url = parseURL("http://example.org/foo"); 956 URL url2; 957 // No slash? Assume it needs a slash. 958 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 959 // With slash? Don't add another. 960 url2 = url ~ "/bar"; 961 assert(url2.toString == "http://example.org/foo/bar", url2.toString); 962 url ~= "bar"; 963 assert(url.toString == "http://example.org/foo/bar"); 964 965 // Path already ends with a slash; don't add another. 966 url = parseURL("http://example.org/foo/"); 967 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 968 // Still don't add one even if you're appending with a slash. 969 assert((url ~ "/bar").toString == "http://example.org/foo/bar"); 970 url ~= "/bar"; 971 assert(url.toString == "http://example.org/foo/bar"); 972 973 // No path. 974 url = parseURL("http://example.org"); 975 assert((url ~ "bar").toString == "http://example.org/bar"); 976 assert((url ~ "/bar").toString == "http://example.org/bar"); 977 url ~= "bar"; 978 assert(url.toString == "http://example.org/bar"); 979 980 // Path is just a slash. 981 url = parseURL("http://example.org/"); 982 assert((url ~ "bar").toString == "http://example.org/bar"); 983 assert((url ~ "/bar").toString == "http://example.org/bar"); 984 url ~= "bar"; 985 assert(url.toString == "http://example.org/bar", url.toString); 986 987 // No path, just fragment. 
988 url = "ircs://irc.freenode.com/#d".parseURL; 989 assert(url.toString == "ircs://irc.freenode.com/#d", url.toString); 990 } 991 unittest 992 { 993 // basic resolve() 994 { 995 auto base = "https://example.org/this/".parseURL; 996 assert(base.resolve("that") == "https://example.org/this/that"); 997 assert(base.resolve("/that") == "https://example.org/that"); 998 assert(base.resolve("//example.net/that") == "https://example.net/that"); 999 } 1000 1001 // ensure we don't preserve query params 1002 { 1003 auto base = "https://example.org/this?query=value&other=value2".parseURL; 1004 assert(base.resolve("that") == "https://example.org/that"); 1005 assert(base.resolve("/that") == "https://example.org/that"); 1006 assert(base.resolve("tother/that") == "https://example.org/tother/that"); 1007 assert(base.resolve("//example.net/that") == "https://example.net/that"); 1008 } 1009 } 1010 1011 1012 unittest 1013 { 1014 import std.net.curl; 1015 auto url = "http://example.org".parseURL; 1016 assert(is(typeof(std.net.curl.get(url)))); 1017 } 1018 1019 /** 1020 * Parse the input string as a URL. 1021 * 1022 * Throws: 1023 * URLException if the string was in an incorrect format. 
1024 */ 1025 URL parseURL(string value) { 1026 URL url; 1027 if (tryParseURL(value, url)) { 1028 return url; 1029 } 1030 throw new URLException("failed to parse URL " ~ value); 1031 } 1032 1033 /// 1034 unittest { 1035 { 1036 // Infer scheme 1037 auto u1 = parseURL("example.org"); 1038 assert(u1.scheme == "http"); 1039 assert(u1.host == "example.org"); 1040 assert(u1.path == ""); 1041 assert(u1.port == 80); 1042 assert(u1.providedPort == 0); 1043 assert(u1.fragment == ""); 1044 } 1045 { 1046 // Simple host and scheme 1047 auto u1 = parseURL("https://example.org"); 1048 assert(u1.scheme == "https"); 1049 assert(u1.host == "example.org"); 1050 assert(u1.path == ""); 1051 assert(u1.port == 443); 1052 assert(u1.providedPort == 0); 1053 } 1054 { 1055 // With path 1056 auto u1 = parseURL("https://example.org/foo/bar"); 1057 assert(u1.scheme == "https"); 1058 assert(u1.host == "example.org"); 1059 assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path); 1060 assert(u1.port == 443); 1061 assert(u1.providedPort == 0); 1062 } 1063 { 1064 // With explicit port 1065 auto u1 = parseURL("https://example.org:1021/foo/bar"); 1066 assert(u1.scheme == "https"); 1067 assert(u1.host == "example.org"); 1068 assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path); 1069 assert(u1.port == 1021); 1070 assert(u1.providedPort == 1021); 1071 } 1072 { 1073 // With user 1074 auto u1 = parseURL("https://bob:secret@example.org/foo/bar"); 1075 assert(u1.scheme == "https"); 1076 assert(u1.host == "example.org"); 1077 assert(u1.path == "/foo/bar"); 1078 assert(u1.port == 443); 1079 assert(u1.user == "bob"); 1080 assert(u1.pass == "secret"); 1081 } 1082 { 1083 // With user, URL-encoded 1084 auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar"); 1085 assert(u1.scheme == "https"); 1086 assert(u1.host == "example.org"); 1087 assert(u1.path == "/foo/bar"); 1088 assert(u1.port == 443); 1089 assert(u1.user == "bob!"); 1090 assert(u1.pass == "secret!?"); 1091 } 
1092 { 1093 // With user and port and path 1094 auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar"); 1095 assert(u1.scheme == "https"); 1096 assert(u1.host == "example.org"); 1097 assert(u1.path == "/foo/bar"); 1098 assert(u1.port == 2210); 1099 assert(u1.user == "bob"); 1100 assert(u1.pass == "secret"); 1101 assert(u1.fragment == ""); 1102 } 1103 { 1104 // With query string 1105 auto u1 = parseURL("https://example.org/?login=true"); 1106 assert(u1.scheme == "https"); 1107 assert(u1.host == "example.org"); 1108 assert(u1.path == "/", "expected path: / actual path: " ~ u1.path); 1109 assert(u1.queryParams["login"].front == "true"); 1110 assert(u1.fragment == ""); 1111 } 1112 { 1113 // With query string and fragment 1114 auto u1 = parseURL("https://example.org/?login=true#justkidding"); 1115 assert(u1.scheme == "https"); 1116 assert(u1.host == "example.org"); 1117 assert(u1.path == "/", "expected path: / actual path: " ~ u1.path); 1118 assert(u1.queryParams["login"].front == "true"); 1119 assert(u1.fragment == "justkidding"); 1120 } 1121 { 1122 // With URL-encoded values 1123 auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E"); 1124 assert(u1.scheme == "https"); 1125 assert(u1.host == "example.org"); 1126 assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path); 1127 assert(u1.queryParams["❄"].front == "="); 1128 assert(u1.fragment == "^"); 1129 } 1130 } 1131 1132 unittest { 1133 assert(parseURL("http://example.org").port == 80); 1134 assert(parseURL("http://example.org:5326").port == 5326); 1135 1136 auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment"); 1137 assert(url.scheme == "redis"); 1138 assert(url.user == "admin"); 1139 assert(url.pass == "password"); 1140 1141 assert(parseURL("example.org").toString == "http://example.org/"); 1142 assert(parseURL("http://example.org:80").toString == "http://example.org/"); 1143 1144 assert(parseURL("localhost:8070").toString == 
"http://localhost:8070/"); 1145 } 1146 1147 /** 1148 * Percent-encode a string. 1149 * 1150 * URL components cannot contain non-ASCII characters, and there are very few characters that are 1151 * safe to include as URL components. Domain names using Unicode values use Punycode. For 1152 * everything else, there is percent encoding. 1153 */ 1154 string percentEncode(string raw) { 1155 // We *must* encode these characters: :/?#[]@!$&'()*+,;=" 1156 // We *can* encode any other characters. 1157 // We *should not* encode alpha, numeric, or -._~. 1158 import std.utf : encode; 1159 import std.array : Appender; 1160 Appender!string app; 1161 foreach (dchar d; raw) { 1162 if (('a' <= d && 'z' >= d) || 1163 ('A' <= d && 'Z' >= d) || 1164 ('0' <= d && '9' >= d) || 1165 d == '-' || d == '.' || d == '_' || d == '~') { 1166 app ~= d; 1167 continue; 1168 } 1169 // Something simple like a space character? Still in 7-bit ASCII? 1170 // Then we get a single-character string out of it and just encode 1171 // that one bit. 1172 // Something not in 7-bit ASCII? Then we percent-encode each octet 1173 // in the UTF-8 encoding (and hope the server understands UTF-8). 1174 char[] c; 1175 encode(c, d); 1176 auto bytes = cast(ubyte[])c; 1177 foreach (b; bytes) { 1178 app ~= format("%%%02X", b); 1179 } 1180 } 1181 return cast(string)app.data; 1182 } 1183 1184 /// 1185 unittest { 1186 assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding"); 1187 assert(percentEncode("~~--..__") == "~~--..__"); 1188 assert(percentEncode("0123456789") == "0123456789"); 1189 1190 string e; 1191 1192 e = percentEncode("☃"); 1193 assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e); 1194 } 1195 1196 /** 1197 * Percent-decode a string. 1198 * 1199 * URL components cannot contain non-ASCII characters, and there are very few characters that are 1200 * safe to include as URL components. Domain names using Unicode values use Punycode. 
For
 * everything else, there is percent encoding.
 *
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Params:
 *   encoded = the percent-encoded text
 * Returns: the decoded text
 * Throws: URLException if a `%` is not followed by two hex digits, or if the decoded octets are
 *   not valid UTF-8.
 */
string percentDecode(string encoded)
{
    import std.utf : validate, UTFException;
    auto raw = percentDecodeRaw(encoded);
    auto s = cast(string) raw;
    try
    {
        validate(s);
    }
    catch (UTFException e)
    {
        throw new URLException(
                "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
    return s;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    e = percentDecode("%e2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    try {
        // %ES is an invalid percent sequence: 'S' is not a hex digit.
        percentDecode("%es");
        assert(false, "expected exception not thrown");
    } catch (URLException) {
    }

    try {
        percentDecode("%e");
        assert(false, "expected exception not thrown");
    } catch (URLException) {
    }

    try {
        // A lone percent sign has no digits after it at all.
        percentDecode("%");
        assert(false, "expected exception not thrown");
    } catch (URLException) {
    }
}

/**
 * Percent-decode a string into a ubyte array.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Params:
 *   encoded = the percent-encoded text
 * Returns: the decoded octets; bytes that were not escaped are copied through unchanged
 * Throws: URLException if a `%` is not followed by two hex digits.
 */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= encoded[i];
            continue;
        }
        // Written as an addition on purpose: `encoded.length - 2` wraps around when the input
        // is shorter than two characters (e.g. a lone "%"), which would skip this check and
        // index past the end of the string.
        if (i + 2 >= encoded.length) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
            auto b = fromHex(encoded[i + 1]);
            auto c = fromHex(encoded[i + 2]);
            app ~= cast(ubyte)((b << 4) | c);
        } else {
            throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        // Skip the two hex digits; the loop increment skips the '%' itself.
        i += 2;
    }
    return app.data;
}

/// Whether `c` is an ASCII hex digit (0-9, a-f, A-F).
private bool isHex(char c) {
    return ('0' <= c && '9' >= c) ||
        ('a' <= c && 'f' >= c) ||
        ('A' <= c && 'F' >= c);
}

/// Numeric value of a hex digit. Callers are expected to check `isHex` first; for any other
/// character `indexOf` yields -1 and the result is meaningless.
private ubyte fromHex(char s) {
    enum caseDiff = 'a' - 'A';
    // Fold lower case onto upper case so a single lookup table suffices.
    if (s >= 'a' && s <= 'z') {
        s -= caseDiff;
    }
    return cast(ubyte)("0123456789ABCDEF".indexOf(s));
}

/**
 * Convert a hostname to its ASCII (Punycode) form.
 *
 * Bracketed names (IPv6 literals) and empty names are returned unchanged. Pure-ASCII names are
 * checked for illegal characters and returned unchanged. Names containing code points above
 * U+0080 are split on '.' and each label is passed through punyEncode.
 *
 * Throws: URLException if an illegal ASCII character is found before any code point that
 *   requires encoding.
 */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0)
    {
        // Nothing to check or encode; also keeps the index below in bounds.
        return unicodeHostname;
    }
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        // Rejected ranges: everything below ',', then ':' through '@' (0x3A-0x40),
        // '[' through '`' (0x5B-0x60), and '{' upwards.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}

/// Convert a hostname from its Punycode form by decoding each '.'-separated label with
/// punyDecode.
private string fromPuny(string hostname)
{
    import std.algorithm.iteration : map;
    return hostname.split('.').map!punyDecode.join(".");
}

private {
    // Punycode parameters. The names follow the pseudo-code quoted in punyDecode.
    enum delimiter = '-';            // separates the literal basic code points from the encoded part
    enum marker = "xn--";            // prefix that identifies an encoded label
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /// Bias adaptation step shared by punyEncode and punyDecode.
    /// `numPoints` must be non-zero; both callers pass a count of at least one.
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        if (firstTime) {
            delta /= damp;
        } else {
            delta /= 2;
        }
        delta += delta / numPoints;
        ulong k = 0;
        while (delta > ((base - tmin) * tmax) / 2) {
            delta /= (base - tmin);
            k += base;
        }
        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }
}

/**
 * Encode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-encode a domain name, you must split it into its components.
The following will
 * typically suffice:
 * ---
 * auto domain = "☂.☃.com";
 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment (no dots)
 * Returns: `input` itself when it contains only ASCII; otherwise the encoded segment, prefixed
 *   with "xn--"
 * Throws: URLException if the running delta exceeds `uint.max`.
 */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;
    ulong pushed = 0;
    ulong codePoints = 0;
    // Copy the basic (ASCII) code points straight through. initialN itself (U+0080) is the
    // first non-basic code point, so the comparison must be strict.
    foreach (dchar c; input) {
        codePoints++;
        if (c < initialN) {
            output ~= c;
            pushed++;
        }
    }
    if (pushed == codePoints) {
        // No encoding to do.
        return input;
    }
    if (pushed > 0) {
        output ~= delimiter;
    }
    bool first = true;
    while (pushed < codePoints) {
        // Find the smallest code point that has not been handled yet. A flag is used rather
        // than a sentinel so that U+10FFFF (== dchar.max) can be encoded too.
        bool found = false;
        dchar best = dchar.max;
        foreach (dchar c; input) {
            if (n <= c && c <= best) {
                best = c;
                found = true;
            }
        }
        if (!found) {
            throw new URLException("failed to find a new codepoint to process during punyencode");
        }
        // Widen before multiplying; in 32 bits the product could wrap and slip past the
        // overflow check below.
        delta += cast(ulong)(best - n) * (pushed + 1);
        if (delta > uint.max) {
            // TODO better error message
            throw new URLException("overflow during punyencode");
        }
        n = best;
        foreach (dchar c; input) {
            if (c < n) {
                delta++;
            }
            if (c == n) {
                // Emit delta as a variable-length integer in base 36.
                ulong q = delta;
                auto k = base;
                while (true) {
                    ulong t;
                    if (k <= bias) {
                        t = tmin;
                    } else if (k >= bias + tmax) {
                        t = tmax;
                    } else {
                        t = k - bias;
                    }
                    if (q < t) {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}

/**
 * Decode the input string using the Punycode algorithm.
*
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is decoded separately. For instance, decoding the segments of
 * "xn--m3h.xn--n3h.com" one by one yields "☂.☃.com".
 *
 * In order to puny-decode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "xn--m3h.xn--n3h.com";
 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment (no dots)
 * Returns: `input` itself when it does not start with "xn--"; otherwise the decoded segment
 * Throws: URLException if the encoded data is truncated, contains a character that is not a
 *   Punycode digit, contains a non-ASCII character before the last delimiter, overflows, or
 *   decodes to something that is not a valid Unicode code point.
 */
string punyDecode(string input) {
    import std.array : insertInPlace;
    import std.utf : isValidDchar;

    if (!input.startsWith(marker)) {
        return input;
    }
    input = input[marker.length..$];

    // let n = initial_n
    dchar n = initialN;

    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    ulong i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than just
    // appending and inserting willy-nilly.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    //  and copy them to output, fail on any non-basic code point
    // if more than zero code points were consumed then consume one more
    //  (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1) {
        foreach (dchar c; input[0..end]) {
            if (c >= initialN) {
                throw new URLException(
                        "invalid punycode: non-ASCII character before the last delimiter");
            }
            output ~= c;
        }
        input = input[end+1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length) {
        // let oldi = i
        // let w = 1
        auto oldi = i;
        ulong w = 1;
        // for k = base to infinity in steps of base do begin
        // (The loop always terminates: either a digit below the threshold ends the number,
        // the input runs out, or w exceeds uint.max after a handful of digits.)
        for (ulong k = base; ; k += base) {
            // consume a code point, or fail if there was none to consume
            // Note that the input is all ASCII, so we can simply index the input string bytewise.
            if (pos >= input.length) {
                throw new URLException(
                        "invalid punycode: input ends in the middle of an encoded number");
            }
            auto c = input[pos];
            pos++;
            // let digit = the code point's digit-value, fail if it has none
            auto digit = basicToDigit(c);
            if (digit >= base) {
                throw new URLException(
                        "invalid punycode: unexpected character at index " ~ (pos - 1).to!string);
            }
            // let i = i + digit * w, fail on overflow
            // (digit < 36 and w <= uint.max, so the arithmetic itself cannot wrap in 64 bits.)
            i += digit * w;
            if (i > uint.max) {
                throw new URLException("overflow during punydecode");
            }
            // let t = tmin if k <= bias {+ tmin}, or
            //  tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias) {
                t = tmin;
            } else if (k >= bias + tmax) {
                t = tmax;
            } else {
                t = k - bias;
            }
            // if digit < t then break
            if (digit < t) {
                break;
            }
            // let w = w * (base - t), fail on overflow
            w *= (base - t);
            if (w > uint.max) {
                throw new URLException("overflow during punydecode");
            }
            // end
        }
        // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, output.length + 1, oldi == 0);
        // let n = n + i div (length(output) + 1), fail on overflow
        ulong next = n + i / (output.length + 1);
        if (next > dchar.max || !isValidDchar(cast(dchar) next)) {
            throw new URLException("invalid punycode: decoded value is not a valid code point");
        }
        n = cast(dchar) next;
        // let i = i mod (length(output) + 1)
        i %= (output.length + 1);
        // {if n is a basic code point then fail}
        // (n starts at initialN and only ever grows, so it can never be a basic code point.)
        // insert n into output at position i
        (() @trusted { output.insertInPlace(cast(size_t) i, n); })();  // should be @safe but isn't marked
        // increment i
        i++;
        // end
    }
    return output.to!string;
}

// Lifted from punycode.js.
/// Map a digit value to its Punycode character: 0..25 become 'a'..'z', 26..35 become '0'..'9'.
private dchar digitToBasic(ulong digit) {
    return cast(dchar)(digit + 22 + 75 * (digit < 26));
}

// Lifted from punycode.js.
/// Map a Punycode character to its digit value: '0'..'9' give 26..35, letters of either case
/// give 0..25. Any other character yields `base`, which is not a valid digit.
private uint basicToDigit(char c) {
    switch (c) {
        case '0': .. case '9':
            return c - '0' + 26;
        case 'A': .. case 'Z':
            return c - 'A';
        case 'a': .. case 'z':
            return c - 'a';
        default:
            return base;
    }
}

unittest {
    {
        auto a = "b\u00FCcher";
        assert(punyEncode(a) == "xn--bcher-kva");
    }
    {
        auto a = "b\u00FCc\u00FCher";
        assert(punyEncode(a) == "xn--bcher-kvab");
    }
    {
        auto a = "ýbücher";
        auto b = punyEncode(a);
        assert(b == "xn--bcher-kvaf", b);
    }

    {
        auto a = "mañana";
        assert(punyEncode(a) == "xn--maana-pta");
    }

    {
        auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
        auto b = punyEncode(a);
        assert(b == "xn--egbpdaj6bu4bxfgehfvwxn", b);
    }
}

unittest {
    {
        auto b = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
        assert(b == "ليهمابتكلموشعربي؟", b);
    }
    {
        assert(punyDecode("xn--maana-pta") == "mañana");
    }
}

unittest {
    import std.string, std.algorithm, std.array, std.range;
    {
        // Decoding, one label at a time.
        auto domain = "xn--m3h.xn--n3h.com";
        auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
        assert(decodedDomain == "☂.☃.com", decodedDomain);
    }
    {
        // Encoding, one label at a time.
        auto domain = "☂.☃.com";
        auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
        assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
    }
}
