/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     system("scp", url ~ file, ".");
 * }
 * ---
 *
 * License: The MIT license.
 */
module url;

import std.conv;
import std.string;

pure:
@safe:

/// An exception thrown when something bad happens with URLs.
class URLException : Exception
{
    this(string msg) pure { super(msg); }
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 */
enum ushort[string] schemeToDefaultPort = [
    "aaa": 3868,
    "aaas": 5658,
    "acap": 674,
    "amqp": 5672,
    "cap": 1026,
    "coap": 5683,
    "coaps": 5684,
    "dav": 443,
    "dict": 2628,
    "ftp": 21,
    "git": 9418,
    "go": 1096,
    "gopher": 70,
    "http": 80,
    "https": 443,
    "ws": 80,
    "wss": 443,
    "iac": 4569,
    "icap": 1344,
    "imap": 143,
    "ipp": 631,
    "ipps": 631, // yes, they're both mapped to port 631
    "irc": 6667, // De facto default port, not the IANA reserved port.
    "ircs": 6697,
    "iris": 702, // defaults to iris.beep
    "iris.beep": 702,
    "iris.lwz": 715,
    "iris.xpc": 713,
    "iris.xpcs": 714,
    "jabber": 5222, // client-to-server
    "ldap": 389,
    "ldaps": 636,
    "msrp": 2855,
    "msrps": 2855,
    "mtqp": 1038,
    "mupdate": 3905,
    "news": 119,
    "nfs": 2049,
    "pop": 110,
    "redis": 6379,
    "reload": 6084,
    "rsync": 873,
    "rtmfp": 1935,
    "rtsp": 554,
    "shttp": 80,
    "sieve": 4190,
    "sip": 5060,
    "sips": 5061,
    "smb": 445,
    "smtp": 25,
    "snews": 563,
    "snmp": 161,
    "soap.beep": 605,
    "ssh": 22,
    "stun": 3478,
    "stuns": 5349,
    "svn": 3690,
    "teamspeak": 9987,
    "telnet": 23,
    "tftp": 69,
    "tip": 3372,
];

/**
 * A collection of query parameters.
 *
 * This is effectively a multimap of string -> strings. Parameters are kept in an array, so
 * they are stored in the order in which they were added.
 */
struct QueryParams
{
    pure:
    import std.typecons;
    alias Tuple!(string, "key", string, "value") Param;

    /// The stored key/value pairs.
    Param[] params;

    /// The number of stored parameters (duplicate keys are counted individually).
    @property size_t length() const {
        return params.length;
    }

    /**
     * Get a range over the query parameter values for the given key.
     *
     * Only values whose key equals `key` are produced. The result is a lazy range and is
     * empty if no parameter has that key.
     */
    auto opIndex(string key) const
    {
        import std.algorithm.iteration : filter, map;
        // `filter`, not `find`: `find` only skips ahead to the first match and then yields every
        // following parameter, including the values of unrelated keys.
        return params.filter!(x => x.key == key).map!(x => x.value);
    }

    /// Add a query parameter with the given key and value.
    /// If one already exists, there will now be two query parameters with the given name.
    void add(string key, string value) {
        params ~= Param(key, value);
    }

    /// Add a query parameter with the given key and value.
    /// If there are any existing parameters with the same key, they are removed and overwritten.
void overwrite(string key, string value) {
        import std.algorithm.iteration : filter;
        import std.array : array;
        // Build a fresh array instead of swap-removing in place. The in-place version advanced
        // past the element it had just swapped into slot i, so a second parameter with the same
        // key could survive, and it also wrote into storage shared with copies of this struct.
        // The surviving parameters keep their relative order; the new pair goes last.
        params = params.filter!(p => p.key != key).array;
        params ~= Param(key, value);
    }

    /// Forward range over a slice of parameters; this is what `foreach` over QueryParams uses.
    private struct QueryParamRange
    {
        pure:
        size_t i;               // index of the current element
        const(Param)[] params;  // the parameters being iterated
        bool empty() { return i >= params.length; }
        void popFront() { i++; }
        Param front() { return params[i]; }
    }

    /**
     * A range over the query parameters.
     *
     * Usage:
     * ---
     * foreach (key, value; url.queryParams) {}
     * ---
     */
    auto range() const
    {
        return QueryParamRange(0, this.params);
    }
    /// ditto
    alias range this;

    /**
     * Convert this set of query parameters into a query string.
     *
     * Pairs are joined with '&'. Keys and values are percent-encoded. A parameter whose value
     * is empty is written as the bare key, without '='.
     */
    string toString() const {
        import std.array : Appender;
        Appender!string s;
        bool first = true;
        foreach (tuple; this) {
            if (!first) {
                s ~= '&';
            }
            first = false;
            s ~= tuple.key.percentEncode;
            if (tuple.value.length > 0) {
                s ~= '=';
                s ~= tuple.value.percentEncode;
            }
        }
        return s.data;
    }

    /// Clone this set of query parameters. The copy gets its own parameter array.
    QueryParams dup() {
        QueryParams other = this;
        other.params = params.dup;
        return other;
    }
}

/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL
{
    pure:
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. If present, there will also be a password.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
* For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the providedPort
     * field.
     */
    @property ushort port() const
    {
        // An explicitly provided port always wins.
        if (providedPort == 0) {
            // Fall back to the scheme's conventional port, or 0 when the scheme is unknown.
            auto known = scheme in schemeToDefaultPort;
            return known is null ? cast(ushort) 0 : *known;
        }
        return providedPort;
    }

    /**
     * Set the port.
     *
     * This stores the value in the providedPort field and returns it; it is provided for
     * convenience.
     */
    @property ushort port(ushort value)
    {
        providedPort = value;
        return providedPort;
    }

    /// The port that was explicitly provided in the URL. Zero means "none provided".
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query parameters associated with this URL.
     */
    QueryParams queryParams;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     */
    string toString() const
    {
        immutable humanReadable = false;
        return toString(humanReadable);
    }

    /**
     * Convert this URL to a string.
     *
     * The string is intended to be human-readable rather than machine-readable.
*/
    string toHumanReadableString() const
    {
        return toString(true);
    }

    ///
    unittest
    {
        auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
        assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
        assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toHumanReadableString);
    }

    unittest
    {
        assert("http://example.org/some_path".parseURL.toHumanReadableString ==
                "http://example.org/some_path");
    }

    /**
     * Shared implementation behind toString and toHumanReadableString.
     *
     * When humanReadable is false, user, password, path segments and fragment are
     * percent-encoded and the host is passed through toPuny. When it is true, those components
     * are written as stored. Query parameters are percent-encoded in both modes.
     */
    private string toString(bool humanReadable) const
    {
        import std.array : Appender;
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Test the length, not the slice itself: `if (user)` checks the pointer, so an empty but
        // non-null string would emit a stray ":@".
        if (user.length) {
            s ~= humanReadable ? user : user.percentEncode;
            s ~= ":";
            s ~= humanReadable ? pass : pass.percentEncode;
            s ~= "@";
        }
        s ~= humanReadable ? host : host.toPuny;
        if (providedPort) {
            // Omit the port when it is just the scheme's default.
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length == 0 || p == "/") {
            s ~= '/';
        } else {
            if (humanReadable) {
                s ~= p;
            } else {
                // Encode each segment separately so the '/' separators stay literal.
                if (p[0] == '/') {
                    p = p[1..$];
                }
                foreach (part; p.split('/')) {
                    s ~= '/';
                    s ~= part.percentEncode;
                }
            }
        }
        if (queryParams.length) {
            s ~= '?';
            s ~= queryParams.toString;
        }
        // Same pointer-versus-length concern as for `user` above.
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
    Compare two URLs.

    I tried to make the comparison produce a sort order that seems natural, so it's not identical
    to sorting based on .toString(). For instance, username/password have lower priority than
    host. The scheme has higher priority than port but lower than host.
358 359 While the output of this is guaranteed to provide a total ordering, and I've attempted to make 360 it human-friendly, it isn't guaranteed to be consistent between versions. The implementation 361 and its results can change without a minor version increase. 362 */ 363 int opCmp(const URL other) const 364 { 365 return asTuple.opCmp(other.asTuple); 366 } 367 368 private auto asTuple() const 369 { 370 import std.typecons : tuple; 371 return tuple(host, scheme, port, user, pass, path); 372 } 373 374 bool opEquals(string other) const 375 { 376 URL o; 377 if (!tryParseURL(other, o)) 378 { 379 return false; 380 } 381 return asTuple() == o.asTuple(); 382 } 383 384 bool opEquals(ref const URL other) const 385 { 386 return asTuple() == other.asTuple(); 387 } 388 389 bool opEquals(const URL other) const 390 { 391 return asTuple() == other.asTuple(); 392 } 393 394 unittest 395 { 396 import std.algorithm, std.array, std.format; 397 assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL); 398 alias sorted = std.algorithm.sort; 399 auto parsedURLs = 400 [ 401 "http://example.org/some_path", 402 "http://example.org:81/other_path", 403 "http://example.org/other_path", 404 "https://example.org/first_path", 405 "http://example.xyz/other_other_path", 406 "http://me:secret@blog.ikeran.org/wp_admin", 407 ].map!(x => x.parseURL).array; 408 auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array; 409 auto expected = 410 [ 411 "http://me:secret@blog.ikeran.org/wp_admin", 412 "http://example.org/other_path", 413 "http://example.org/some_path", 414 "http://example.org:81/other_path", 415 "https://example.org/first_path", 416 "http://example.xyz/other_other_path", 417 ]; 418 assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls)); 419 } 420 421 /** 422 * The append operator (~). 423 * 424 * The append operator for URLs returns a new URL with the given string appended as a path 425 * element to the URL's path. 
It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        // Work on a copy so the receiver is left untouched.
        URL result = this;
        result ~= subsequentPath;
        // The struct copy shares the parameter array; give the result its own.
        result.queryParams = queryParams.dup;
        return result;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        immutable pathEndsWithSlash = path.endsWith("/");
        immutable extraStartsWithSlash = subsequentPath.startsWith("/");

        if (pathEndsWithSlash && extraStartsWithSlash) {
            // Both sides supply a separator: drop one of them.
            path ~= subsequentPath[1..$];
        } else if (pathEndsWithSlash || extraStartsWithSlash) {
            // Exactly one separator is already there.
            path ~= subsequentPath;
        } else {
            // Neither side supplies a separator: insert it.
            path ~= '/';
            path ~= subsequentPath;
        }
        return this;
    }

    /**
     * Convert a relative URL to an absolute URL.
     *
     * This is designed so that you can scrape a webpage and quickly convert links within the
     * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses
     * for it.
     *
     * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme,
     * different everything else", which might not be desirable for all schemes.
*
     * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for
     * instance, this will give you our best attempt to parse it as a URL.
     *
     * A relative reference is resolved against the "directory" of this URL's path: everything
     * up to and including its last '/'. The query parameters and fragment of this URL are not
     * carried over into the result.
     *
     * Examples:
     * ---
     * auto base = "https://example.org/passworddb/?secure=false".parseURL;
     *
     * // Download https://example.org/passworddb/by-username/dhasenan
     * download(base.resolve("by-username/dhasenan"));
     *
     * // Download https://example.org/static/style.css
     * download(base.resolve("/static/style.css"));
     *
     * // Download https://cdn.example.net/jquery.js
     * download(base.resolve("https://cdn.example.net/jquery.js"));
     * ---
     *
     * Throws:
     *   URLException if `other` cannot be parsed.
     */
    URL resolve(string other)
    {
        // True if s could be a URL scheme: non-empty and made only of letters, digits, '+',
        // '-' and '.'.
        static bool isSchemeName(string s) pure @safe nothrow @nogc
        {
            if (s.length == 0) return false;
            foreach (char c; s)
            {
                immutable ok = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                    (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
                if (!ok) return false;
            }
            return true;
        }

        if (other.length == 0) return this;
        immutable reference = other;  // kept for the error message; `other` is rewritten below

        if (other[0] == '/')
        {
            if (other.length > 1 && other[1] == '/')
            {
                // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL"
                return parseURL(this.scheme ~ ':' ~ other);
            }
        }
        else
        {
            // Treat `other` as a separate absolute URL only if it really starts with
            // "scheme://". Comparing indexOf("://") with indexOf("/") is not enough: with no
            // "://" the first is -1, so any relative path containing a slash ("a/b") was parsed
            // as an absolute URL.
            immutable sep = other.indexOf("://");
            if (sep > 0 && isSchemeName(other[0 .. sep]))
            {
                return other.parseURL;
            }
        }

        URL ret = this;
        ret.path = "";
        ret.queryParams = QueryParams.init;
        // Don't let the base URL's fragment leak into the result.
        ret.fragment = null;
        if (other[0] != '/')
        {
            // relative to something
            if (!this.path.length)
            {
                // nothing to be relative to
                other = "/" ~ other;
            }
            else if (this.path[$-1] == '/')
            {
                // directory-style path for the current thing
                // resolve relative to this directory
                other = this.path ~ other;
            }
            else
            {
                // this is a file-like thing
                // find the 'directory' and relative to that
                other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other;
            }
        }
        if (!parsePathAndQuery(ret, other))
        {
            // Previously the failure was ignored and a half-populated URL was returned.
            throw new URLException("failed to resolve relative URL " ~ reference);
        }
        return ret;
    }
}

/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
*
 * Params:
 *   value = the text to parse
 *   url = receives the parsed URL; it is reset to URL.init before parsing starts
 *
 * Returns: true if parsing succeeded. False for an unterminated IPv6 literal, a port that is
 * not a valid ushort, or invalid percent-encoding in any component.
 */
bool tryParseURL(string value, out URL url)
{
    // True if s could be a URL scheme: non-empty and made only of letters, digits, '+', '-'
    // and '.'.
    static bool isSchemeName(string s) pure @safe nothrow @nogc
    {
        if (s.length == 0) return false;
        foreach (char c; s)
        {
            immutable ok = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                (c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.';
            if (!ok) return false;
        }
        return true;
    }

    url = URL.init;
    // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    immutable slashes = value.indexOf("//");
    if (slashes == 0) {
        // "//host/path": the scheme is deliberately left empty.
        value = value[2 .. $];
    } else if (slashes >= 2 && value[slashes - 1] == ':' && isSchemeName(value[0 .. slashes - 1])) {
        url.scheme = value[0 .. slashes - 1];
        value = value[slashes + 2 .. $];
    } else {
        // No "scheme://" prefix. A "//" further along (for instance inside the path or the
        // query) is not a scheme separator, so leave the text alone.
        url.scheme = "http";
    }

    // [user[:password]@]host[:port][/path][?query][#fragment]
    // Userinfo is whatever precedes an '@' inside the authority, i.e. before the first of
    // '/', '?' and '#'.
    auto authorityEnd = value.indexOfAny("/?#");
    if (authorityEnd == -1) {
        authorityEnd = value.length;
    }
    immutable at = value[0 .. authorityEnd].indexOf('@');
    if (at > -1) {
        auto userinfo = value[0 .. at];
        immutable colon = userinfo.indexOf(':');
        try {
            if (colon > -1) {
                url.user = userinfo[0 .. colon].percentDecode;
                url.pass = userinfo[colon + 1 .. $].percentDecode;
            } else {
                // A user without a password, as in "user@host:22". The old code sliced
                // [i+1 .. j] with j < i here and died with a range error.
                url.user = userinfo.percentDecode;
            }
        } catch (URLException) {
            return false;
        }
        value = value[at + 1 .. $];
    }

    // What is left starts with the host. '?' and '#' also end the host, so that
    // "example.org?x=1" is not read as one long host name.
    auto i = value.indexOfAny(":/[?#");
    if (i == -1) {
        // Just a hostname.
        url.host = value.fromPuny;
        return true;
    }

    // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't
    // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we
    // handle specially, and are offset with square brackets.
    if (value[i] == '[') {
        auto close = value[i..$].indexOf(']');
        if (close < 0) {
            // unterminated ipv6 addr
            return false;
        }
        // includes square brackets
        url.host = value[i .. i+close+1];
        value = value[i+close+1 .. $];
        if (value.length == 0) {
            // read to end of string; we finished parse
            return true;
        }
        if (value[0] != ':' && value[0] != '?' && value[0] != '/' && value[0] != '#') {
            return false;
        }
    } else {
        // Normal host.
        url.host = value[0..i].fromPuny;
        value = value[i .. $];
    }

    if (value[0] == ':') {
        // The port runs to the start of the path, query or fragment, whichever comes first.
        auto end = value.indexOfAny("/?#");
        if (end == -1) {
            end = value.length;
        }
        try {
            url.port = value[1 .. end].to!ushort;
        } catch (ConvException) {
            return false;
        }
        value = value[end .. $];
        if (value.length == 0) {
            return true;
        }
    }
    return parsePathAndQuery(url, value);
}

/**
 * Parse "[path][?query][#fragment]" into url.path, url.queryParams and url.fragment.
 *
 * Query parameters are added to whatever url.queryParams already holds. Returns false if any
 * component contains invalid percent-encoding; url may be partially updated in that case.
 */
private bool parsePathAndQuery(ref URL url, string value)
{
    try
    {
        auto i = value.indexOfAny("?#");
        if (i == -1)
        {
            // Path only. This decode used to sit outside any try block, so a bad escape in the
            // path threw out of tryParseURL instead of making it return false.
            url.path = value.percentDecode;
            return true;
        }

        url.path = value[0..i].percentDecode;

        auto c = value[i];
        value = value[i + 1 .. $];
        if (c == '?')
        {
            // Split the query off from a trailing fragment, if there is one.
            i = value.indexOf('#');
            string query;
            if (i < 0)
            {
                query = value;
                value = null;
            }
            else
            {
                query = value[0..i];
                value = value[i + 1 .. $];
            }
            foreach (q; query.split('&'))
            {
                auto j = q.indexOf('=');
                string key, val;
                if (j < 0)
                {
                    // "?flag" -- a key with no value
                    key = q;
                }
                else
                {
                    key = q[0..j];
                    val = q[j + 1 .. $];
                }
                key = key.percentDecode;
                val = val.percentDecode;
                url.queryParams.add(key, val);
            }
        }

        // Whatever remains (possibly nothing) is the fragment.
        url.fragment = value.percentDecode;
        return true;
    }
    catch (URLException)
    {
        return false;
    }
}

unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        assert(
                // Not sure what order it'll come out in.
url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Non-ASCII and reserved characters are percent-encoded on output.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            queryParams.add("❄", "❀");
            queryParams.add("[", "]");
            fragment = "ş";
        }
        // Either parameter order is accepted.
        assert(
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Explicit port plus credentials.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // A query but no path: the path is rendered as "/".
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            queryParams.add("hi", "bye");
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}

unittest
{
    // Scheme-relative input.
    auto url = "//foo/bar".parseURL;
    assert(url.host == "foo", "expected host foo, got " ~ url.host);
    assert(url.path == "/bar");
}

unittest
{
    // IPv6 host names keep their square brackets.
    {
        // every component present
        auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar");
        assert(url.scheme == "https", url.scheme);
        assert(url.user == "bob", url.user);
        assert(url.pass == "secret", url.pass);
        assert(url.host == "[::1]", url.host);
        assert(url.port == 2771, url.port.to!string);
        assert(url.path == "/foo/bar", url.path);
    }

    // nothing but the address
    {
        auto url = parseURL("[::1]");
        assert(url.host == "[::1]", url.host);
    }

    // scheme, address and path
    {
        auto url = parseURL("http://[::1]/foo");
        assert(url.scheme == "http", url.scheme);
        assert(url.host == "[::1]", url.host);
        assert(url.path == "/foo", url.path);
    }

    // full-length address with query and fragment
    {
        auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding");
        assert(url.scheme == "https");
        assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]");
        assert(url.path == "/");
        assert(url.fragment == "justkidding");
    }
}

unittest
{
    // Appending path elements to a scheme-less host:port.
    auto url = "localhost:5984".parseURL;
    auto url2 = url ~ "db1";
    assert(url2.toString == "http://localhost:5984/db1", url2.toString);
    auto url3 = url2 ~ "_all_docs";
    assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
}

///
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        // Either parameter order is accepted.
        assert(
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // The same key may be added more than once.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("hello", "aether");
            fragment = "frag";
        }
        // Either parameter order is accepted.
        assert(
                url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
                url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            queryParams.add("❄", "❀");
            queryParams.add("[", "]");
            fragment = "ş";
        }
        // Either parameter order is accepted.
        assert(
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            queryParams.add("hi", "bye");
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}

unittest {
    // Percent decoding.

    // Decoded, the userinfo below reads "#" / "!:".
    auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash";
    auto url = urlString.parseURL;
    assert(url.user == "#");
    assert(url.pass == "!:");
    assert(url.host == "example.org");
    assert(url.path == "/{/}");
    assert(url.queryParams[";"].front == "");
    assert(url.queryParams["&"].front == "=");
    assert(url.fragment == "#hash");

    // Parsing and re-serialising must reproduce the input, once and twice over.
    assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString);
    assert(urlString == urlString.parseURL.toString.parseURL.toString);
}

unittest {
    // Punycode host names are decoded on parse...
    auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
    assert(url.host == "☂.☃.org", url.host);
}

unittest {
    // ...and Unicode host names are encoded again on output.
    auto url = "https://☂.☃.org/?hi=bye".parseURL;
    assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}

///
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    URL url2;
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    url2 = url ~ "/bar";
    assert(url2.toString == "http://example.org/foo/bar", url2.toString);
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);

    // No path, just fragment.
    url = "ircs://irc.freenode.com/#d".parseURL;
    assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
}
unittest
{
    // resolve() against a directory-style base
    {
        auto base = "https://example.org/this/".parseURL;
        assert(base.resolve("that") == "https://example.org/this/that");
        assert(base.resolve("/that") == "https://example.org/that");
        assert(base.resolve("//example.net/that") == "https://example.net/that");
    }

    // resolve() against a file-style base that has query params
    {
        auto base = "https://example.org/this?query=value&other=value2".parseURL;
        assert(base.resolve("that") == "https://example.org/that");
        assert(base.resolve("/that") == "https://example.org/that");
        assert(base.resolve("//example.net/that") == "https://example.net/that");
    }
}


unittest
{
    // A URL must be usable wherever std.net.curl expects a string.
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}

/**
 * Parse the input string as a URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
*/
URL parseURL(string value) {
    URL parsed;
    if (!tryParseURL(value, parsed)) {
        throw new URLException("failed to parse URL " ~ value);
    }
    return parsed;
}

///
unittest {
    {
        // No scheme given: http is inferred.
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Scheme and host only.
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // Path present.
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // Explicit port.
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // User and password.
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // User and password, percent-encoded.
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // User, password, port and path together.
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // Query string.
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "");
    }
    {
        // Query string followed by a fragment.
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.queryParams["login"].front == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // Percent-encoded path, query and fragment.
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        assert(u1.queryParams["❄"].front == "=");
        assert(u1.fragment == "^");
    }
}

unittest {
    // Default versus explicit ports.
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    // A port equal to the scheme default is dropped when printing.
    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * ASCII letters, digits and the characters -._~ are copied through unchanged. Every other
 * character is replaced by %XX (uppercase hex) for each byte of its UTF-8 encoding.
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    import std.utf : encode;
    import std.array : Appender;
    static immutable hexDigits = "0123456789ABCDEF";
    Appender!string app;
    foreach (dchar d; raw) {
        immutable isLetter = ('a' <= d && d <= 'z') || ('A' <= d && d <= 'Z');
        immutable isDigit = '0' <= d && d <= '9';
        immutable isMark = d == '-' || d == '.' || d == '_' || d == '~';
        if (isLetter || isDigit || isMark) {
            app ~= d;
            continue;
        }
        // Everything else, ASCII or not, is written as one %XX escape per byte of its
        // UTF-8 encoding (and we hope the server understands UTF-8).
        char[4] utf8;
        immutable count = encode(utf8, d);
        foreach (b; utf8[0 .. count]) {
            app ~= '%';
            app ~= hexDigits[b >> 4];
            app ~= hexDigits[b & 0x0F];
        }
    }
    return app.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);
}

/**
 * Percent-decode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode.
For 1189 * everything else, there is percent encoding. 1190 * 1191 * This explicitly ensures that the result is a valid UTF-8 string. 1192 */ 1193 string percentDecode(string encoded) 1194 { 1195 import std.utf : validate, UTFException; 1196 auto raw = percentDecodeRaw(encoded); 1197 auto s = cast(string) raw; 1198 try 1199 { 1200 validate(s); 1201 } 1202 catch (UTFException e) 1203 { 1204 throw new URLException( 1205 "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence."); 1206 } 1207 return s; 1208 } 1209 1210 /// 1211 unittest { 1212 assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding"); 1213 assert(percentDecode("~~--..__") == "~~--..__"); 1214 assert(percentDecode("0123456789") == "0123456789"); 1215 1216 string e; 1217 1218 e = percentDecode("%E2%98%83"); 1219 assert(e == "☃", "expected a snowman but got" ~ e); 1220 1221 e = percentDecode("%e2%98%83"); 1222 assert(e == "☃", "expected a snowman but got" ~ e); 1223 1224 try { 1225 // %ES is an invalid percent sequence: 'S' is not a hex digit. 1226 percentDecode("%es"); 1227 assert(false, "expected exception not thrown"); 1228 } catch (URLException) { 1229 } 1230 1231 try { 1232 percentDecode("%e"); 1233 assert(false, "expected exception not thrown"); 1234 } catch (URLException) { 1235 } 1236 } 1237 1238 /** 1239 * Percent-decode a string into a ubyte array. 1240 * 1241 * URL components cannot contain non-ASCII characters, and there are very few characters that are 1242 * safe to include as URL components. Domain names using Unicode values use Punycode. For 1243 * everything else, there is percent encoding. 1244 * 1245 * This yields a ubyte array and will not perform validation on the output. However, an improperly 1246 * formatted input string will result in a URLException. 1247 */ 1248 immutable(ubyte)[] percentDecodeRaw(string encoded) 1249 { 1250 // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now. 
1251 import std.array : Appender; 1252 Appender!(immutable(ubyte)[]) app; 1253 for (int i = 0; i < encoded.length; i++) { 1254 if (encoded[i] != '%') { 1255 app ~= encoded[i]; 1256 continue; 1257 } 1258 if (i >= encoded.length - 2) { 1259 throw new URLException("Invalid percent encoded value: expected two characters after " ~ 1260 "percent symbol. Error at index " ~ i.to!string); 1261 } 1262 if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) { 1263 auto b = fromHex(encoded[i + 1]); 1264 auto c = fromHex(encoded[i + 2]); 1265 app ~= cast(ubyte)((b << 4) | c); 1266 } else { 1267 throw new URLException("Invalid percent encoded value: expected two hex digits after " ~ 1268 "percent symbol. Error at index " ~ i.to!string); 1269 } 1270 i += 2; 1271 } 1272 return app.data; 1273 } 1274 1275 private bool isHex(char c) { 1276 return ('0' <= c && '9' >= c) || 1277 ('a' <= c && 'f' >= c) || 1278 ('A' <= c && 'F' >= c); 1279 } 1280 1281 private ubyte fromHex(char s) { 1282 enum caseDiff = 'a' - 'A'; 1283 if (s >= 'a' && s <= 'z') { 1284 s -= caseDiff; 1285 } 1286 return cast(ubyte)("0123456789ABCDEF".indexOf(s)); 1287 } 1288 1289 private string toPuny(string unicodeHostname) 1290 { 1291 if (unicodeHostname[0] == '[') 1292 { 1293 // It's an ipv6 name. 
1294 return unicodeHostname; 1295 } 1296 bool mustEncode = false; 1297 foreach (i, dchar d; unicodeHostname) { 1298 auto c = cast(uint) d; 1299 if (c > 0x80) { 1300 mustEncode = true; 1301 break; 1302 } 1303 if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) { 1304 throw new URLException( 1305 format( 1306 "domain name '%s' contains illegal character '%s' at position %s", 1307 unicodeHostname, d, i)); 1308 } 1309 } 1310 if (!mustEncode) { 1311 return unicodeHostname; 1312 } 1313 import std.algorithm.iteration : map; 1314 return unicodeHostname.split('.').map!punyEncode.join("."); 1315 } 1316 1317 private string fromPuny(string hostname) 1318 { 1319 import std.algorithm.iteration : map; 1320 return hostname.split('.').map!punyDecode.join("."); 1321 } 1322 1323 private { 1324 enum delimiter = '-'; 1325 enum marker = "xn--"; 1326 enum ulong damp = 700; 1327 enum ulong tmin = 1; 1328 enum ulong tmax = 26; 1329 enum ulong skew = 38; 1330 enum ulong base = 36; 1331 enum ulong initialBias = 72; 1332 enum dchar initialN = cast(dchar)128; 1333 1334 ulong adapt(ulong delta, ulong numPoints, bool firstTime) { 1335 if (firstTime) { 1336 delta /= damp; 1337 } else { 1338 delta /= 2; 1339 } 1340 delta += delta / numPoints; 1341 ulong k = 0; 1342 while (delta > ((base - tmin) * tmax) / 2) { 1343 delta /= (base - tmin); 1344 k += base; 1345 } 1346 return k + (((base - tmin + 1) * delta) / (delta + skew)); 1347 } 1348 } 1349 1350 /** 1351 * Encode the input string using the Punycode algorithm. 1352 * 1353 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked 1354 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com" 1355 * in Punycode, you will get "xn--m3h.xn--n3h.com". 1356 * 1357 * In order to puny-encode a domain name, you must split it into its components. 
The following will 1358 * typically suffice: 1359 * --- 1360 * auto domain = "☂.☃.com"; 1361 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join("."); 1362 * --- 1363 */ 1364 string punyEncode(string input) 1365 { 1366 import std.array : Appender; 1367 ulong delta = 0; 1368 dchar n = initialN; 1369 auto i = 0; 1370 auto bias = initialBias; 1371 Appender!string output; 1372 output ~= marker; 1373 auto pushed = 0; 1374 auto codePoints = 0; 1375 foreach (dchar c; input) { 1376 codePoints++; 1377 if (c <= initialN) { 1378 output ~= c; 1379 pushed++; 1380 } 1381 } 1382 if (pushed < codePoints) { 1383 if (pushed > 0) { 1384 output ~= delimiter; 1385 } 1386 } else { 1387 // No encoding to do. 1388 return input; 1389 } 1390 bool first = true; 1391 while (pushed < codePoints) { 1392 auto best = dchar.max; 1393 foreach (dchar c; input) { 1394 if (n <= c && c < best) { 1395 best = c; 1396 } 1397 } 1398 if (best == dchar.max) { 1399 throw new URLException("failed to find a new codepoint to process during punyencode"); 1400 } 1401 delta += (best - n) * (pushed + 1); 1402 if (delta > uint.max) { 1403 // TODO better error message 1404 throw new URLException("overflow during punyencode"); 1405 } 1406 n = best; 1407 foreach (dchar c; input) { 1408 if (c < n) { 1409 delta++; 1410 } 1411 if (c == n) { 1412 ulong q = delta; 1413 auto k = base; 1414 while (true) { 1415 ulong t; 1416 if (k <= bias) { 1417 t = tmin; 1418 } else if (k >= bias + tmax) { 1419 t = tmax; 1420 } else { 1421 t = k - bias; 1422 } 1423 if (q < t) { 1424 break; 1425 } 1426 output ~= digitToBasic(t + ((q - t) % (base - t))); 1427 q = (q - t) / (base - t); 1428 k += base; 1429 } 1430 output ~= digitToBasic(q); 1431 pushed++; 1432 bias = adapt(delta, pushed, first); 1433 first = false; 1434 delta = 0; 1435 } 1436 } 1437 delta++; 1438 n++; 1439 } 1440 return cast(string)output.data; 1441 } 1442 1443 /** 1444 * Decode the input string using the Punycode algorithm. 
1445 * 1446 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked 1447 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com" 1448 * in Punycode, you will get "xn--m3h.xn--n3h.com". 1449 * 1450 * In order to puny-decode a domain name, you must split it into its components. The following will 1451 * typically suffice: 1452 * --- 1453 * auto domain = "xn--m3h.xn--n3h.com"; 1454 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join("."); 1455 * --- 1456 */ 1457 string punyDecode(string input) { 1458 if (!input.startsWith(marker)) { 1459 return input; 1460 } 1461 input = input[marker.length..$]; 1462 1463 // let n = initial_n 1464 dchar n = cast(dchar)128; 1465 1466 // let i = 0 1467 // let bias = initial_bias 1468 // let output = an empty string indexed from 0 1469 size_t i = 0; 1470 auto bias = initialBias; 1471 dchar[] output; 1472 // This reserves a bit more than necessary, but it should be more efficient overall than just 1473 // appending and inserting volo-nolo. 1474 output.reserve(input.length); 1475 1476 // consume all code points before the last delimiter (if there is one) 1477 // and copy them to output, fail on any non-basic code point 1478 // if more than zero code points were consumed then consume one more 1479 // (which will be the last delimiter) 1480 auto end = input.lastIndexOf(delimiter); 1481 if (end > -1) { 1482 foreach (dchar c; input[0..end]) { 1483 output ~= c; 1484 } 1485 input = input[end+1 .. 
$]; 1486 } 1487 1488 // while the input is not exhausted do begin 1489 size_t pos = 0; 1490 while (pos < input.length) { 1491 // let oldi = i 1492 // let w = 1 1493 auto oldi = i; 1494 auto w = 1; 1495 // for k = base to infinity in steps of base do begin 1496 for (ulong k = base; k < uint.max; k += base) { 1497 // consume a code point, or fail if there was none to consume 1498 // Note that the input is all ASCII, so we can simply index the input string bytewise. 1499 auto c = input[pos]; 1500 pos++; 1501 // let digit = the code point's digit-value, fail if it has none 1502 auto digit = basicToDigit(c); 1503 // let i = i + digit * w, fail on overflow 1504 i += digit * w; 1505 // let t = tmin if k <= bias {+ tmin}, or 1506 // tmax if k >= bias + tmax, or k - bias otherwise 1507 ulong t; 1508 if (k <= bias) { 1509 t = tmin; 1510 } else if (k >= bias + tmax) { 1511 t = tmax; 1512 } else { 1513 t = k - bias; 1514 } 1515 // if digit < t then break 1516 if (digit < t) { 1517 break; 1518 } 1519 // let w = w * (base - t), fail on overflow 1520 w *= (base - t); 1521 // end 1522 } 1523 // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?) 1524 bias = adapt(i - oldi, output.length + 1, oldi == 0); 1525 // let n = n + i div (length(output) + 1), fail on overflow 1526 n += i / (output.length + 1); 1527 // let i = i mod (length(output) + 1) 1528 i %= (output.length + 1); 1529 // {if n is a basic code point then fail} 1530 // (We aren't actually going to fail here; it's clear what this means.) 1531 // insert n into output at position i 1532 import std.array : insertInPlace; 1533 (() @trusted { output.insertInPlace(i, cast(dchar)n); })(); // should be @safe but isn't marked 1534 // increment i 1535 i++; 1536 // end 1537 } 1538 return output.to!string; 1539 } 1540 1541 // Lifted from punycode.js. 1542 private dchar digitToBasic(ulong digit) { 1543 return cast(dchar)(digit + 22 + 75 * (digit < 26)); 1544 } 1545 1546 // Lifted from punycode.js. 
/// Map a basic code point to its Punycode digit value: '0'..'9' give 26..35 and letters of either
/// case give 0..25. Any other character yields `base`, which is not a valid digit.
private uint basicToDigit(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0' + 26;
    }
    if (c >= 'A' && c <= 'Z') {
        return c - 'A';
    }
    if (c >= 'a' && c <= 'z') {
        return c - 'a';
    }
    return base;
}

unittest {
    {
        auto a = "b\u00FCcher";
        assert(punyEncode(a) == "xn--bcher-kva");
    }
    {
        auto a = "b\u00FCc\u00FCher";
        assert(punyEncode(a) == "xn--bcher-kvab");
    }
    {
        auto a = "ýbücher";
        auto b = punyEncode(a);
        assert(b == "xn--bcher-kvaf", b);
    }

    {
        auto a = "mañana";
        assert(punyEncode(a) == "xn--maana-pta");
    }

    {
        auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
        auto b = punyEncode(a);
        assert(b == "xn--egbpdaj6bu4bxfgehfvwxn", b);
    }
    import std.stdio;
}

unittest {
    {
        auto b = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
        assert(b == "ليهمابتكلموشعربي؟", b);
    }
    {
        assert(punyDecode("xn--maana-pta") == "mañana");
    }
}

unittest {
    import std.string, std.algorithm, std.array, std.range;
    {
        auto domain = "xn--m3h.xn--n3h.com";
        auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
        assert(decodedDomain == "☂.☃.com", decodedDomain);
    }
    {
        auto domain = "☂.☃.com";
        auto decodedDomain = domain.splitter(".").map!(punyEncode).join(".");
        assert(decodedDomain == "xn--m3h.xn--n3h.com", decodedDomain);
    }
}