/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     auto fileURL = url;
 *     fileURL.path = file;
 *     system("scp", fileURL.toString, ".");
 * }
 * ---
 */
module url;

import std.algorithm;
import std.array;
import std.conv;
import std.encoding;
import std.string;
import std.utf;

/// An exception thrown when something bad happens with URLs.
class URLException : Exception {
    /// Params:
    ///   msg  = description of the failure.
    ///   file = source file of the throw site (defaults to the caller's file).
    ///   line = source line of the throw site (defaults to the caller's line).
    this(string msg, string file = __FILE__, size_t line = __LINE__) {
        super(msg, file, line);
    }
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 */
ushort[string] schemeToDefaultPort;

// Populates schemeToDefaultPort when the module is initialized.
static this() {
    schemeToDefaultPort = [
        "aaa": 3868,
        "aaas": 5658,
        "acap": 674,
        "cap": 1026,
        "coap": 5683,
        "coaps": 5684,
        "dav": 443,
        "dict": 2628,
        "ftp": 21,
        "git": 9418,
        "go": 1096,
        "gopher": 70,
        "http": 80,
        "https": 443,
        "iac": 4569,
        "icap": 1344,
        "imap": 143,
        "ipp": 631,
        "ipps": 631,  // yes, they're both mapped to port 631
        "irc": 6667,  // De facto default port, not the IANA reserved port.
        "ircs": 6697,
        "iris": 702,  // defaults to iris.beep
        "iris.beep": 702,
        "iris.lwz": 715,
        "iris.xpc": 713,
        "iris.xpcs": 714,
        "jabber": 5222,  // client-to-server
        "ldap": 389,
        "ldaps": 636,
        "msrp": 2855,
        "msrps": 2855,
        "mtqp": 1038,
        "mupdate": 3905,
        "news": 119,
        "nfs": 2049,
        "pop": 110,
        "redis": 6379,
        "reload": 6084,
        "rsync": 873,
        "rtmfp": 1935,
        "rtsp": 554,
        "shttp": 80,
        "sieve": 4190,
        "sip": 5060,
        "sips": 5061,
        "smb": 445,
        "smtp": 25,
        "snews": 563,
        "snmp": 161,
        "soap.beep": 605,
        "ssh": 22,
        "stun": 3478,
        "stuns": 5349,
        "svn": 3690,
        "teamspeak": 9987,
        "telnet": 23,
        "tftp": 69,
        "tip": 3372,
    ];
}

/**
 * A Uniform Resource Locator.
 *
 * The syntax for URLs is scheme:[//[user[:password]@]host[:port]][/]path[?query][#fragment].
 *
 */
struct URL {
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. May be accompanied by a password.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /// The port.
    /// This is inferred from the scheme if it isn't present in the URL itself.
    /// If the scheme is not known and the port is not present, the port will be given as 0.
    /// For some schemes, port will not be sensible -- for instance, file or chrome-extension.
    ///
    /// If you explicitly need to detect whether the user provided a port, check the providedPort
    /// field.
    @property ushort port() {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /// Set the port.
    /// This is a shortcut for convenience because you probably don't care about the difference
    /// between port and providedPort.
    @property ushort port(ushort value) {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL.
    /// Zero means no port was provided.
    ushort providedPort;

    /// The path. This excludes the query string.
    /// For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
    /// "/news/story/17774".
    string path;

    /// The query string elements.
    /// For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
    /// elements will be ["visited": "false"].
    /// Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
    /// ["item": ""].
    ///
    /// This field is mutable. (There is no alternative in this case.) So be cautious.
    string[string] query;

    /// The fragment. In web documents, this typically refers to an anchor element.
    /// For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
    string fragment;

    /// Convert this URL to a string.
    /// The string is properly formatted and usable for, eg, a web request.
    ///
    /// User, password, path segments, query keys/values and the fragment are percent-encoded.
    /// The port is only written when it was explicitly provided and differs from the scheme's
    /// default port. An empty path (or a path of just "/") is written as "/".
    string toString() {
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Test lengths rather than pointers: an empty-but-non-null string must count as absent.
        if (user.length) {
            s ~= user.percentEncode;
            if (pass.length) {
                s ~= ":";
                s ~= pass.percentEncode;
            }
            s ~= "@";
        }
        s ~= host;
        if (providedPort) {
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length && p[0] == '/') {
            p = p[1 .. $];
        }
        if (p.length == 0) {
            // Covers both an empty path and a path that is exactly "/".
            s ~= '/';
        } else {
            // Encode each segment separately so the separators themselves stay literal.
            foreach (part; p.split('/')) {
                s ~= '/';
                s ~= part.percentEncode;
            }
        }
        if (query.length) {
            s ~= '?';
            bool first = true;
            foreach (k, v; query) {
                if (!first) {
                    s ~= '&';
                }
                first = false;
                s ~= k.percentEncode;
                if (v.length) {
                    s ~= '=';
                    s ~= v.percentEncode;
                }
            }
        }
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a path
     * element to the URL's path. It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        if (query) {
            // Give the copy its own table so later edits don't leak back into this URL.
            other.query = other.query.dup;
        }
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        if (path.endsWith("/") || subsequentPath.startsWith("/")) {
            if (path.endsWith("/") && subsequentPath.startsWith("/")) {
                // Both sides bring a slash; keep only one.
                path ~= subsequentPath[1..$];
            } else {
                path ~= subsequentPath;
            }
        } else {
            path ~= '/';
            path ~= subsequentPath;
        }
        return this;
    }
}

// True if s is a syntactically valid scheme: a letter followed by letters, digits, '+', '-'
// or '.'.
private bool isValidScheme(string s) {
    if (s.length == 0) {
        return false;
    }
    foreach (i, char c; s) {
        if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
            continue;
        }
        if (i > 0 && ((c >= '0' && c <= '9') || c == '+' || c == '-' || c == '.')) {
            continue;
        }
        return false;
    }
    return true;
}

/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * The scheme is optional; "http" is inferred when the input does not start with a valid scheme
 * followed by "://". User, password, path, query keys/values and fragment are percent-decoded.
 * A host written as a bracketed literal (for instance "[::1]") is kept with its brackets.
 *
 * Punycode is not supported.
 *
 * Params:
 *   value = the text to parse.
 *   url   = receives the parsed URL. Its contents are unspecified when false is returned.
 *
 * Returns: true on success; false if the port is not a valid number, a bracketed host is
 * malformed, or a percent-encoded component is invalid.
 */
bool tryParseURL(string value, out URL url) {
    url = URL.init;
    // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    // Only accept a well-formed scheme so that a "://" appearing later (for instance inside a
    // query string) is not mistaken for the scheme separator.
    auto sep = value.indexOf("://");
    if (sep > 0 && isValidScheme(value[0 .. sep])) {
        url.scheme = value[0 .. sep];
        value = value[sep + 3 .. $];
    } else {
        url.scheme = "http";
    }

    // [user:password@]host[:port]][/]path[?query][#fragment
    // The authority runs up to the first '/', '?' or '#', whichever comes first.
    auto authEnd = value.indexOfAny("/?#");
    string authority = authEnd == -1 ? value : value[0 .. authEnd];
    value = authEnd == -1 ? null : value[authEnd .. $];

    // Optional user[:password]@ prefix. The last '@' is used so that an unescaped '@' inside the
    // password does not end up in the host.
    auto at = authority.lastIndexOf('@');
    if (at != -1) {
        auto userinfo = authority[0 .. at];
        authority = authority[at + 1 .. $];
        auto colon = userinfo.indexOf(':');
        try {
            if (colon == -1) {
                url.user = userinfo.percentDecode;
            } else {
                url.user = userinfo[0 .. colon].percentDecode;
                url.pass = userinfo[colon + 1 .. $].percentDecode;
            }
        } catch (URLException) {
            return false;
        }
    }

    // host[:port]
    string portText;
    bool hasPort = false;
    if (authority.length && authority[0] == '[') {
        // Bracketed literal: colons inside the brackets are part of the host.
        auto close = authority.indexOf(']');
        if (close == -1) {
            return false;
        }
        url.host = authority[0 .. close + 1];
        auto rest = authority[close + 1 .. $];
        if (rest.length) {
            if (rest[0] != ':') {
                return false;
            }
            portText = rest[1 .. $];
            hasPort = true;
        }
    } else {
        auto colon = authority.indexOf(':');
        if (colon == -1) {
            url.host = authority;
        } else {
            url.host = authority[0 .. colon];
            portText = authority[colon + 1 .. $];
            hasPort = true;
        }
    }
    if (hasPort) {
        try {
            url.providedPort = portText.to!ushort;
        } catch (ConvException) {
            return false;
        }
    }

    if (value.length == 0) {
        return true;
    }

    // [/]path[?query][#fragment]
    // The fragment starts at the first '#'; a '?' only starts a query if it comes before that.
    string fragmentText;
    auto hash = value.indexOf('#');
    if (hash != -1) {
        fragmentText = value[hash + 1 .. $];
        value = value[0 .. hash];
    }
    string queryText;
    auto mark = value.indexOf('?');
    if (mark != -1) {
        queryText = value[mark + 1 .. $];
        value = value[0 .. mark];
    }

    try {
        // Always decode the path, whether or not a query or fragment follows it.
        url.path = value.percentDecode;
        foreach (item; queryText.split('&')) {
            if (item.length == 0) {
                // Ignore empty segments such as the one between "&&".
                continue;
            }
            auto eq = item.indexOf('=');
            if (eq == -1) {
                url.query[item.percentDecode] = "";
            } else {
                url.query[item[0 .. eq].percentDecode] = item[eq + 1 .. $].percentDecode;
            }
        }
        url.fragment = fragmentText.percentDecode;
    } catch (URLException) {
        return false;
    }

    return true;
}

///
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            query["hello"] = "world";
            query["gibe"] = "clay";
            fragment = "frag";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            query["❄"] = "❀";
            query["["] = "]";
            fragment = "ş";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            query["hi"] = "bye";
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}

///
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);
}

/**
 * Parse the input string as a URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL url;
    if (tryParseURL(value, url)) {
        return url;
    }
    throw new URLException("failed to parse URL " ~ value);
}

///
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // With user and port and path
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // With query string
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "");
    }
    {
        // With query string and fragment
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // With URL-encoded values
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        assert(u1.query["❄"] == "=");
        assert(u1.fragment == "^");
    }
}

unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

// Regression tests for parsing and formatting edge cases.
unittest {
    {
        // Query and fragment directly after the host, with no path.
        auto u = parseURL("http://example.org?foo=bar#baz");
        assert(u.host == "example.org");
        assert(u.path == "");
        assert(u.query["foo"] == "bar");
        assert(u.fragment == "baz");
        assert(u.toString == "http://example.org/?foo=bar#baz", u.toString);
    }
    {
        // Port followed directly by a query.
        auto u = parseURL("http://example.org:8080?x=1");
        assert(u.host == "example.org");
        assert(u.port == 8080);
        assert(u.query["x"] == "1");
    }
    {
        // The path is decoded even when no query or fragment follows, so it round-trips.
        auto u = parseURL("https://example.org/%E2%98%83");
        assert(u.path == "/☃", u.path);
        assert(u.toString == "https://example.org/%E2%98%83", u.toString);
    }
    {
        // User without a password.
        auto u = parseURL("ssh://bob@example.org/");
        assert(u.user == "bob");
        assert(u.pass == "");
        assert(u.host == "example.org");
        assert(u.toString == "ssh://bob@example.org/", u.toString);
    }
    {
        // A "://" inside the query must not be mistaken for the scheme separator.
        auto u = parseURL("example.org/?next=http://other.example/");
        assert(u.scheme == "http");
        assert(u.host == "example.org");
        assert(u.query["next"] == "http://other.example/");
    }
    {
        // Bracketed host literal with a port.
        auto u = parseURL("http://[::1]:8080/x");
        assert(u.host == "[::1]");
        assert(u.port == 8080);
        assert(u.path == "/x");
    }
    {
        // Malformed percent escapes are rejected rather than read out of bounds.
        URL u;
        assert(!tryParseURL("http://example.org/%", u));
        assert(!tryParseURL("http://example.org/%zz", u));
    }
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * Letters, digits and the characters -._~ are copied unchanged; every other character is
 * replaced by %XX (uppercase hex) for each octet of its UTF-8 encoding.
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    Appender!string app;
    foreach (dchar d; raw) {
        if (('a' <= d && 'z' >= d) ||
                ('A' <= d && 'Z' >= d) ||
                ('0' <= d && '9' >= d) ||
                d == '-' || d == '.' || d == '_' || d == '~') {
            app ~= d;
            continue;
        }
        // Something simple like a space character? Still in 7-bit ASCII?
        // Then its UTF-8 encoding is a single octet and we encode just that.
        // Something not in 7-bit ASCII? Then we percent-encode each octet
        // in the UTF-8 encoding (and hope the server understands UTF-8).
        char[4] buf;
        immutable len = std.utf.encode(buf, d);
        foreach (char octet; buf[0 .. len]) {
            app ~= format("%%%02X", cast(ubyte) octet);
        }
    }
    return app.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);
}

/**
 * Percent-decode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Throws:
 *   URLException if the input is not properly percent-encoded or the decoded bytes are not
 *   valid UTF-8.
 */
string percentDecode(string encoded) {
    ubyte[] raw = percentDecodeRaw(encoded);
    auto s = cast(string) raw;
    if (!s.isValid) {
        throw new URLException("input contains invalid UTF data");
    }
    return s;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);
}

// Value of a single hexadecimal digit (either case), or -1 if c is not a hex digit.
private int hexValue(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/**
 * Percent-decode a string into a ubyte array.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Hexadecimal digits are accepted in either case.
 *
 * Throws:
 *   URLException if a percent symbol is not followed by two hexadecimal digits.
 */
ubyte[] percentDecodeRaw(string encoded) {
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    Appender!(ubyte[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= cast(ubyte) encoded[i];
            continue;
        }
        // Written as an addition so that inputs shorter than two characters cannot underflow.
        if (i + 2 >= encoded.length) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        immutable hi = hexValue(encoded[i + 1]);
        immutable lo = hexValue(encoded[i + 2]);
        if (hi < 0 || lo < 0) {
            throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
                    "digits after percent symbol. Error at index " ~ i.to!string);
        }
        app ~= cast(ubyte)((hi << 4) | lo);
        i += 2;
    }
    return app.data;
}

///
unittest {
    assert(percentDecodeRaw("%e2%98%83") == cast(ubyte[]) [0xE2, 0x98, 0x83]);
    assert(percentDecodeRaw("a%20b") == cast(ubyte[]) "a b".dup);

    foreach (bad; ["%", "%4", "abc%", "%zz", "%4g"]) {
        bool threw = false;
        try {
            percentDecodeRaw(bad);
        } catch (URLException) {
            threw = true;
        }
        assert(threw, "expected URLException for " ~ bad);
    }
}

/++
Disabled draft of Punycode hostname support; kept for reference only.

string toAscii(string unicodeHostname) {
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    auto parts = unicodeHostname.split('.');
    char[] result;
    foreach (part; parts) {
        result ~= punyEncode(part);
    }
    return cast(string)result;
}

string punyEncode(string item, string delimiter = null, string marker = null) {
    // Puny state machine initial variables.
    auto base = 36;
    auto tmin = 1;
    auto tmax = 26;
    auto skew = 38;
    auto damp = 700;
    auto initialBias = 72;
    long b = 0;

    bool needToEncode = false;
    Appender!(char[]) app;
    app ~= marker;
    foreach (dchar d; item) {
        if (d > '~') {  // Max printable ASCII. The DEL char isn't allowed in hostnames.
            needToEncode = true;
        } else {
            app ~= d;
            b++;
        }
    }
    if (!needToEncode) {
        return item;
    }
    app ~= delimiter;

    // The puny algorithm.
    // We use 64-bit arithmetic to avoid overflow issues -- unicode only defines up to 0x10FFFF,
    // and we won't be encoding gigabytes of data, but just to be safe.
    // Also we use signed values just to make things easier.
    long delta = 0;
    long bias = initialBias;
    long h = b;
    long lastIndex = 0;

    dchar digitToBasic(ulong digit) {
        if (digit < 26) {
            return 'a' + cast(dchar)digit;
        }
        return cast(dchar)('0' + (digit - 26));
    }

    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        auto k = 0;
        delta = firstTime ? (delta / damp) : delta >> 1;
        delta += (delta / numPoints);
        for (; delta > (base - tmin) * tmax >> 1; k += base) {
            delta = (delta / (base - tmin));
        }
        return k + (base - tmin + 1) * delta / (delta + skew);
    }

    auto f = filter!(x => x >= cast(dchar)128)(item).array;
    auto uniqueChars = uniq(std.algorithm.sorting.sort(f));
    foreach (dchar n; uniqueChars) {
        foreach (dchar c; item) {
            if (c < n) {
                delta++;
            } else if (c == n) {
                auto q = delta;
                for (ulong k = 0; k < cast(ulong)uint.max; k += base) {
                    auto t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
                    if (q < t) {
                        break;
                    }
                    app ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                }
                app ~= digitToBasic(q);
                bias = adapt(delta, h + 1, h == b);
                h++;
            }
        }
        delta++;
    }
    return cast(string)app.data;
}

unittest {
    import std.stdio;
    auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
        ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
    writeln(a);
    writeln(punyEncode(a));
    assert(punyEncode(a) == "egbpdaj6bu4bxfgehfvwxn");
}

struct URL {
    Host host;
}
++/