/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     system("scp", url ~ file, ".");
 * }
 * ---
 *
 * License: The MIT license.
 */
module url;

import std.algorithm;
import std.array;
import std.conv;
import std.encoding;
import std.string;
import std.utf;

/// An exception thrown when something bad happens with URLs.
class URLException : Exception {
    /**
     * Params:
     *   msg = description of the problem
     *   file = source file of the throw site (defaults to the caller's file)
     *   line = source line of the throw site (defaults to the caller's line)
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__) {
        super(msg, file, line);
    }
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 */
ushort[string] schemeToDefaultPort;

/// Populates schemeToDefaultPort when the module is initialized.
static this() {
    schemeToDefaultPort = [
        "aaa": 3868,
        "aaas": 5658,
        "acap": 674,
        "cap": 1026,
        "coap": 5683,
        "coaps": 5684,
        "dav": 443,
        "dict": 2628,
        "ftp": 21,
        "git": 9418,
        "go": 1096,
        "gopher": 70,
        "http": 80,
        "https": 443,
        "iac": 4569,
        "icap": 1344,
        "imap": 143,
        "ipp": 631,
        "ipps": 631,  // yes, they're both mapped to port 631
        "irc": 6667,  // De facto default port, not the IANA reserved port.
        "ircs": 6697,
        "iris": 702,  // defaults to iris.beep
        "iris.beep": 702,
        "iris.lwz": 715,
        "iris.xpc": 713,
        "iris.xpcs": 714,
        "jabber": 5222,  // client-to-server
        "ldap": 389,
        "ldaps": 636,
        "msrp": 2855,
        "msrps": 2855,
        "mtqp": 1038,
        "mupdate": 3905,
        "news": 119,
        "nfs": 2049,
        "pop": 110,
        "redis": 6379,
        "reload": 6084,
        "rsync": 873,
        "rtmfp": 1935,
        "rtsp": 554,
        "shttp": 80,
        "sieve": 4190,
        "sip": 5060,
        "sips": 5061,
        "smb": 445,
        "smtp": 25,
        "snews": 563,
        "snmp": 161,
        "soap.beep": 605,
        "ssh": 22,
        "stun": 3478,
        "stuns": 5349,
        "svn": 3690,
        "teamspeak": 9987,
        "telnet": 23,
        "tftp": 69,
        "tip": 3372,
    ];
}

/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL {
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent. If present, there will also be a password.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname.
    string host;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
     * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the providedPort
     * field.
     */
    @property ushort port() {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /**
     * Set the port.
     *
     * This sets the providedPort field and is provided for convenience.
     */
    @property ushort port(ushort value) {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL.
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query string elements.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
     * elements will be ["visited": "false"].
     *
     * Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
     * ["item": ""].
     *
     * This field is mutable, so be cautious.
     */
    string[string] query;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     *
     * The user, password, each path segment, the query keys and values, and the fragment are
     * percent-encoded. The port is only written when it was explicitly provided and differs
     * from the scheme's default port. Query elements are emitted in the associative array's
     * iteration order, which is unspecified.
     */
    string toString() {
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        // Test lengths rather than pointers: an empty-but-non-null string must count as absent.
        if (user.length || pass.length) {
            s ~= user.percentEncode;
            s ~= ":";
            s ~= pass.percentEncode;
            s ~= "@";
        }
        s ~= host;
        if (providedPort) {
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ":";
                s ~= providedPort.to!string;
            }
        }

        // The serialized path always starts with exactly one slash. Splitting an empty
        // remainder yields no parts, so both "" and "/" come out as "/".
        string p = path;
        if (p.length && p[0] == '/') {
            p = p[1 .. $];
        }
        s ~= '/';
        bool firstPart = true;
        foreach (part; p.split('/')) {
            if (!firstPart) {
                s ~= '/';
            }
            firstPart = false;
            s ~= part.percentEncode;
        }

        if (query.length) {
            s ~= '?';
            bool first = true;
            foreach (k, v; query) {
                if (!first) {
                    s ~= '&';
                }
                first = false;
                s ~= k.percentEncode;
                // A key with an empty value is written bare ("?item"), matching how it parses.
                if (v.length) {
                    s ~= '=';
                    s ~= v.percentEncode;
                }
            }
        }
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a path
     * element to the URL's path. It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        if (query) {
            // Give the copy its own associative array so edits don't leak back into this URL.
            other.query = other.query.dup;
        }
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        immutable pathHasSlash = path.endsWith("/");
        immutable suffixHasSlash = subsequentPath.startsWith("/");
        if (pathHasSlash && suffixHasSlash) {
            // Both sides supply a separator; drop one of them.
            path ~= subsequentPath[1 .. $];
        } else if (pathHasSlash || suffixHasSlash) {
            path ~= subsequentPath;
        } else {
            path ~= '/';
            path ~= subsequentPath;
        }
        return this;
    }
}

/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * The user, password, path, query keys and values, and fragment are percent-decoded.
 *
 * Punycode is not supported.
 *
 * Params:
 *   value = the text to parse
 *   url = receives the parsed URL; its contents are unspecified when parsing fails
 *
 * Returns: true if the string could be parsed, false otherwise.
 */
bool tryParseURL(string value, out URL url) {
    url = URL.init;
    // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    // A "://" that only shows up after a path, query, or fragment delimiter belongs to that
    // component (eg "example.org/?next=http://other"), not to the scheme.
    auto i = value.indexOf("://");
    if (i > -1 && value[0 .. i].indexOfAny("/?#") == -1) {
        url.scheme = value[0 .. i];
        value = value[i + 3 .. $];
    } else {
        url.scheme = "http";
    }

    // [user:password@]host[:port]][/]path[?query][#fragment
    i = value.indexOfAny(":/?#");
    if (i == -1) {
        // Just a hostname.
        url.host = value;
        return true;
    }

    if (value[i] == ':') {
        // This could be between username and password, or it could be between host and port.
        auto j = value.indexOfAny("@/");
        if (j > -1 && value[j] == '@') {
            try {
                url.user = value[0 .. i].percentDecode;
                url.pass = value[i + 1 .. j].percentDecode;
            } catch (URLException) {
                return false;
            }
            value = value[j + 1 .. $];
        }
    }

    // It's trying to be a host/port, not a user/pass.
    i = value.indexOfAny(":/?#");
    if (i == -1) {
        url.host = value;
        return true;
    }
    url.host = value[0 .. i];
    value = value[i .. $];
    if (value[0] == ':') {
        // The port runs up to the start of the path, query, or fragment, whichever is first.
        auto end = value.indexOfAny("/?#");
        if (end == -1) {
            end = value.length;
        }
        try {
            url.port = value[1 .. end].to!ushort;
        } catch (ConvException) {
            return false;
        }
        value = value[end .. $];
        if (value.length == 0) {
            return true;
        }
    }

    // Decode the path whether or not a query or fragment follows it.
    i = value.indexOfAny("?#");
    try {
        url.path = (i == -1 ? value : value[0 .. i]).percentDecode;
    } catch (URLException) {
        return false;
    }
    if (i == -1) {
        return true;
    }

    auto c = value[i];
    value = value[i + 1 .. $];
    if (c == '?') {
        i = value.indexOf('#');
        string query;
        if (i < 0) {
            query = value;
            value = null;
        } else {
            query = value[0 .. i];
            value = value[i + 1 .. $];
        }
        foreach (q; query.split('&')) {
            if (q.length == 0) {
                // Stray separators ("a=1&&b=2") don't carry an element.
                continue;
            }
            auto j = q.indexOf('=');
            try {
                if (j == -1) {
                    url.query[q.percentDecode] = "";
                } else {
                    url.query[q[0 .. j].percentDecode] = q[j + 1 .. $].percentDecode;
                }
            } catch (URLException) {
                return false;
            }
        }
    }

    // Whatever is left is the fragment (empty if there was none).
    try {
        url.fragment = value.percentDecode;
    } catch (URLException) {
        return false;
    }

    return true;
}

///
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            query["hello"] = "world";
            query["gibe"] = "clay";
            fragment = "frag";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            query["❄"] = "❀";
            query["["] = "]";
            fragment = "ş";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            query["hi"] = "bye";
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}

///
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);
}

unittest {
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}

/**
 * Parse the input string as a URL.
 *
 * Params:
 *   value = the text to parse
 *
 * Returns: the parsed URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL url;
    if (tryParseURL(value, url)) {
        return url;
    }
    throw new URLException("failed to parse URL " ~ value);
}

///
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // With user and port and path
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // With query string
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "");
    }
    {
        // With query string and fragment
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // With URL-encoded values
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        assert(u1.query["❄"] == "=");
        assert(u1.fragment == "^");
    }
}

unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

unittest {
    // The path is decoded even when no query or fragment follows it.
    assert(parseURL("https://example.org/%E2%98%83").path == "/☃");
    assert(parseURL("https://example.org/%E2%98%83").toString == "https://example.org/%E2%98%83");

    // A query or fragment may directly follow the host or the port.
    auto u1 = parseURL("example.org?item#top");
    assert(u1.host == "example.org");
    assert(("item" in u1.query) !is null);
    assert(u1.fragment == "top");
    assert(u1.toString == "http://example.org/?item#top", u1.toString);
    auto u2 = parseURL("localhost:8070?a=b");
    assert(u2.host == "localhost");
    assert(u2.port == 8070);
    assert(u2.query["a"] == "b");

    // A "://" inside the query is not a scheme separator.
    auto u3 = parseURL("example.org/?next=http://other.example/");
    assert(u3.scheme == "http");
    assert(u3.host == "example.org");
    assert(u3.query["next"] == "http://other.example/");

    // A root path keeps its slash when serialized.
    assert(parseURL("https://example.org/?a=b").toString == "https://example.org/?a=b");
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * Params:
 *   raw = the text to encode
 *
 * Returns: the text with everything except ASCII letters, digits, and -._~ replaced by the
 * percent-encoded octets of its UTF-8 representation (uppercase hex digits).
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    static immutable upperHex = "0123456789ABCDEF";
    Appender!string app;
    foreach (dchar d; raw) {
        if (('a' <= d && 'z' >= d) ||
                ('A' <= d && 'Z' >= d) ||
                ('0' <= d && '9' >= d) ||
                d == '-' || d == '.' || d == '_' || d == '~') {
            app ~= d;
            continue;
        }
        // Something simple like a space character? Still in 7-bit ASCII?
        // Then its UTF-8 encoding is a single octet and we just encode that.
        // Something not in 7-bit ASCII? Then we percent-encode each octet
        // in the UTF-8 encoding (and hope the server understands UTF-8).
        char[4] buf;
        immutable len = std.utf.encode(buf, d);
        foreach (char b; buf[0 .. len]) {
            app ~= '%';
            app ~= upperHex[b >> 4];
            app ~= upperHex[b & 0x0F];
        }
    }
    return app.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);

    e = percentEncode("a b/c");
    assert(e == "a%20b%2Fc", "expected a%20b%2Fc but got" ~ e);
}

/**
 * Percent-decode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Params:
 *   encoded = the percent-encoded text
 *
 * Returns: the decoded text.
 *
 * Throws:
 *   URLException if the input is malformed or the decoded octets are not valid UTF-8.
 */
string percentDecode(string encoded) {
    ubyte[] raw = percentDecodeRaw(encoded);
    auto s = cast(string) raw;
    if (!s.isValid) {
        throw new URLException("input contains invalid UTF data");
    }
    return s;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    // Hex digits are case-insensitive.
    e = percentDecode("%e2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);
}

/// Returns the numeric value of a hexadecimal digit (either case), or -1 if c is not one.
private int hexDigitValue(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    return -1;
}

/**
 * Percent-decode a string into a ubyte array.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Params:
 *   encoded = the percent-encoded text
 *
 * Returns: the decoded octets.
 *
 * Throws:
 *   URLException if a percent sign is not followed by two hexadecimal digits.
 */
ubyte[] percentDecodeRaw(string encoded) {
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    Appender!(ubyte[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= cast(ubyte) encoded[i];
            continue;
        }
        // Compare the remaining length instead of subtracting from encoded.length, which is
        // unsigned and would wrap around for very short inputs such as "%".
        if (encoded.length - i < 3) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        immutable hi = hexDigitValue(encoded[i + 1]);
        immutable lo = hexDigitValue(encoded[i + 2]);
        if (hi < 0 || lo < 0) {
            throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
                    "digits after percent symbol. Error at index " ~ i.to!string);
        }
        app ~= cast(ubyte)((hi << 4) | lo);
        i += 2;
    }
    return app.data;
}

unittest {
    import std.exception : assertThrown;

    assert(percentDecodeRaw("%41%42c") == cast(ubyte[]) "ABc");
    // Truncated escapes.
    assertThrown!URLException(percentDecodeRaw("%"));
    assertThrown!URLException(percentDecodeRaw("%4"));
    assertThrown!URLException(percentDecodeRaw("abc%"));
    // Non-hex digits.
    assertThrown!URLException(percentDecodeRaw("%zz"));
    assertThrown!URLException(percentDecodeRaw("%4g"));
}

/++
string toAscii(string unicodeHostname) {
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    auto parts = unicodeHostname.split('.');
    char[] result;
    foreach (part; parts) {
        result ~= punyEncode(part);
    }
    return cast(string)result;
}

string punyEncode(string item, string delimiter = null, string marker = null) {
    // Puny state machine initial variables.
    auto base = 36;
    auto tmin = 1;
    auto tmax = 26;
    auto skew = 38;
    auto damp = 700;
    auto initialBias = 72;
    long b = 0;

    bool needToEncode = false;
    Appender!(char[]) app;
    app ~= marker;
    foreach (dchar d; item) {
        if (d > '~') {  // Max printable ASCII. The DEL char isn't allowed in hostnames.
            needToEncode = true;
        } else {
            app ~= d;
            b++;
        }
    }
    if (!needToEncode) {
        return item;
    }
    app ~= delimiter;

    // The puny algorithm.
    // We use 64-bit arithmetic to avoid overflow issues -- unicode only defines up to 0x10FFFF,
    // and we won't be encoding gigabytes of data, but just to be safe.
    // Also we use signed values just to make things easier.
    long delta = 0;
    long bias = initialBias;
    long h = b;
    long lastIndex = 0;

    dchar digitToBasic(ulong digit) {
        if (digit < 26) {
            return 'a' + cast(dchar)digit;
        }
        return cast(dchar)('0' + (digit - 26));
    }

    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        auto k = 0;
        delta = firstTime ? (delta / damp) : delta >> 1;
        delta += (delta / numPoints);
        for (; delta > (base - tmin) * tmax >> 1; k += base) {
            delta = (delta / (base - tmin));
        }
        return k + (base - tmin + 1) * delta / (delta + skew);
    }

    auto f = filter!(x => x >= cast(dchar)128)(item).array;
    auto uniqueChars = uniq(std.algorithm.sorting.sort(f));
    foreach (dchar n; uniqueChars) {
        foreach (dchar c; item) {
            if (c < n) {
                delta++;
            } else if (c == n) {
                auto q = delta;
                for (ulong k = 0; k < cast(ulong)uint.max; k += base) {
                    auto t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
                    if (q < t) {
                        break;
                    }
                    app ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                }
                app ~= digitToBasic(q);
                bias = adapt(delta, h + 1, h == b);
                h++;
            }
        }
        delta++;
    }
    return cast(string)app.data;
}

unittest {
    import std.stdio;
    auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
        ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
    writeln(a);
    writeln(punyEncode(a));
    assert(punyEncode(a) == "egbpdaj6bu4bxfgehfvwxn");
}

struct URL {
    Host host;
}
++/