/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     system("scp", url ~ file, ".");
 * }
 * ---
 *
 * License: The MIT license.
 */
module url;

import std.algorithm;
import std.array;
import std.conv;
import std.encoding;
import std.string;
import std.utf;

/// An exception thrown when something bad happens with URLs.
class URLException : Exception {
    /**
     * Params:
     *  msg = a description of what went wrong
     *  file = source file of the throw site (filled in automatically)
     *  line = source line of the throw site (filled in automatically)
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__) @safe pure nothrow {
        super(msg, file, line);
    }
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 */
ushort[string] schemeToDefaultPort;

static this() {
    schemeToDefaultPort = [
        "aaa": 3868,
        "aaas": 5658,
        "acap": 674,
        "cap": 1026,
        "coap": 5683,
        "coaps": 5684,
        "dav": 443,
        "dict": 2628,
        "ftp": 21,
        "git": 9418,
        "go": 1096,
        "gopher": 70,
        "http": 80,
        "https": 443,
        "iac": 4569,
        "icap": 1344,
        "imap": 143,
        "ipp": 631,
        "ipps": 631,  // yes, they're both mapped to port 631
        "irc": 6667,  // De facto default port, not the IANA reserved port.
        "ircs": 6697,
        "iris": 702,  // defaults to iris.beep
        "iris.beep": 702,
        "iris.lwz": 715,
        "iris.xpc": 713,
        "iris.xpcs": 714,
        "jabber": 5222,  // client-to-server
        "ldap": 389,
        "ldaps": 636,
        "msrp": 2855,
        "msrps": 2855,
        "mtqp": 1038,
        "mupdate": 3905,
        "news": 119,
        "nfs": 2049,
        "pop": 110,
        "redis": 6379,
        "reload": 6084,
        "rsync": 873,
        "rtmfp": 1935,
        "rtsp": 554,
        "shttp": 80,
        "sieve": 4190,
        "sip": 5060,
        "sips": 5061,
        "smb": 445,
        "smtp": 25,
        "snews": 563,
        "snmp": 161,
        "soap.beep": 605,
        "ssh": 22,
        "stun": 3478,
        "stuns": 5349,
        "svn": 3690,
        "teamspeak": 9987,
        "telnet": 23,
        "tftp": 69,
        "tip": 3372,
    ];
}

/**
 * A Uniform Resource Locator.
 *
 * URLs can be parsed (see parseURL) and implicitly convert to strings.
 */
struct URL {
    /// The URL scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this URL. Usually absent.
    string user;

    /// The password in this URL. Usually absent.
    string pass;

    /// The hostname. IPv6 literals keep their surrounding brackets, e.g. "[::1]".
    string host;

    /**
     * The port.
     *
     * This is inferred from the scheme if it isn't present in the URL itself.
     * If the scheme is not known and the port is not present, the port will be given as 0.
     * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
     *
     * If you explicitly need to detect whether the user provided a port, check the providedPort
     * field.
     */
    @property ushort port() {
        if (providedPort != 0) {
            return providedPort;
        }
        if (auto p = scheme in schemeToDefaultPort) {
            return *p;
        }
        return 0;
    }

    /**
     * Set the port.
     *
     * This sets the providedPort field and is provided for convenience.
     */
    @property ushort port(ushort value) {
        return providedPort = value;
    }

    /// The port that was explicitly provided in the URL.
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query string elements.
     *
     * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the query string
     * elements will be ["visited": "false"].
     *
     * Similarly, in the URL https://bbc.co.uk/news?item, the query string elements will be
     * ["item": ""].
     *
     * This field is mutable, so be cautious.
     */
    string[string] query;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;

    /**
     * Convert this URL to a string.
     * The string is properly formatted and usable for, eg, a web request.
     *
     * Optional components are emitted only when they are non-empty: emptiness is tested by
     * length rather than by pointer identity, so an empty-but-non-null string or an emptied
     * associative array does not produce stray ":", "=", "?" or "#" separators.
     */
    string toString() {
        Appender!string s;
        s ~= scheme;
        s ~= "://";
        if (user.length) {
            s ~= user.percentEncode;
            if (pass.length) {
                s ~= ':';
                s ~= pass.percentEncode;
            }
            s ~= '@';
        }
        s ~= host;
        if (providedPort) {
            // Leave the port out when it is just the scheme's default.
            auto defaultPort = scheme in schemeToDefaultPort;
            if (defaultPort is null || *defaultPort != providedPort) {
                s ~= ':';
                s ~= providedPort.to!string;
            }
        }
        string p = path;
        if (p.length == 0 || p == "/") {
            s ~= '/';
        } else {
            if (p[0] == '/') {
                p = p[1 .. $];
            }
            // Encode each segment separately so the separators themselves stay literal.
            foreach (part; p.split('/')) {
                s ~= '/';
                s ~= part.percentEncode;
            }
        }
        if (query.length) {
            s ~= '?';
            bool first = true;
            foreach (k, v; query) {
                if (!first) {
                    s ~= '&';
                }
                first = false;
                s ~= k.percentEncode;
                if (v.length) {
                    s ~= '=';
                    s ~= v.percentEncode;
                }
            }
        }
        if (fragment.length) {
            s ~= '#';
            s ~= fragment.percentEncode;
        }
        return s.data;
    }

    /// Implicitly convert URLs to strings.
    alias toString this;

    /**
     * The append operator (~).
     *
     * The append operator for URLs returns a new URL with the given string appended as a path
     * element to the URL's path. It only adds new path elements (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Query elements are copied.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * auto randInt = random ~ "int";
     * writeln(randInt);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opBinary(string op : "~")(string subsequentPath) {
        URL other = this;
        other ~= subsequentPath;
        if (query.length) {
            // Give the copy its own table so later edits don't leak back into this URL.
            other.query = other.query.dup;
        }
        return other;
    }

    /**
     * The append-in-place operator (~=).
     *
     * The append operator for URLs adds a path element to this URL. It only adds new path elements
     * (or sequences of path elements).
     *
     * Don't worry about path separators; whether you include them or not, it will just work.
     *
     * Examples:
     * ---
     * auto random = "http://testdata.org/random".parseURL;
     * random ~= "int";
     * writeln(random);  // prints "http://testdata.org/random/int"
     * ---
     */
    URL opOpAssign(string op : "~")(string subsequentPath) {
        immutable pathHasSlash = path.endsWith("/");
        immutable suffixHasSlash = subsequentPath.startsWith("/");
        if (pathHasSlash && suffixHasSlash) {
            // Both sides supply a separator; keep only one.
            path ~= subsequentPath[1 .. $];
        } else if (pathHasSlash || suffixHasSlash) {
            path ~= subsequentPath;
        } else {
            path ~= '/';
            path ~= subsequentPath;
        }
        return this;
    }
}

/**
 * Parse a URL from a string.
 *
 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes
 * may be made. However, any URL in a correct format will be parsed correctly.
 *
 * The user, password, path, query keys and values, and fragment are percent-decoded.
 *
 * Punycode is not supported.
 *
 * Params:
 *  value = the text to parse
 *  url = receives the parsed URL; its contents are unspecified when parsing fails
 *
 * Returns: true if the value was parsed, false if it was malformed (bad port, bad percent
 * encoding, invalid UTF-8 after decoding, or an unterminated IPv6 literal).
 */
bool tryParseURL(string value, out URL url) {
    url = URL.init;
    // scheme:[//[user[:password]@]host[:port]][/path][?query][#fragment]
    // Scheme is optional in common use. We infer 'http' if it's not given.
    // A "://" only marks a scheme when nothing that ends the authority comes before it;
    // otherwise it belongs to the path or query (eg "example.org/?to=http://elsewhere").
    auto schemeEnd = value.indexOf("://");
    auto firstDelimiter = value.indexOfAny("/?#");
    if (schemeEnd > -1 && schemeEnd < firstDelimiter) {
        url.scheme = value[0 .. schemeEnd];
        value = value[schemeEnd + 3 .. $];
    } else {
        url.scheme = "http";
    }

    // The authority runs up to the first '/', '?' or '#'.
    auto authorityEnd = value.indexOfAny("/?#");
    string authority = authorityEnd == -1 ? value : value[0 .. authorityEnd];
    string rest = authorityEnd == -1 ? null : value[authorityEnd .. $];

    // [user[:password]@]
    auto at = authority.indexOf('@');
    if (at != -1) {
        auto userinfo = authority[0 .. at];
        auto passwordSep = userinfo.indexOf(':');
        try {
            if (passwordSep == -1) {
                url.user = userinfo.percentDecode;
            } else {
                url.user = userinfo[0 .. passwordSep].percentDecode;
                url.pass = userinfo[passwordSep + 1 .. $].percentDecode;
            }
        } catch (URLException) {
            return false;
        }
        authority = authority[at + 1 .. $];
    }

    // host[:port]. An IPv6 literal is bracketed and contains colons of its own, so the port
    // separator can only be the colon directly after the closing bracket.
    ptrdiff_t portSep;
    if (authority.startsWith("[")) {
        auto close = authority.indexOf(']');
        if (close == -1) {
            return false;
        }
        auto afterBracket = authority[close + 1 .. $];
        if (afterBracket.length == 0) {
            portSep = -1;
        } else if (afterBracket[0] == ':') {
            portSep = close + 1;
        } else {
            return false;
        }
    } else {
        portSep = authority.indexOf(':');
    }
    if (portSep == -1) {
        url.host = authority;
    } else {
        url.host = authority[0 .. portSep];
        try {
            url.port = authority[portSep + 1 .. $].to!ushort;
        } catch (ConvException) {
            return false;
        }
    }

    if (rest.length == 0) {
        return true;
    }

    // [/path][?query][#fragment]
    auto pathEnd = rest.indexOfAny("?#");
    try {
        // Decode the path whether or not a query or fragment follows it.
        url.path = (pathEnd == -1 ? rest : rest[0 .. pathEnd]).percentDecode;
    } catch (URLException) {
        return false;
    }
    if (pathEnd == -1) {
        return true;
    }

    auto delimiter = rest[pathEnd];
    rest = rest[pathEnd + 1 .. $];
    if (delimiter == '?') {
        auto hash = rest.indexOf('#');
        string queryString;
        if (hash == -1) {
            queryString = rest;
            rest = null;
        } else {
            queryString = rest[0 .. hash];
            rest = rest[hash + 1 .. $];
        }
        foreach (q; queryString.split('&')) {
            if (q.length == 0) {
                // "a=1&&b=2" or a trailing '&' carries no element.
                continue;
            }
            auto eq = q.indexOf('=');
            try {
                if (eq == -1) {
                    url.query[q.percentDecode] = "";
                } else {
                    url.query[q[0 .. eq].percentDecode] = q[eq + 1 .. $].percentDecode;
                }
            } catch (URLException) {
                return false;
            }
        }
    }

    try {
        url.fragment = rest.percentDecode;
    } catch (URLException) {
        return false;
    }

    return true;
}

///
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            query["hello"] = "world";
            query["gibe"] = "clay";
            fragment = "frag";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            query["❄"] = "❀";
            query["["] = "]";
            fragment = "ş";
        }
        assert(
                // Not sure what order it'll come out in.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            query["hi"] = "bye";
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
    {
        // Empty components produce no separators.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "";
            query["item"] = "";
            fragment = "";
        }
        assert(
                url.toString == "https://example.org/?item",
                url.toString);
    }
}

///
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);

    // No path, just fragment.
    url = "ircs://irc.freenode.com/#d".parseURL;
    assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
}

unittest {
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}

/**
 * Parse the input string as a URL.
 *
 * Throws:
 *   URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL url;
    if (tryParseURL(value, url)) {
        return url;
    }
    throw new URLException("failed to parse URL " ~ value);
}

///
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob!");
        assert(u1.pass == "secret!?");
    }
    {
        // With user and port and path
        auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 2210);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
        assert(u1.fragment == "");
    }
    {
        // With query string
        auto u1 = parseURL("https://example.org/?login=true");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "");
    }
    {
        // With query string and fragment
        auto u1 = parseURL("https://example.org/?login=true#justkidding");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/", "expected path: / actual path: " ~ u1.path);
        assert(u1.query["login"] == "true");
        assert(u1.fragment == "justkidding");
    }
    {
        // With URL-encoded values
        auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path);
        assert(u1.query["❄"] == "=");
        assert(u1.fragment == "^");
    }
}

unittest {
    assert(parseURL("http://example.org").port == 80);
    assert(parseURL("http://example.org:5326").port == 5326);

    auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(url.scheme == "redis");
    assert(url.user == "admin");
    assert(url.pass == "password");

    assert(parseURL("example.org").toString == "http://example.org/");
    assert(parseURL("http://example.org:80").toString == "http://example.org/");

    assert(parseURL("localhost:8070").toString == "http://localhost:8070/");
}

unittest {
    {
        // The path is decoded even when nothing follows it, so it round-trips.
        auto u1 = parseURL("https://example.org/%E2%98%83");
        assert(u1.path == "/☃", u1.path);
        assert(u1.toString == "https://example.org/%E2%98%83", u1.toString);
    }
    {
        // Query directly after the host.
        auto u1 = parseURL("http://example.org?x=1");
        assert(u1.host == "example.org", u1.host);
        assert(u1.query["x"] == "1");
        auto u2 = parseURL("http://example.org:8080?x=1#top");
        assert(u2.host == "example.org", u2.host);
        assert(u2.port == 8080);
        assert(u2.query["x"] == "1");
        assert(u2.fragment == "top");
    }
    {
        // User without a password.
        auto u1 = parseURL("http://bob@example.org:8080/");
        assert(u1.user == "bob");
        assert(u1.pass == "");
        assert(u1.host == "example.org");
        assert(u1.port == 8080);
        assert(u1.toString == "http://bob@example.org:8080/", u1.toString);
    }
    {
        // IPv6 literal host.
        auto u1 = parseURL("http://[::1]:8080/x");
        assert(u1.host == "[::1]", u1.host);
        assert(u1.port == 8080);
        assert(u1.toString == "http://[::1]:8080/x", u1.toString);
    }
    {
        // "://" inside a query is not a scheme separator.
        auto u1 = parseURL("example.org/r?to=http://other.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org", u1.host);
        assert(u1.path == "/r");
        assert(u1.query["to"] == "http://other.org");
    }
    {
        // Valueless query elements round-trip; empty elements are ignored.
        assert(parseURL("https://bbc.co.uk/news?item").toString == "https://bbc.co.uk/news?item");
        auto u1 = parseURL("https://example.org/?a=1&");
        assert(u1.query.length == 1);
        assert(u1.query["a"] == "1");
    }
    {
        // Malformed input is reported, not crashed on.
        URL bad;
        assert(!tryParseURL("http://example.org/%", bad));
        assert(!tryParseURL("http://example.org:notaport/", bad));
        assert(!tryParseURL("http://[::1/", bad));
    }
}

/**
 * Percent-encode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * The input is processed one UTF-8 code unit at a time: ASCII letters, digits and "-._~" are
 * copied through, and every other byte becomes "%XX" with uppercase hex digits.
 *
 * Params:
 *  raw = the text to encode
 *
 * Returns: the percent-encoded text.
 */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    static immutable hexDigits = "0123456789ABCDEF";
    Appender!string app;
    // Iterating as char visits raw UTF-8 bytes, which is exactly what must be escaped for
    // non-ASCII text (each octet of the UTF-8 encoding gets its own %XX).
    foreach (char c; raw) {
        if (('a' <= c && c <= 'z') ||
                ('A' <= c && c <= 'Z') ||
                ('0' <= c && c <= '9') ||
                c == '-' || c == '.' || c == '_' || c == '~') {
            app ~= c;
        } else {
            app ~= '%';
            app ~= hexDigits[c >> 4];
            app ~= hexDigits[c & 0x0F];
        }
    }
    return app.data;
}

///
unittest {
    assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding");
    assert(percentEncode("~~--..__") == "~~--..__");
    assert(percentEncode("0123456789") == "0123456789");

    string e;

    e = percentEncode("☃");
    assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e);

    e = percentEncode("a b/c");
    assert(e == "a%20b%2Fc", "expected a%20b%2Fc but got" ~ e);
}

/**
 * Percent-decode a string.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Throws:
 *   URLException if a percent escape is malformed or the decoded bytes are not valid UTF-8.
 */
string percentDecode(string encoded) {
    auto s = cast(string) percentDecodeRaw(encoded);
    if (!s.isValid) {
        throw new URLException("input contains invalid UTF data");
    }
    return s;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    // Hex digits are case-insensitive.
    e = percentDecode("%e2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);
}

/**
 * Percent-decode a string into a ubyte array.
 *
 * URL components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as URL components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a URLException.
 *
 * Throws:
 *   URLException if a '%' is not followed by exactly two hexadecimal digits (either case).
 */
ubyte[] percentDecodeRaw(string encoded) {
    // Value of a hex digit, or -1 if the character is not one.
    static int hexValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    Appender!(ubyte[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= cast(ubyte) encoded[i];
            continue;
        }
        // Compare the remaining length rather than computing length - 2, which wraps around
        // for inputs shorter than two characters because lengths are unsigned.
        if (encoded.length - i < 3) {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        immutable hi = hexValue(encoded[i + 1]);
        immutable lo = hexValue(encoded[i + 2]);
        if (hi < 0 || lo < 0) {
            throw new URLException("Invalid percent encoded value: expected two hexadecimal " ~
                    "digits after percent symbol. Error at index " ~ i.to!string);
        }
        app ~= cast(ubyte)((hi << 4) | lo);
        i += 2;
    }
    return app.data;
}

unittest {
    assert(percentDecodeRaw("%41b%43") == cast(ubyte[]) "AbC");
    assert(percentDecodeRaw("%ff") == [cast(ubyte) 0xFF]);

    foreach (bad; ["%", "%4", "abc%", "%zz", "%4g"]) {
        bool threw = false;
        try {
            percentDecodeRaw(bad);
        } catch (URLException) {
            threw = true;
        }
        assert(threw, "expected a URLException for " ~ bad);
    }
}

// Disabled work-in-progress Punycode support, kept as found.
/++
string toAscii(string unicodeHostname) {
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        if (c < 0x2C || (c >= 0x3A && c <= 40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    auto parts = unicodeHostname.split('.');
    char[] result;
    foreach (part; parts) {
        result ~= punyEncode(part);
    }
    return cast(string)result;
}

string punyEncode(string item, string delimiter = null, string marker = null) {
    // Puny state machine initial variables.
    auto base = 36;
    auto tmin = 1;
    auto tmax = 26;
    auto skew = 38;
    auto damp = 700;
    auto initialBias = 72;
    long b = 0;

    bool needToEncode = false;
    Appender!(char[]) app;
    app ~= marker;
    foreach (dchar d; item) {
        if (d > '~') {  // Max printable ASCII. The DEL char isn't allowed in hostnames.
            needToEncode = true;
        } else {
            app ~= d;
            b++;
        }
    }
    if (!needToEncode) {
        return item;
    }
    app ~= delimiter;

    // The puny algorithm.
    // We use 64-bit arithmetic to avoid overflow issues -- unicode only defines up to 0x10FFFF,
    // and we won't be encoding gigabytes of data, but just to be safe.
    // Also we use signed values just to make things easier.
    long delta = 0;
    long bias = initialBias;
    long h = b;
    long lastIndex = 0;

    dchar digitToBasic(ulong digit) {
        if (digit < 26) {
            return 'a' + cast(dchar)digit;
        }
        return cast(dchar)('0' + (digit - 26));
    }

    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        auto k = 0;
        delta = firstTime ? (delta / damp) : delta >> 1;
        delta += (delta / numPoints);
        for (; delta > (base - tmin) * tmax >> 1; k += base) {
            delta = (delta / (base - tmin));
        }
        return k + (base - tmin + 1) * delta / (delta + skew);
    }

    auto f = filter!(x => x >= cast(dchar)128)(item).array;
    auto uniqueChars = uniq(std.algorithm.sorting.sort(f));
    foreach (dchar n; uniqueChars) {
        foreach (dchar c; item) {
            if (c < n) {
                delta++;
            } else if (c == n) {
                auto q = delta;
                for (ulong k = 0; k < cast(ulong)uint.max; k += base) {
                    auto t = k <= bias ? tmin : (k >= bias + tmax ? tmax : k - bias);
                    if (q < t) {
                        break;
                    }
                    app ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                }
                app ~= digitToBasic(q);
                bias = adapt(delta, h + 1, h == b);
                h++;
            }
        }
        delta++;
    }
    return cast(string)app.data;
}

unittest {
    import std.stdio;
    auto a = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
        ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
    writeln(a);
    writeln(punyEncode(a));
    assert(punyEncode(a) == "egbpdaj6bu4bxfgehfvwxn");
}

struct URL {
    Host host;
}
++/