diff options
author | libuv upstream <libuv@googlegroups.com> | 2019-01-15 15:42:13 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2019-01-16 19:48:27 (GMT) |
commit | 4fcb0d0213112bb2fdb04bb27e82543b93cfe41d (patch) | |
tree | 746a72d3795b562075d66fab8947dfd7527b6044 /src/idna.c | |
parent | c8b67ea119c4000018238f6c3201a1364356d93a (diff) | |
download | CMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.zip CMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.tar.gz CMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.tar.bz2 |
libuv 2019-01-15 (f84c5e69)
Code extracted from:
https://github.com/libuv/libuv.git
at commit f84c5e693b80cb0c62bcefba147e7a66e2b839c9 (v1.x).
Diffstat (limited to 'src/idna.c')
-rw-r--r-- | src/idna.c | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/src/idna.c b/src/idna.c new file mode 100644 index 0000000..13ffac6 --- /dev/null +++ b/src/idna.c @@ -0,0 +1,291 @@ +/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* Derived from https://github.com/bnoordhuis/punycode + * but updated to support IDNA 2008. + */ + +#include "uv.h" +#include "idna.h" +#include <string.h> + +static unsigned uv__utf8_decode1_slow(const char** p, + const char* pe, + unsigned a) { + unsigned b; + unsigned c; + unsigned d; + unsigned min; + + if (a > 0xF7) + return -1; + + switch (*p - pe) { + default: + if (a > 0xEF) { + min = 0x10000; + a = a & 7; + b = (unsigned char) *(*p)++; + c = (unsigned char) *(*p)++; + d = (unsigned char) *(*p)++; + break; + } + /* Fall through. */ + case 2: + if (a > 0xDF) { + min = 0x800; + b = 0x80 | (a & 15); + c = (unsigned char) *(*p)++; + d = (unsigned char) *(*p)++; + a = 0; + break; + } + /* Fall through. */ + case 1: + if (a > 0xBF) { + min = 0x80; + b = 0x80; + c = 0x80 | (a & 31); + d = (unsigned char) *(*p)++; + a = 0; + break; + } + return -1; /* Invalid continuation byte. */ + } + + if (0x80 != (0xC0 & (b ^ c ^ d))) + return -1; /* Invalid sequence. */ + + b &= 63; + c &= 63; + d &= 63; + a = (a << 18) | (b << 12) | (c << 6) | d; + + if (a < min) + return -1; /* Overlong sequence. */ + + if (a > 0x10FFFF) + return -1; /* Four-byte sequence > U+10FFFF. */ + + if (a >= 0xD800 && a <= 0xDFFF) + return -1; /* Surrogate pair. */ + + return a; +} + +unsigned uv__utf8_decode1(const char** p, const char* pe) { + unsigned a; + + a = (unsigned char) *(*p)++; + + if (a < 128) + return a; /* ASCII, common case. */ + + return uv__utf8_decode1_slow(p, pe, a); +} + +#define foreach_codepoint(c, p, pe) \ + for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;) + +static int uv__idna_toascii_label(const char* s, const char* se, + char** d, char* de) { + static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789"; + const char* ss; + unsigned c; + unsigned h; + unsigned k; + unsigned n; + unsigned m; + unsigned q; + unsigned t; + unsigned x; + unsigned y; + unsigned bias; + unsigned delta; + unsigned todo; + int first; + + h = 0; + ss = s; + todo = 0; + + foreach_codepoint(c, &s, se) { + if (c < 128) + h++; + else if (c == (unsigned) -1) + return UV_EINVAL; + else + todo++; + } + + if (todo > 0) { + if (*d < de) *(*d)++ = 'x'; + if (*d < de) *(*d)++ = 'n'; + if (*d < de) *(*d)++ = '-'; + if (*d < de) *(*d)++ = '-'; + } + + x = 0; + s = ss; + foreach_codepoint(c, &s, se) { + if (c > 127) + continue; + + if (*d < de) + *(*d)++ = c; + + if (++x == h) + break; /* Visited all ASCII characters. */ + } + + if (todo == 0) + return h; + + /* Only write separator when we've written ASCII characters first. */ + if (h > 0) + if (*d < de) + *(*d)++ = '-'; + + n = 128; + bias = 72; + delta = 0; + first = 1; + + while (todo > 0) { + m = -1; + s = ss; + foreach_codepoint(c, &s, se) + if (c >= n) + if (c < m) + m = c; + + x = m - n; + y = h + 1; + + if (x > ~delta / y) + return UV_E2BIG; /* Overflow. */ + + delta += x * y; + n = m; + + s = ss; + foreach_codepoint(c, &s, se) { + if (c < n) + if (++delta == 0) + return UV_E2BIG; /* Overflow. */ + + if (c != n) + continue; + + for (k = 36, q = delta; /* empty */; k += 36) { + t = 1; + + if (k > bias) + t = k - bias; + + if (t > 26) + t = 26; + + if (q < t) + break; + + /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore + * 10 <= y <= 35, we can optimize the long division + * into a table-based reciprocal multiplication. + */ + x = q - t; + y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */ + q = x / y; + t = t + x % y; /* 1 <= t <= 35 because of y. */ + + if (*d < de) + *(*d)++ = alphabet[t]; + } + + if (*d < de) + *(*d)++ = alphabet[q]; + + delta /= 2; + + if (first) { + delta /= 350; + first = 0; + } + + /* No overflow check is needed because |delta| was just + * divided by 2 and |delta+delta >= delta + delta/h|. + */ + h++; + delta += delta / h; + + for (bias = 0; delta > 35 * 26 / 2; bias += 36) + delta /= 35; + + bias += 36 * delta / (delta + 38); + delta = 0; + todo--; + } + + delta++; + n++; + } + + return 0; +} + +#undef foreach_codepoint + +long uv__idna_toascii(const char* s, const char* se, char* d, char* de) { + const char* si; + const char* st; + unsigned c; + char* ds; + int rc; + + ds = d; + + for (si = s; si < se; /* empty */) { + st = si; + c = uv__utf8_decode1(&si, se); + + if (c != '.') + if (c != 0x3002) /* 。 */ + if (c != 0xFF0E) /* . */ + if (c != 0xFF61) /* 。 */ + continue; + + rc = uv__idna_toascii_label(s, st, &d, de); + + if (rc < 0) + return rc; + + if (d < de) + *d++ = '.'; + + s = si; + } + + if (s < se) { + rc = uv__idna_toascii_label(s, se, &d, de); + + if (rc < 0) + return rc; + } + + if (d < de) + *d++ = '\0'; + + return d - ds; /* Number of bytes written. */ +} |