summaryrefslogtreecommitdiffstats
path: root/src/idna.c
diff options
context:
space:
mode:
authorlibuv upstream <libuv@googlegroups.com>2019-01-15 15:42:13 (GMT)
committerBrad King <brad.king@kitware.com>2019-01-16 19:48:27 (GMT)
commit4fcb0d0213112bb2fdb04bb27e82543b93cfe41d (patch)
tree746a72d3795b562075d66fab8947dfd7527b6044 /src/idna.c
parentc8b67ea119c4000018238f6c3201a1364356d93a (diff)
downloadCMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.zip
CMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.tar.gz
CMake-4fcb0d0213112bb2fdb04bb27e82543b93cfe41d.tar.bz2
libuv 2019-01-15 (f84c5e69)
Code extracted from: https://github.com/libuv/libuv.git at commit f84c5e693b80cb0c62bcefba147e7a66e2b839c9 (v1.x).
Diffstat (limited to 'src/idna.c')
-rw-r--r--src/idna.c291
1 files changed, 291 insertions, 0 deletions
diff --git a/src/idna.c b/src/idna.c
new file mode 100644
index 0000000..13ffac6
--- /dev/null
+++ b/src/idna.c
@@ -0,0 +1,291 @@
+/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Derived from https://github.com/bnoordhuis/punycode
+ * but updated to support IDNA 2008.
+ */
+
+#include "uv.h"
+#include "idna.h"
+#include <string.h>
+
+static unsigned uv__utf8_decode1_slow(const char** p,
+ const char* pe,
+ unsigned a) {
+ unsigned b;
+ unsigned c;
+ unsigned d;
+ unsigned min;
+
+ if (a > 0xF7)
+ return -1;
+
+ switch (*p - pe) {
+ default:
+ if (a > 0xEF) {
+ min = 0x10000;
+ a = a & 7;
+ b = (unsigned char) *(*p)++;
+ c = (unsigned char) *(*p)++;
+ d = (unsigned char) *(*p)++;
+ break;
+ }
+ /* Fall through. */
+ case 2:
+ if (a > 0xDF) {
+ min = 0x800;
+ b = 0x80 | (a & 15);
+ c = (unsigned char) *(*p)++;
+ d = (unsigned char) *(*p)++;
+ a = 0;
+ break;
+ }
+ /* Fall through. */
+ case 1:
+ if (a > 0xBF) {
+ min = 0x80;
+ b = 0x80;
+ c = 0x80 | (a & 31);
+ d = (unsigned char) *(*p)++;
+ a = 0;
+ break;
+ }
+ return -1; /* Invalid continuation byte. */
+ }
+
+ if (0x80 != (0xC0 & (b ^ c ^ d)))
+ return -1; /* Invalid sequence. */
+
+ b &= 63;
+ c &= 63;
+ d &= 63;
+ a = (a << 18) | (b << 12) | (c << 6) | d;
+
+ if (a < min)
+ return -1; /* Overlong sequence. */
+
+ if (a > 0x10FFFF)
+ return -1; /* Four-byte sequence > U+10FFFF. */
+
+ if (a >= 0xD800 && a <= 0xDFFF)
+ return -1; /* Surrogate pair. */
+
+ return a;
+}
+
+unsigned uv__utf8_decode1(const char** p, const char* pe) {
+ unsigned a;
+
+ a = (unsigned char) *(*p)++;
+
+ if (a < 128)
+ return a; /* ASCII, common case. */
+
+ return uv__utf8_decode1_slow(p, pe, a);
+}
+
+#define foreach_codepoint(c, p, pe) \
+ for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
+
+static int uv__idna_toascii_label(const char* s, const char* se,
+ char** d, char* de) {
+ static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
+ const char* ss;
+ unsigned c;
+ unsigned h;
+ unsigned k;
+ unsigned n;
+ unsigned m;
+ unsigned q;
+ unsigned t;
+ unsigned x;
+ unsigned y;
+ unsigned bias;
+ unsigned delta;
+ unsigned todo;
+ int first;
+
+ h = 0;
+ ss = s;
+ todo = 0;
+
+ foreach_codepoint(c, &s, se) {
+ if (c < 128)
+ h++;
+ else if (c == (unsigned) -1)
+ return UV_EINVAL;
+ else
+ todo++;
+ }
+
+ if (todo > 0) {
+ if (*d < de) *(*d)++ = 'x';
+ if (*d < de) *(*d)++ = 'n';
+ if (*d < de) *(*d)++ = '-';
+ if (*d < de) *(*d)++ = '-';
+ }
+
+ x = 0;
+ s = ss;
+ foreach_codepoint(c, &s, se) {
+ if (c > 127)
+ continue;
+
+ if (*d < de)
+ *(*d)++ = c;
+
+ if (++x == h)
+ break; /* Visited all ASCII characters. */
+ }
+
+ if (todo == 0)
+ return h;
+
+ /* Only write separator when we've written ASCII characters first. */
+ if (h > 0)
+ if (*d < de)
+ *(*d)++ = '-';
+
+ n = 128;
+ bias = 72;
+ delta = 0;
+ first = 1;
+
+ while (todo > 0) {
+ m = -1;
+ s = ss;
+ foreach_codepoint(c, &s, se)
+ if (c >= n)
+ if (c < m)
+ m = c;
+
+ x = m - n;
+ y = h + 1;
+
+ if (x > ~delta / y)
+ return UV_E2BIG; /* Overflow. */
+
+ delta += x * y;
+ n = m;
+
+ s = ss;
+ foreach_codepoint(c, &s, se) {
+ if (c < n)
+ if (++delta == 0)
+ return UV_E2BIG; /* Overflow. */
+
+ if (c != n)
+ continue;
+
+ for (k = 36, q = delta; /* empty */; k += 36) {
+ t = 1;
+
+ if (k > bias)
+ t = k - bias;
+
+ if (t > 26)
+ t = 26;
+
+ if (q < t)
+ break;
+
+ /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
+ * 10 <= y <= 35, we can optimize the long division
+ * into a table-based reciprocal multiplication.
+ */
+ x = q - t;
+ y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
+ q = x / y;
+ t = t + x % y; /* 1 <= t <= 35 because of y. */
+
+ if (*d < de)
+ *(*d)++ = alphabet[t];
+ }
+
+ if (*d < de)
+ *(*d)++ = alphabet[q];
+
+ delta /= 2;
+
+ if (first) {
+ delta /= 350;
+ first = 0;
+ }
+
+ /* No overflow check is needed because |delta| was just
+ * divided by 2 and |delta+delta >= delta + delta/h|.
+ */
+ h++;
+ delta += delta / h;
+
+ for (bias = 0; delta > 35 * 26 / 2; bias += 36)
+ delta /= 35;
+
+ bias += 36 * delta / (delta + 38);
+ delta = 0;
+ todo--;
+ }
+
+ delta++;
+ n++;
+ }
+
+ return 0;
+}
+
+#undef foreach_codepoint
+
+long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
+ const char* si;
+ const char* st;
+ unsigned c;
+ char* ds;
+ int rc;
+
+ ds = d;
+
+ for (si = s; si < se; /* empty */) {
+ st = si;
+ c = uv__utf8_decode1(&si, se);
+
+ if (c != '.')
+ if (c != 0x3002) /* 。 */
+ if (c != 0xFF0E) /* . */
+ if (c != 0xFF61) /* 。 */
+ continue;
+
+ rc = uv__idna_toascii_label(s, st, &d, de);
+
+ if (rc < 0)
+ return rc;
+
+ if (d < de)
+ *d++ = '.';
+
+ s = si;
+ }
+
+ if (s < se) {
+ rc = uv__idna_toascii_label(s, se, &d, de);
+
+ if (rc < 0)
+ return rc;
+ }
+
+ if (d < de)
+ *d++ = '\0';
+
+ return d - ds; /* Number of bytes written. */
+}