From bf4f51ce7866dc5c5ce3acf483636f7a281e3d18 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 7 Dec 2019 18:23:41 +0000 Subject: Fix 2 test-cases, which were failing when TCL_UTF_MAX=6. Add UTF_MAX=6 UNIX build to Travis as proof that now all test-cases pass for UTF_MAX=6 (still not officially supported) --- .travis.yml | 7 +++++++ tests/encoding.test | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9fa6f1f..537621c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,13 @@ matrix: env: - BUILD_DIR=unix - CFGOPT=CFLAGS=-DTCL_UTF_MAX=4 + - name: "Linux/GCC/Shared: UTF_MAX=6" + os: linux + dist: xenial + compiler: gcc + env: + - BUILD_DIR=unix + - CFGOPT=CFLAGS=-DTCL_UTF_MAX=6 - name: "Linux/GCC/Static" os: linux dist: xenial diff --git a/tests/encoding.test b/tests/encoding.test index 15aba11..b11c731 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -331,13 +331,13 @@ test encoding-15.3 {UtfToUtfProc null character input} { test encoding-15.4 {UtfToUtfProc emoji character input} { set x \xED\xA0\xBD\xED\xB8\x82 set y [encoding convertfrom utf-8 \xED\xA0\xBD\xED\xB8\x82] - list [string length $x] [string length $y] $y -} "6 2 \uD83D\uDE02" + list [string length $x] $y +} "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { set x \xF0\x9F\x98\x82 set y [encoding convertfrom utf-8 \xF0\x9F\x98\x82] - list [string length $x] [string length $y] $y -} "4 2 \uD83D\uDE02" + list [string length $x] $y +} "4 \uD83D\uDE02" test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] -- cgit v0.12 From d67affb10098d8289d0fd12a5ea738068740ef39 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 8 Dec 2019 16:33:18 +0000 Subject: Fix Valgrind problem reported as follow-up in [fc4393e9b0]. 
Since it happens only for TCL_UTF_MAX>3 it's not actually a 'bug' in 8.6, but it might be a corner-case not handled well in 8.7 --- generic/tclEncoding.c | 4 ++-- generic/tclExecute.c | 2 +- generic/tclStringObj.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index e080d6e..69075bd 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2361,11 +2361,11 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(*chPtr, dst); } else { src += TclUtfToUniChar(src, chPtr); - if ((*chPtr & 0xFC00) == 0xD800) { + if ((*chPtr | 0x3FF) == 0xDBFF) { /* A high surrogate character is detected, handle especially */ Tcl_UniChar low = *chPtr; size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0; - if ((low & 0xFC00) != 0xDC00) { + if ((low | 0x3FF) != 0xDFFF) { *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); *dst++ = (char) ((*chPtr | 0x80) & 0xBF); diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 6394eea..20e2c34 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5544,7 +5544,7 @@ TEBCresume( objResultPtr = Tcl_NewStringObj((const char *) valuePtr->bytes+index, 1); } else { - char buf[TCL_UTF_MAX] = ""; + char buf[4] = ""; Tcl_UniChar ch = Tcl_GetUniChar(valuePtr, index); /* diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index e4db140..1534a8b 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1994,7 +1994,7 @@ Tcl_AppendFormatToObj( } break; case 'c': { - char buf[TCL_UTF_MAX]; + char buf[4] = ""; int code, length; if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { @@ -3135,7 +3135,7 @@ ExtendStringRepWithUnicode( */ int i, origLength, size = 0; - char *dst, buf[TCL_UTF_MAX]; + char *dst, buf[4] = ""; String *stringPtr = GET_STRING(objPtr); if (numChars < 0) { -- cgit v0.12