From e9c0ec1219e3c42df67c414bfda0bb5aab9a5bbb Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 18 Jun 2018 05:59:22 +0000 Subject: Plug leak in TclSetEnv. --- generic/tclEnv.c | 4 ++++ tests/pkgIndex.tcl | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/generic/tclEnv.c b/generic/tclEnv.c index 8cc4b74..c559c69 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -730,6 +730,10 @@ TclFinalizeEnvironment(void) ckfree(env.cache); env.cache = NULL; env.cacheSize = 0; + if ((env.ourEnviron != NULL)) { + ckfree(env.ourEnviron); + env.ourEnviron = NULL; + } #ifndef USE_PUTENV env.ourEnvironSize = 0; #endif diff --git a/tests/pkgIndex.tcl b/tests/pkgIndex.tcl index 48ab71b..0feb0eb 100644 --- a/tests/pkgIndex.tcl +++ b/tests/pkgIndex.tcl @@ -1,6 +1,6 @@ #! /usr/bin/env tclsh -package ifneeded tcltests 0.1 { - source [file dirname [file dirname [file normalize [info script]/...]]]/tcltests.tcl - package provide tcltests 0.1 -} +package ifneeded tcltests 0.1 " + source [list $dir]/tcltests.tcl + package provide tcltests 0.1 +" -- cgit v0.12 From 625aca976acac85e85a36f68a3727d2eec785922 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 18 Jun 2018 07:06:04 +0000 Subject: Full cleanup of env cache when in a PURIFY build. --- generic/tclEnv.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/generic/tclEnv.c b/generic/tclEnv.c index c559c69..4a48f65 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -723,10 +723,18 @@ TclFinalizeEnvironment(void) * strings. This may leak more memory that strictly necessary, since some * of the strings may no longer be in the environment. However, * determining which ones are ok to delete is n-squared, and is pretty - * unlikely, so we don't bother. + * unlikely, so we don't bother. However, in the case of DPURIFY, just + * free all strings in the cache. */ + size_t i; + if (env.cache) { +#ifdef PURIFY + for (i = 0; i < env.cacheSize; i++) { + ckfree(env.cache[i]); + } +#endif ckfree(env.cache); env.cache = NULL; env.cacheSize = 0; -- cgit v0.12 From 155e8a1ad56291fb61f3578f3c7cda632556d1da Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 18 Jun 2018 08:09:32 +0000 Subject: Avoid valgrind "still reachable" reports stemming from early termination of threads. --- tests/async.test | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/tests/async.test b/tests/async.test index cb67cc2..e7fc45a 100644 --- a/tests/async.test +++ b/tests/async.test @@ -157,17 +157,24 @@ test async-4.1 {async interrupting bytecode sequence} -constraints { } } -body { apply {{handle} { - global aresult - set aresult {Async event not delivered} - testasync marklater $handle - for {set i 0} { - $i < 2500000 && $aresult eq "Async event not delivered" - } {incr i} { - nothing - } + global aresult + set aresult {Async event not delivered} + testasync marklater $handle + # allow plenty of time to pass in case valgrind is running + set start [clock seconds] + while { + [clock seconds] - $start < 180 && $aresult eq "Async event not delivered" + } { + # be less busy + after 100 + nothing + } return $aresult }} $hm } -result {test pattern} -cleanup { + # give other threads some time to go way so that valgrind doesn't pick up + # "still reachable" cases from early thread termination + after 100 testasync delete $hm } test async-4.2 {async interrupting straight bytecode sequence} -constraints { @@ -179,12 +186,20 @@ test async-4.2 {async interrupting straight bytecode sequence} -constraints { global aresult set aresult {Async event not delivered} testasync marklater $handle - for {set i 0} { - $i < 2500000 && $aresult eq "Async event not delivered" - } {incr i} {} + # allow plenty of time to pass in case valgrind is running + set start [clock seconds] + while { + [clock seconds] - $start < 180 && $aresult eq "Async event not delivered" + } { + # be less busy + after 100 + } return $aresult }} $hm } -result {test pattern} -cleanup { + # give other threads some time to go way so that valgrind doesn't pick up + # "still reachable" cases from early thread termination + after 100 testasync delete $hm } test async-4.3 {async interrupting loop-less bytecode sequence} -constraints { @@ -201,6 +216,9 @@ test async-4.3 {async interrupting loop-less bytecode sequence} -constraints { return $aresult }]] $hm } -result {test pattern} -cleanup { + # give other threads some time to go way so that valgrind doesn't pick up + # "still reachable" cases from early thread termination + after 100 testasync delete $hm } -- cgit v0.12 From 04004f3abcabe486568af1e7b026d03670b48ca8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 18 Jun 2018 15:51:43 +0000 Subject: Unbreak build on Windows (and - most likely - some other platforms too) --- generic/tclEnv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/tclEnv.c b/generic/tclEnv.c index 4a48f65..40ced17 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -727,10 +727,9 @@ TclFinalizeEnvironment(void) * free all strings in the cache. */ - size_t i; - if (env.cache) { #ifdef PURIFY + int i; for (i = 0; i < env.cacheSize; i++) { ckfree(env.cache[i]); } @@ -738,11 +737,11 @@ TclFinalizeEnvironment(void) ckfree(env.cache); env.cache = NULL; env.cacheSize = 0; +#ifndef USE_PUTENV if ((env.ourEnviron != NULL)) { ckfree(env.ourEnviron); env.ourEnviron = NULL; } -#ifndef USE_PUTENV env.ourEnvironSize = 0; #endif } -- cgit v0.12 From 3c57d80efed172427e5aafa447365cb61439613c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 18 Jun 2018 15:54:38 +0000 Subject: Fix [53cad613d8]: TIP 389 implementation makes Tk tests font-4.12 and font-4.15 fail. One more situation in which high surrogate causes problem --- generic/tclParse.c | 12 +++++------- generic/tclStringObj.c | 6 ++++++ generic/tclUtf.c | 7 +++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/generic/tclParse.c b/generic/tclParse.c index fc7f77b..f26f933 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -991,15 +991,13 @@ TclParseBackslash( if (readPtr != NULL) { *readPtr = count; } -#if TCL_UTF_MAX >= 4 - if ((result & 0xFC00) == 0xD800) { - dst[2] = (char) ((result | 0x80) & 0xBF); - dst[1] = (char) (((result >> 6) | 0x80) & 0xBF); - dst[0] = (char) ((result >> 12) | 0xE0); - return 3; + count = Tcl_UniCharToUtf(result, dst); +#if TCL_UTF_MAX > 3 + if (!count) { + count = Tcl_UniCharToUtf(-1, dst); } #endif - return Tcl_UniCharToUtf(result, dst); + return count; } /* diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a503392..1795d0c 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1996,6 +1996,12 @@ Tcl_AppendFormatToObj( goto error; } length = Tcl_UniCharToUtf(code, buf); +#if TCL_UTF_MAX > 3 + if (!length) { + /* Special case for handling upper surrogates. */ + length = Tcl_UniCharToUtf(-1, buf); + } +#endif segment = Tcl_NewStringObj(buf, length); Tcl_IncrRefCount(segment); allocSegment = 1; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 46ce4ef..c2963bf 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -189,6 +189,13 @@ Tcl_UniCharToUtf( buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } + } else if (ch == -1) { + if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) + && ((buf[2] & 0xCF) == 0)) { + ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2) + + ((buf[2] & 0x30) >> 4); + goto three; + } #endif } -- cgit v0.12 From 94f9cf81ed3e156bd372a3cac249974d1acb4e1d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 24 Jun 2018 20:26:27 +0000 Subject: Fix "string tolower" and friends for handling unpaired surrogates correctly. Also add test-cases for those situations. Various typo's in comments. --- doc/Utf.3 | 5 ++++- generic/tclCmdMZ.c | 3 +++ generic/tclDisassemble.c | 2 +- generic/tclExecute.c | 14 ++++++++++---- generic/tclParse.c | 1 + generic/tclUtf.c | 21 +++++++++++++++------ tests/utf.test | 15 +++++++++++++++ 7 files changed, 49 insertions(+), 12 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index 160575b..922fd81 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -132,7 +132,10 @@ represent one Unicode character in the UTF-8 representation. .PP \fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string in starting at \fIbuf\fR. The return value is the number of bytes stored -in \fIbuf\fR. +in \fIbuf\fR. If ch is an upper surrogate (range U+D800 - U+DBFF), then +the return value will be 0 and nothing will be stored. If you still +want to produce UTF-8 output for it (even though knowing it's an illegal +code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1. .PP \fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index d64299e..0bd6cb4 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1447,6 +1447,9 @@ StringIndexCmd( char buf[4]; length = Tcl_UniCharToUtf(ch, buf); + if (!length) { + length = Tcl_UniCharToUtf(-1, buf); + } Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); } } diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index e9aaec4..a0d1258 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -894,7 +894,7 @@ PrintSourceToObj( Tcl_AppendPrintfToObj(appendObj, "\\U%08x", ch); i += 10; } else -#elif TCL_UTF_MAX > 3 +#else /* If len == 0, this means we have a char > 0xffff, resulting in * TclUtfToUniChar producing a surrogate pair. We want to output * this pair as a single Unicode character. diff --git a/generic/tclExecute.c b/generic/tclExecute.c index fda50b2..82de752 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -4964,7 +4964,7 @@ TEBCresume( /* Decode index value operands. */ - /* + /* assert ( toIdx != TCL_INDEX_AFTER); * * Extra safety for legacy bytecodes: @@ -5223,9 +5223,15 @@ TEBCresume( * but creating the object as a string seems to be faster in * practical use. */ - - length = (ch != -1) ? Tcl_UniCharToUtf(ch, buf) : 0; - objResultPtr = Tcl_NewStringObj(buf, length); + if (ch == -1) { + objResultPtr = Tcl_NewObj(); + } else { + length = Tcl_UniCharToUtf(ch, buf); + if (!length) { + length = Tcl_UniCharToUtf(-1, buf); + } + objResultPtr = Tcl_NewStringObj(buf, length); + } } TRACE_APPEND(("\"%s\"\n", O2S(objResultPtr))); diff --git a/generic/tclParse.c b/generic/tclParse.c index 581270c..00b83a1 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -981,6 +981,7 @@ TclParseBackslash( } count = Tcl_UniCharToUtf(result, dst); if (!count) { + /* Special case for handling upper surrogates. */ count = Tcl_UniCharToUtf(-1, dst); } return count; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ed10ab2..c8292a2 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -218,7 +218,7 @@ Tcl_UniCharToUtfDString( { const Tcl_UniChar *w, *wEnd; char *p, *string; - int oldLength; + int oldLength, len = 1; /* * UTF-8 string length in bytes will be <= Unicode string length * 4. @@ -231,9 +231,18 @@ Tcl_UniCharToUtfDString( p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { - p += Tcl_UniCharToUtf(*w, p); + if (!len && ((*w & 0xFC00) != 0xDC00)) { + /* Special case for handling upper surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } + len = Tcl_UniCharToUtf(*w, p); + p += len; w++; } + if (!len) { + /* Special case for handling upper surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } Tcl_DStringSetLength(dsPtr, oldLength + (p - string)); return string; @@ -899,7 +908,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (bytes < TclUtfCount(upChar)) { + if ((bytes < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -962,7 +971,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (bytes < TclUtfCount(lowChar)) { + if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -1022,7 +1031,7 @@ Tcl_UtfToTitle( #endif titleChar = Tcl_UniCharToTitle(titleChar); - if (bytes < TclUtfCount(titleChar)) { + if ((bytes < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -1046,7 +1055,7 @@ Tcl_UtfToTitle( lowChar = Tcl_UniCharToLower(lowChar); } - if (bytes < TclUtfCount(lowChar)) { + if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { diff --git a/tests/utf.test b/tests/utf.test index 67a6778..e820359 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -158,6 +158,12 @@ test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4e4e\u25a\xff\u543 2 } "\uff" +test utf-8.5 {Tcl_UniCharAtIndex: upper surrogate} { + string index \ud842 0 +} "\ud842" +test utf-8.5 {Tcl_UniCharAtIndex: lower surrogate} { + string index \udc42 0 +} "\udc42" test utf-9.1 {Tcl_UtfAtIndex: index = 0} { string range abcd 0 2 @@ -263,6 +269,9 @@ test utf-11.4 {Tcl_UtfToUpper} { test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10d0\u1c90 } \u1c90\u1c90 +test utf-11.6 {Tcl_UtfToUpper low/high surrogate)} { + string toupper \udc24\ud824 +} \udc24\ud824 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -279,6 +288,9 @@ test utf-12.4 {Tcl_UtfToLower} { test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10d0\u1c90 } \u10d0\u10d0 +test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} { + string tolower \udc24\ud824 +} \udc24\ud824 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -298,6 +310,9 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1c90\u10d0 } \u1c90\u10d0 +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { + string totitle \udc24\ud824 +} \udc24\ud824 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b -- cgit v0.12