From 145a2574d993bba4942edc57ebfbf4d1ec9314af Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 21 Jan 2024 16:26:40 +0000 Subject: Optimize Tcl_UniCharIsControl(). Don't worry about range >= U+F0000, that's for TCL_UTF_MAX>3, which is unsupported for 8.6. --- generic/regc_locale.c | 3 +-- generic/tclUtf.c | 8 +------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/generic/regc_locale.c b/generic/regc_locale.c index 449cff6..c0ae530 100644 --- a/generic/regc_locale.c +++ b/generic/regc_locale.c @@ -302,8 +302,7 @@ static const crange controlRangeTable[] = { {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F}, {0xE000, 0xF8FF}, {0xFFF9, 0xFFFB} #if CHRBITS > 16 - ,{0x13430, 0x1343F}, {0x1BCA0, 0x1BCA3}, {0x1D173, 0x1D17A}, {0xE0020, 0xE007F}, - {0xF0000, 0xFFFFD}, {0x100000, 0x10FFFD} + ,{0x13430, 0x1343F}, {0x1BCA0, 0x1BCA3}, {0x1D173, 0x1D17A}, {0xE0020, 0xE007F} #endif }; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 196c5fb..736da66 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1805,13 +1805,7 @@ Tcl_UniCharIsControl( if (UNICODE_OUT_OF_RANGE(ch)) { /* Clear away extension bits, if any */ ch &= 0x1FFFFF; - if ((ch == 0xE0001) || ((ch >= 0xE0020) && (ch <= 0xE007F))) { - return 1; - } - if ((ch >= 0xF0000) && ((ch & 0xFFFF) <= 0xFFFD)) { - return 1; - } - return 0; + return ((ch == 0xE0001) || ((unsigned)(ch - 0xE0020) <= 0x5F)); } #endif return ((CONTROL_BITS >> GetCategory(ch)) & 1); -- cgit v0.12 From 785abcfc3ad99e86993befbced53fe3c3059d1b3 Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 22 Jan 2024 14:26:21 +0000 Subject: Doc of Tcl_AppendToObj: Document NULL data argument behaviour and buffer growth optimization. --- doc/StringObj.3 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/StringObj.3 b/doc/StringObj.3 index e569e62..0568d5a 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -221,12 +221,16 @@ to bytes) in the string value. \fBTcl_AppendToObj\fR appends the data given by \fIbytes\fR and \fIlength\fR to the string representation of the value specified by \fIobjPtr\fR. If the value has an invalid string representation, -then an attempt is made to convert \fIbytes\fR is to the Unicode +then an attempt is made to convert \fIbytes\fR to the Unicode format. If the conversion is successful, then the converted form of \fIbytes\fR is appended to the value's Unicode representation. Otherwise, the value's Unicode representation is invalidated and converted to the UTF format, and \fIbytes\fR is appended to the value's new string representation. +\fIlength\fR bytes are allocated and not filled, if \fIbytes\fR is a +null pointer. +Eventually buffer growth is optimized by large allocations to optimize +multiple calls. .PP \fBTcl_AppendUnicodeToObj\fR appends the Unicode string given by \fIunicode\fR and \fInumChars\fR to the value specified by -- cgit v0.12 From 3c748667c71e566793f3a86b6e988d000952cd9b Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 22 Jan 2024 15:07:33 +0000 Subject: [db4f2843cd],[da16d15574]: revert Tcl_ReadChars fix, due to performance impact. --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 0153646..0f79f1e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6112,7 +6112,7 @@ ReadChars( int dstLimit = TCL_UTF_MAX - 1 + toRead * factor / UTF_EXPANSION_FACTOR; (void) TclGetStringFromObj(objPtr, &numBytes); - Tcl_SetObjLength(objPtr, numBytes + dstLimit); + Tcl_AppendToObj(objPtr, NULL, dstLimit); if (toRead == srcLen) { unsigned int size; -- cgit v0.12 From 36303748ad11c49eb94ff612665c57407b021098 Mon Sep 17 00:00:00 2001 From: oehhar Date: Tue, 23 Jan 2024 08:35:57 +0000 Subject: Doc of Tcl_AppendToObj: Remove documentation of NULL data argument as this is an internal feature --- doc/StringObj.3 | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/StringObj.3 b/doc/StringObj.3 index 0568d5a..aea8d62 100644 --- a/doc/StringObj.3 +++ b/doc/StringObj.3 @@ -227,9 +227,7 @@ format. If the conversion is successful, then the converted form of Otherwise, the value's Unicode representation is invalidated and converted to the UTF format, and \fIbytes\fR is appended to the value's new string representation. -\fIlength\fR bytes are allocated and not filled, if \fIbytes\fR is a -null pointer. -Eventually buffer growth is optimized by large allocations to optimize +Eventually buffer growth is done by large allocations to optimize multiple calls. .PP \fBTcl_AppendUnicodeToObj\fR appends the Unicode string given by -- cgit v0.12