summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorapnadkarni <apnmbx-wits@yahoo.com>2024-08-23 06:10:56 (GMT)
committerapnadkarni <apnmbx-wits@yahoo.com>2024-08-23 06:10:56 (GMT)
commite3f4e1187fee73ffe66ff7924ed2f09b4db5cc61 (patch)
tree7f2a729121453272afb3bb205d4978d19080cd9e
parent123458fa6b9487cee12e4c1952c59b125b2274ed (diff)
downloadtcl-e3f4e1187fee73ffe66ff7924ed2f09b4db5cc61.zip
tcl-e3f4e1187fee73ffe66ff7924ed2f09b4db5cc61.tar.gz
tcl-e3f4e1187fee73ffe66ff7924ed2f09b4db5cc61.tar.bz2
Reenable utf16 tests after fixes
-rw-r--r--generic/tclEncoding.c22
-rw-r--r--tests/utfext.test4
2 files changed, 11 insertions, 15 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index cb5dd20..8af87d3 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -3051,25 +3051,22 @@ Utf16ToUtfProc(
* have been combined into one character.
*/
dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst);
- --numChars;
} else {
/* High surrogate was not followed by a low surrogate */
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_SYNTAX;
src -= 2; /* Go back to beginning of high surrogate */
dst--; /* Also undo writing a single byte too much */
- numChars--;
break;
}
if (PROFILE_REPLACE(flags)) {
/*
* Previous loop wrote a single byte to mark the high surrogate.
- * Replace it with the replacement character. Further, restart
- * current loop iteration since need to recheck destination
- * space and reset processing of current character.
+ * Replace it with the replacement character.
*/
ch = UNICODE_REPLACE_CHAR;
dst--;
+ numChars++;
dst += Tcl_UniCharToUtf(ch, dst);
} else {
/*
@@ -3081,7 +3078,6 @@ Utf16ToUtfProc(
/* Loop around again so destination space and other checks are done */
prev = 0; /* Reset high surrogate tracker */
src -= 2;
- numChars--;
}
} else {
/* Previous char was not a high surrogate */
@@ -3098,6 +3094,8 @@ Utf16ToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
} else if (HIGH_SURROGATE(ch)) {
dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst);
+ /* Do not count this just yet. Compensate for numChars++ in loop counter */
+ numChars--;
} else {
assert(LOW_SURROGATE(ch));
if (PROFILE_STRICT(flags)) {
@@ -3119,14 +3117,12 @@ Utf16ToUtfProc(
* there may be a leftover high surrogate we need to deal with.
* 2. TCL_CONVERT_NOSPACE - Ran out of room in the destination buffer.
* Same considerations as (1)
- * 3. TCL_CONVERT_SYNTAX - decoding error. src, dst, numChars will
- * hold the correct values up to the point of error even if the
- * the last character decoded was a high surrogate.
+ * 3. TCL_CONVERT_SYNTAX - decoding error.
* 4. TCL_CONVERT_MULTIBYTE - the buffer passed in was not fully
* processed, because there was a trailing single byte. However,
- * we may have processed the requested number of characters already
+ * we *may* have processed the requested number of characters already
* in which case the trailing byte does not matter. We still
- * may still be a leftover high surrogate as in (1) and (2).
+ * *may* still be a leftover high surrogate as in (1) and (2).
*/
switch (result) {
case TCL_CONVERT_MULTIBYTE: /* FALLTHRU */
@@ -3144,19 +3140,19 @@ Utf16ToUtfProc(
result = TCL_CONVERT_SYNTAX;
src -= 2;
dst--;
- numChars--;
} else if (PROFILE_REPLACE(flags)) {
dst--;
+ numChars++;
dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
} else {
/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
+ numChars++;
dst += Tcl_UniCharToUtf(-1, dst);
}
} else {
/* More data is expected. Revert the surrogate state */
src -= 2;
dst--;
- numChars--;
/* Note: leave result of TCL_CONVERT_NOSPACE as is */
if (result == TCL_OK) {
result = TCL_CONVERT_MULTIBYTE;
diff --git a/tests/utfext.test b/tests/utfext.test
index 4b15a8d..01f5184 100644
--- a/tests/utfext.test
+++ b/tests/utfext.test
@@ -248,8 +248,8 @@ namespace eval utftest {
testfragment fromutf $enc $comment $utfhex $hex $internalfragindex
# Char limits - note no fromutf as Tcl_UtfToExternal does not support it
- if {![string match utf-16* $enc] && $enc ne "cesu-8"} {
- # TODO - utf16 hangs
+ if {$enc ne "cesu-8"} {
+ # TODO - cesu-8
testcharlimit toutf $enc $comment $hex $utfhex
}
}