summary | refs | log | tree | commit | diff | stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
author: apnadkarni <apnmbx-wits@yahoo.com> 2023-02-16 17:15:35 (GMT)
committer: apnadkarni <apnmbx-wits@yahoo.com> 2023-02-16 17:15:35 (GMT)
commit: bd084c2fc97ffe2e19f0f44e23f441b89c139e9b (patch)
tree: 8f83258bdfebc1fc14c28f4468a5c2f50ee9144a /generic/tclEncoding.c
parent: d9046229bc814b561eb59c03e0aa3627264c07ea (diff)
download: tcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.zip
tcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.tar.gz
tcl-bd084c2fc97ffe2e19f0f44e23f441b89c139e9b.tar.bz2
Bit more work on encoding test framework. Long way to go.
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 65
1 files changed, 36 insertions, 29 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 8cd970f..470f8f3 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2368,6 +2368,7 @@ UtfToUtfProc(
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
int ch;
+ int profile;
result = TCL_OK;
@@ -2385,8 +2386,8 @@ UtfToUtfProc(
flags |= PTR2INT(clientData);
dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
+ profile = TCL_ENCODING_PROFILE_GET(flags);
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- int profile = TCL_ENCODING_PROFILE_GET(flags);
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
@@ -2415,15 +2416,15 @@ UtfToUtfProc(
(!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
PROFILE_REPLACE(profile))) {
/* Special sequence \xC0\x80 */
- if (PROFILE_STRICT(profile)) {
- result = TCL_CONVERT_SYNTAX;
- break;
- }
-
- if (PROFILE_REPLACE(profile)) {
- dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
- src += 1; /* C0, 80 handled in next loop iteration
- since dst limit has to be checked */
+ if (flags & ENCODING_INPUT) {
+ if (PROFILE_REPLACE(profile)) {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ src += 2;
+ } else {
+ /* PROFILE_STRICT */
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
} else {
/*
* Convert 0xC080 to real nulls when we are in output mode,
@@ -2432,6 +2433,7 @@ UtfToUtfProc(
*dst++ = 0;
src += 2;
}
+
}
else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
@@ -2516,32 +2518,37 @@ UtfToUtfProc(
/*
* A surrogate character is detected, handle especially.
*/
- /* TODO - what about REPLACE profile? */
if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
}
-
- low = ch;
- len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
-
- if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
-
- if (PROFILE_STRICT(profile)) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
+ if (0 && PROFILE_REPLACE(profile)) {
+ ch = UNICODE_REPLACE_CHAR;
+ src += len;
+ // dst += Tcl_UniCharToUtf(ch, dst);
+ }
+ else {
+ low = ch;
+ len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+
+ if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
+
+ if (PROFILE_STRICT(profile)) {
+ result = TCL_CONVERT_UNKNOWN;
+ src = saveSrc;
+ break;
+ }
+cesu8:
+ *dst++ = (char)(((ch >> 12) | 0xE0) & 0xEF);
+ *dst++ = (char)(((ch >> 6) | 0x80) & 0xBF);
+ *dst++ = (char)((ch | 0x80) & 0xBF);
+ continue;
}
- cesu8:
- *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF);
- *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF);
- *dst++ = (char) ((ch | 0x80) & 0xBF);
- continue;
+ src += len;
+ dst += Tcl_UniCharToUtf(ch, dst);
+ ch = low;
}
- src += len;
- dst += Tcl_UniCharToUtf(ch, dst);
- ch = low;
} else if (PROFILE_STRICT(profile) &&
(!(flags & ENCODING_INPUT)) &&
SURROGATE(ch)) {