summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2024-02-12 21:55:57 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2024-02-12 21:55:57 (GMT)
commita868b5be5e91ce4137cce12a45b36007f18d1707 (patch)
tree90f3713d479303e9a82aecdd51dd6083513133fb /generic/tclEncoding.c
parent07181fbe1e09709ceec52380ea7ead77692bf8c8 (diff)
downloadtcl-a868b5be5e91ce4137cce12a45b36007f18d1707.zip
tcl-a868b5be5e91ce4137cce12a45b36007f18d1707.tar.gz
tcl-a868b5be5e91ce4137cce12a45b36007f18d1707.tar.bz2
More code cleanup
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c38
1 files changed, 17 insertions, 21 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 1cba87e..0658d21 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -210,7 +210,7 @@ static const struct TclEncodingProfiles {
#define PROFILE_TCL8(flags_) \
((ENCODING_PROFILE_GET(flags_) != TCL_ENCODING_PROFILE_REPLACE) && !PROFILE_STRICT(flags_))
-#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD)
+#define UNICODE_REPLACE_CHAR 0xFFFD
#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800)
#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800)
#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00)
@@ -2559,10 +2559,9 @@ UtfToUtfProc(
*dst++ = *src++;
} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) &&
(UCHAR(src[1]) == 0x80) &&
- (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
- PROFILE_REPLACE(profile))) {
+ (!(flags & ENCODING_INPUT) || !PROFILE_TCL8(profile))) {
/* Special sequence \xC0\x80 */
- if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) {
+ if (!PROFILE_TCL8(profile) && (flags & ENCODING_INPUT)) {
if (PROFILE_REPLACE(profile)) {
dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
src += 2;
@@ -2610,15 +2609,9 @@ UtfToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
} else {
int low;
- int isInvalid = 0;
size_t len = Tcl_UtfToUniChar(src, &ch);
if (flags & ENCODING_INPUT) {
- if ((len < 2) && (ch != 0)) {
- isInvalid = 1;
- } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) {
- isInvalid = 1;
- }
- if (isInvalid) {
+ if (((len < 2) && (ch != 0)) || ((ch > 0xFFFF) && !(flags & ENCODING_UTF))) {
if (PROFILE_STRICT(profile)) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -2672,7 +2665,7 @@ cesu8:
dst += Tcl_UniCharToUtf(ch, dst);
ch = low;
}
- } else if (PROFILE_STRICT(profile) && SURROGATE(ch)) {
+ } else if (SURROGATE(ch) && PROFILE_STRICT(profile)) {
result = (flags & ENCODING_INPUT) ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
@@ -2783,17 +2776,20 @@ Utf32ToUtfProc(
}
if ((unsigned)ch > 0x10FFFF) {
- ch = UNICODE_REPLACE_CHAR;
if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_SYNTAX;
break;
}
- } else if (PROFILE_STRICT(flags) && SURROGATE(ch)) {
- result = TCL_CONVERT_SYNTAX;
- ch = 0;
- break;
- } else if (PROFILE_REPLACE(flags) && SURROGATE(ch)) {
ch = UNICODE_REPLACE_CHAR;
+ } else if (SURROGATE(ch)) {
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ ch = 0;
+ break;
+ }
+ if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
+ }
}
/*
@@ -3413,7 +3409,7 @@ TableToUtfProc(
ch = UNICODE_REPLACE_CHAR;
} else {
/* For prefix bytes, we don't fallback to cp1252, see [1355b9a874] */
- ch = (Tcl_UniChar)byte;
+ ch = byte;
}
} else {
ch = toUnicode[byte][*((unsigned char *)++src)];
@@ -3632,7 +3628,7 @@ Iso88591ToUtfProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- ch = (Tcl_UniChar) *((unsigned char *) src);
+ ch = *((unsigned char *) src);
/*
* Special case for 1-byte utf chars for speed.
@@ -3733,7 +3729,7 @@ Iso88591FromUtfProc(
* Plunge on, using '?' as a fallback character.
*/
- ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */
+ ch = '?'; /* Profiles TCL8 and REPLACE */
}
if (dst > dstEnd) {