diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-10-09 16:26:52 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2024-10-09 16:26:52 (GMT) |
| commit | ee1bf9db84efa0bd7640a74fcc8afab45b01f352 (patch) | |
| tree | 628703e3dacb6621556f438d9c3e22583ff90859 /generic/tclEncoding.c | |
| parent | 2ba12cd8d22afd194e80417a4c408bfd42bdc7c7 (diff) | |
| parent | b592080d7e28c6f351cecba26fce523a512f56c8 (diff) | |
| download | tcl-ee1bf9db84efa0bd7640a74fcc8afab45b01f352.zip tcl-ee1bf9db84efa0bd7640a74fcc8afab45b01f352.tar.gz tcl-ee1bf9db84efa0bd7640a74fcc8afab45b01f352.tar.bz2 | |
Merge-mark 8.7. Indenting (8 spaces -> tabs)
Diffstat (limited to 'generic/tclEncoding.c')
| -rw-r--r-- | generic/tclEncoding.c | 454 |
1 files changed, 227 insertions, 227 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 9acedd5..2c97901 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2452,8 +2452,8 @@ UtfToUtfProc( int profile; if (flags & TCL_ENCODING_START) { - /* *statePtr will hold high surrogate in a split surrogate pair */ - *statePtr = 0; + /* *statePtr will hold high surrogate in a split surrogate pair */ + *statePtr = 0; } result = TCL_OK; @@ -2483,17 +2483,17 @@ UtfToUtfProc( */ #define OUTPUT_ISOLATEDSURROGATE \ do { \ - Tcl_UniChar high; \ - if (PROFILE_REPLACE(profile)) { \ - high = UNICODE_REPLACE_CHAR; \ - } else { \ - high = (Tcl_UniChar)(ptrdiff_t) *statePtr; \ - } \ - assert(!(flags & ENCODING_UTF)); /* Must be CESU-8 */ \ - assert(HIGH_SURROGATE(high)); \ - assert(!PROFILE_STRICT(profile)); \ - dst += Tcl_UniCharToUtf(high, dst); \ - *statePtr = 0; /* Reset state */ \ + Tcl_UniChar high; \ + if (PROFILE_REPLACE(profile)) { \ + high = UNICODE_REPLACE_CHAR; \ + } else { \ + high = (Tcl_UniChar)(ptrdiff_t) *statePtr; \ + } \ + assert(!(flags & ENCODING_UTF)); /* Must be CESU-8 */ \ + assert(HIGH_SURROGATE(high)); \ + assert(!PROFILE_STRICT(profile)); \ + dst += Tcl_UniCharToUtf(high, dst); \ + *statePtr = 0; /* Reset state */ \ } while (0) /* @@ -2503,14 +2503,14 @@ UtfToUtfProc( */ #define CHECK_ISOLATEDSURROGATE \ if (*statePtr) { \ - if (PROFILE_STRICT(profile)) { \ - result = TCL_CONVERT_SYNTAX; \ - break; \ - } \ - OUTPUT_ISOLATEDSURROGATE; \ - continue; /* Rerun loop so length checks etc. repeated */ \ + if (PROFILE_STRICT(profile)) { \ + result = TCL_CONVERT_SYNTAX; \ + break; \ + } \ + OUTPUT_ISOLATEDSURROGATE; \ + continue; /* Rerun loop so length checks etc. repeated */ \ } else \ - (void) 0 + (void) 0 profile = ENCODING_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { @@ -2531,7 +2531,7 @@ UtfToUtfProc( if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) { - CHECK_ISOLATEDSURROGATE; + CHECK_ISOLATEDSURROGATE; /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to \xC0\x80. @@ -2542,7 +2542,7 @@ UtfToUtfProc( (!(flags & ENCODING_INPUT) || !PROFILE_TCL8(profile))) { /* Special sequence \xC0\x80 */ - CHECK_ISOLATEDSURROGATE; + CHECK_ISOLATEDSURROGATE; if (!PROFILE_TCL8(profile) && (flags & ENCODING_INPUT)) { if (PROFILE_REPLACE(profile)) { dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); @@ -2563,12 +2563,12 @@ UtfToUtfProc( } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* - * Incomplete byte sequence not because there are insufficient - * bytes in source buffer (have already checked that above) but - * because the UTF-8 sequence is truncated. - */ + * Incomplete byte sequence not because there are insufficient + * bytes in source buffer (have already checked that above) but + * because the UTF-8 sequence is truncated. + */ - CHECK_ISOLATEDSURROGATE; + CHECK_ISOLATEDSURROGATE; if (flags & ENCODING_INPUT) { /* Incomplete bytes for modified UTF-8 target */ @@ -2590,11 +2590,11 @@ UtfToUtfProc( } dst += Tcl_UniCharToUtf(ch, dst); } else { - /* Have a complete character */ + /* Have a complete character */ size_t len = TclUtfToUniChar(src, &ch); - Tcl_UniChar savedSurrogate = (Tcl_UniChar) (ptrdiff_t)*statePtr; - *statePtr = 0; /* Reset surrogate */ + Tcl_UniChar savedSurrogate = (Tcl_UniChar) (ptrdiff_t)*statePtr; + *statePtr = 0; /* Reset surrogate */ if (flags & ENCODING_INPUT) { if (((len < 2) && (ch != 0)) @@ -2627,65 +2627,65 @@ UtfToUtfProc( *dst++ = (char)((ch | 0x80) & 0xBF); continue; } else if (SURROGATE(ch)) { - if ((flags & ENCODING_UTF)) { - /* UTF-8, not CESU-8, so surrogates should not appear */ - if (PROFILE_STRICT(profile)) { - result = (flags & ENCODING_INPUT) + if ((flags & ENCODING_UTF)) { + /* UTF-8, not CESU-8, so surrogates should not appear */ + if (PROFILE_STRICT(profile)) { + result = (flags & ENCODING_INPUT) ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; - src = saveSrc; - break; - } else if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - } else { - /* PROFILE_TCL8 - output as is */ - } - } else { - /* CESU-8 */ - if (LOW_SURROGATE(ch)) { - if (savedSurrogate) { - assert(HIGH_SURROGATE(savedSurrogate)); - ch = 0x10000 + ((savedSurrogate - 0xd800) << 10) + (ch - 0xdc00); - } else { - /* Isolated low surrogate */ - if (PROFILE_STRICT(profile)) { - result = (flags & ENCODING_INPUT) - ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; - src = saveSrc; - break; - } else if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - } else { - /* Tcl8 profile. Output low surrogate as is */ - } - } - } else { - assert(HIGH_SURROGATE(ch)); - /* Save the high surrogate */ - *statePtr = (Tcl_EncodingState) (ptrdiff_t) ch; - if (savedSurrogate) { - assert(HIGH_SURROGATE(savedSurrogate)); - if (PROFILE_STRICT(profile)) { - result = (flags & ENCODING_INPUT) - ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; - src = saveSrc; - break; - } else if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - } else { - /* Output the isolated high surrogate */ - ch = savedSurrogate; - } - } else { - /* High surrogate saved in *statePtr. Do not output anything just yet. */ - --numChars; /* Cancel the increment at end of loop */ - continue; - } - } - } + src = saveSrc; + break; + } else if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } else { + /* PROFILE_TCL8 - output as is */ + } + } else { + /* CESU-8 */ + if (LOW_SURROGATE(ch)) { + if (savedSurrogate) { + assert(HIGH_SURROGATE(savedSurrogate)); + ch = 0x10000 + ((savedSurrogate - 0xd800) << 10) + (ch - 0xdc00); + } else { + /* Isolated low surrogate */ + if (PROFILE_STRICT(profile)) { + result = (flags & ENCODING_INPUT) + ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } else if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } else { + /* Tcl8 profile. Output low surrogate as is */ + } + } + } else { + assert(HIGH_SURROGATE(ch)); + /* Save the high surrogate */ + *statePtr = (Tcl_EncodingState) (ptrdiff_t) ch; + if (savedSurrogate) { + assert(HIGH_SURROGATE(savedSurrogate)); + if (PROFILE_STRICT(profile)) { + result = (flags & ENCODING_INPUT) + ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } else if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } else { + /* Output the isolated high surrogate */ + ch = savedSurrogate; + } + } else { + /* High surrogate saved in *statePtr. Do not output anything just yet. */ + --numChars; /* Cancel the increment at end of loop */ + continue; + } + } + } } else { - /* Normal character */ - CHECK_ISOLATEDSURROGATE; - } + /* Normal character */ + CHECK_ISOLATEDSURROGATE; + } dst += Tcl_UniCharToUtf(ch, dst); } @@ -2693,29 +2693,29 @@ UtfToUtfProc( /* Check if an high surrogate left over */ if (*statePtr) { - assert(!(flags & ENCODING_UTF)); /* CESU-8, Not UTF-8 */ - if (!(flags & TCL_ENCODING_END)) { - /* More data coming */ - } else { - /* No more data coming */ - if (PROFILE_STRICT(profile)) { - result = (flags & ENCODING_INPUT) - ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; - } else { - if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - } else { - ch = (Tcl_UniChar) (ptrdiff_t) *statePtr; - } - if (dst < dstEnd) { - dst += Tcl_UniCharToUtf(ch, dst); - ++numChars; - } else { - /* No room in destination */ - result = TCL_CONVERT_NOSPACE; - } - } - } + assert(!(flags & ENCODING_UTF)); /* CESU-8, Not UTF-8 */ + if (!(flags & TCL_ENCODING_END)) { + /* More data coming */ + } else { + /* No more data coming */ + if (PROFILE_STRICT(profile)) { + result = (flags & ENCODING_INPUT) + ? TCL_CONVERT_SYNTAX : TCL_CONVERT_UNKNOWN; + } else { + if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } else { + ch = (Tcl_UniChar) (ptrdiff_t) *statePtr; + } + if (dst < dstEnd) { + dst += Tcl_UniCharToUtf(ch, dst); + ++numChars; + } else { + /* No room in destination */ + result = TCL_CONVERT_NOSPACE; + } + } + } } @@ -3047,72 +3047,72 @@ Utf16ToUtfProc( ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (HIGH_SURROGATE(prev)) { - if (LOW_SURROGATE(ch)) { - /* - * High surrogate was followed by a low surrogate. - * Tcl_UniCharToUtf would have stashed away the state in dst. - * Call it again to combine that state with the low surrogate. - * We also have to compensate the numChars as two UTF-16 units - * have been combined into one character. - */ - dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); - } else { - /* High surrogate was not followed by a low surrogate */ - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - src -= 2; /* Go back to beginning of high surrogate */ - dst--; /* Also undo writing a single byte too much */ - break; - } - if (PROFILE_REPLACE(flags)) { - /* - * Previous loop wrote a single byte to mark the high surrogate. - * Replace it with the replacement character. - */ - ch = UNICODE_REPLACE_CHAR; - dst--; - numChars++; - dst += Tcl_UniCharToUtf(ch, dst); - } else { - /* - * Bug [10c2c17c32]. If Hi surrogate not followed by Lo - * surrogate, finish 3-byte UTF-8 - */ - dst += Tcl_UniCharToUtf(-1, dst); - } - /* Loop around again so destination space and other checks are done */ - prev = 0; /* Reset high surrogate tracker */ - src -= 2; - } + if (LOW_SURROGATE(ch)) { + /* + * High surrogate was followed by a low surrogate. + * Tcl_UniCharToUtf would have stashed away the state in dst. + * Call it again to combine that state with the low surrogate. + * We also have to compensate the numChars as two UTF-16 units + * have been combined into one character. + */ + dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else { + /* High surrogate was not followed by a low surrogate */ + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + src -= 2; /* Go back to beginning of high surrogate */ + dst--; /* Also undo writing a single byte too much */ + break; + } + if (PROFILE_REPLACE(flags)) { + /* + * Previous loop wrote a single byte to mark the high surrogate. + * Replace it with the replacement character. + */ + ch = UNICODE_REPLACE_CHAR; + dst--; + numChars++; + dst += Tcl_UniCharToUtf(ch, dst); + } else { + /* + * Bug [10c2c17c32]. If Hi surrogate not followed by Lo + * surrogate, finish 3-byte UTF-8 + */ + dst += Tcl_UniCharToUtf(-1, dst); + } + /* Loop around again so destination space and other checks are done */ + prev = 0; /* Reset high surrogate tracker */ + src -= 2; + } } else { - /* Previous char was not a high surrogate */ - - /* - * Special case for 1-byte utf chars for speed. Make sure we work with - * unsigned short-size data. Order checks based on expected frequency. - */ - if ((unsigned)ch - 1 < 0x7F) { - /* ASCII except nul */ - *dst++ = (ch & 0xFF); - } else if (!SURROGATE(ch)) { - /* Not ASCII, not surrogate */ - dst += Tcl_UniCharToUtf(ch, dst); - } else if (HIGH_SURROGATE(ch)) { - dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); - /* Do not count this just yet. Compensate for numChars++ in loop counter */ - numChars--; - } else { - assert(LOW_SURROGATE(ch)); - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - break; - } - if (PROFILE_REPLACE(flags)) { - ch = UNICODE_REPLACE_CHAR; - } - dst += Tcl_UniCharToUtf(ch, dst); - } - } + /* Previous char was not a high surrogate */ + + /* + * Special case for 1-byte utf chars for speed. Make sure we work with + * unsigned short-size data. Order checks based on expected frequency. + */ + if ((unsigned)ch - 1 < 0x7F) { + /* ASCII except nul */ + *dst++ = (ch & 0xFF); + } else if (!SURROGATE(ch)) { + /* Not ASCII, not surrogate */ + dst += Tcl_UniCharToUtf(ch, dst); + } else if (HIGH_SURROGATE(ch)) { + dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + /* Do not count this just yet. Compensate for numChars++ in loop counter */ + numChars--; + } else { + assert(LOW_SURROGATE(ch)); + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } + dst += Tcl_UniCharToUtf(ch, dst); + } + } } /* @@ -3133,59 +3133,59 @@ Utf16ToUtfProc( case TCL_CONVERT_MULTIBYTE: /* FALLTHRU */ case TCL_OK: /* FALLTHRU */ case TCL_CONVERT_NOSPACE: - if (HIGH_SURROGATE(ch)) { - if (flags & TCL_ENCODING_END) { - /* - * No more data expected. There will be space for output of - * one character (essentially overwriting the dst area holding - * high surrogate state) - */ - assert((dst-1) <= dstEnd); - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - src -= 2; - dst--; - } else if (PROFILE_REPLACE(flags)) { - dst--; - numChars++; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - } else { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - numChars++; - dst += Tcl_UniCharToUtf(-1, dst); - } - } else { - /* More data is expected. Revert the surrogate state */ - src -= 2; - dst--; - /* Note: leave result of TCL_CONVERT_NOSPACE as is */ - if (result == TCL_OK) { - result = TCL_CONVERT_MULTIBYTE; - } - } - } else if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { - /* - * If we had a trailing byte at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. - * Note in this case we DO need to check for room in dst. - */ - if (dst > dstEnd) { - result = TCL_CONVERT_NOSPACE; - } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src++; - } - } - } - break; + if (HIGH_SURROGATE(ch)) { + if (flags & TCL_ENCODING_END) { + /* + * No more data expected. There will be space for output of + * one character (essentially overwriting the dst area holding + * high surrogate state) + */ + assert((dst-1) <= dstEnd); + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + src -= 2; + dst--; + } else if (PROFILE_REPLACE(flags)) { + dst--; + numChars++; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + numChars++; + dst += Tcl_UniCharToUtf(-1, dst); + } + } else { + /* More data is expected. Revert the surrogate state */ + src -= 2; + dst--; + /* Note: leave result of TCL_CONVERT_NOSPACE as is */ + if (result == TCL_OK) { + result = TCL_CONVERT_MULTIBYTE; + } + } + } else if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* + * If we had a trailing byte at the end AND this is the last + * fragment AND profile is not "strict", stick FFFD in its place. + * Note in this case we DO need to check for room in dst. + */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; + } + } + } + break; case TCL_CONVERT_SYNTAX: - break; /* Nothing to do */ + break; /* Nothing to do */ } *srcReadPtr = src - srcStart; |
