summary refs log tree commit diff stats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- generic/tclEncoding.c 20
1 files changed, 11 insertions, 9 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 4789b7f..444f99e 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2300,7 +2300,7 @@ UtfToUtfProc(
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
- Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ int *chPtr = (int *) statePtr;
if (flags & TCL_ENCODING_START) {
*statePtr = 0;
@@ -2321,7 +2321,7 @@ UtfToUtfProc(
dstEnd = dst + dstLen - TCL_UTF_MAX;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+ if ((src > srcClose) && (!TclUCS4Complete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
* last UTF-8 character in the source buffer hasn't been cut off.
@@ -2341,6 +2341,7 @@ UtfToUtfProc(
*/
*dst++ = *src++;
+ *chPtr = 0; /* reset surrogate handling */
} else if (pureNullMode == 1 && UCHAR(*src) == 0xC0 &&
(src + 1 < srcEnd) && UCHAR(*(src+1)) == 0x80) {
/*
@@ -2348,24 +2349,25 @@ UtfToUtfProc(
*/
*dst++ = 0;
+ *chPtr = 0; /* reset surrogate handling */
src += 2;
- } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
+ } else if (!TclUCS4Complete(src, srcEnd - src)) {
/*
- * Always check before using TclUtfToUniChar. Not doing can so
+ * Always check before using TclUtfToUCS4. Not doing so can
* cause it to run beyond the end of the buffer! If we happen upon such an
* incomplete char, its bytes are made to represent themselves.
*/
- *chPtr = (unsigned char) *src;
+ *chPtr = UCHAR(*src);
src += 1;
dst += Tcl_UniCharToUtf(*chPtr, dst);
} else {
- src += TclUtfToUniChar(src, chPtr);
+ src += TclUtfToUCS4(src, chPtr);
if ((*chPtr | 0x7FF) == 0xDFFF) {
/* A surrogate character is detected, handle especially */
- Tcl_UniChar low = *chPtr;
- size_t len = (src <= srcEnd-3) ? Tcl_UtfToUniChar(src, &low) : 0;
- if (((low | 0x3FF) != 0xDFFF) || (*chPtr & 0x400)) {
+ int low = *chPtr;
+ size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
+ if (((low & ~0x3FF) != 0xC00) || (*chPtr & 0x400)) {
*dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
*dst++ = (char) ((*chPtr | 0x80) & 0xBF);