diff options
-rw-r--r-- | generic/tclCmdMZ.c | 42 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 10 | ||||
-rw-r--r-- | generic/tclInt.h | 6 | ||||
-rw-r--r-- | generic/tclParse.c | 10 | ||||
-rw-r--r-- | generic/tclUtf.c | 15 |
5 files changed, 47 insertions, 36 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index c47490a..0764c60 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2500,8 +2500,8 @@ StringStartCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; - const char *p, *string; - int cur, index, length, numChars; + const Tcl_UniChar *p, *string; + int cur, index, length; Tcl_Obj *obj; if (objc != 3) { @@ -2509,32 +2509,30 @@ StringStartCmd( return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); - numChars = Tcl_NumUtfChars(string, length); - if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + string = Tcl_GetUnicodeFromObj(objv[1], &length); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); - if (index >= numChars) { - index = numChars - 1; + if (index >= length) { + index = length - 1; } cur = 0; if (index > 0) { - p = Tcl_UtfAtIndex(string, index); + p = &string[index]; - TclUtfToUCS4(p, &ch); + (void)TclUniCharToUCS4(p, &ch); for (cur = index; cur >= 0; cur--) { int delta = 0; - const char *next; + const Tcl_UniChar *next; if (!Tcl_UniCharIsWordChar(ch)) { break; } - next = TclUtfPrev(p, string); + next = TclUCS4Prev(p, string); do { next += delta; - delta = TclUtfToUCS4(next, &ch); + delta = TclUniCharToUCS4(next, &ch); } while (next + delta < p); p = next; } @@ -2572,8 +2570,8 @@ StringEndCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; - const char *p, *end, *string; - int cur, index, length, numChars; + const Tcl_UniChar *p, *end, *string; + int cur, index, length; Tcl_Obj *obj; if (objc != 3) { @@ -2581,20 +2579,18 @@ StringEndCmd( return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); - numChars = Tcl_NumUtfChars(string, length); - if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { + string = Tcl_GetUnicodeFromObj(objv[1], &length); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); if (index < 0) { index = 0; } - if (index < numChars) { - p = Tcl_UtfAtIndex(string, index); + if (index < length) { + p = &string[index]; end = string+length; for (cur = index; p < end; cur++) { - p += TclUtfToUCS4(p, &ch); + p += TclUniCharToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -2603,7 +2599,7 @@ StringEndCmd( cur++; } } else { - cur = numChars; + cur = length; } TclNewIntObj(obj, cur); Tcl_SetObjResult(interp, obj); diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 41938e3..fa15fba 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1928,7 +1928,7 @@ ParseLexeme( { const char *end; int scanned, size; - Tcl_UniChar ch = 0; + int ch; Tcl_Obj *literal = NULL; unsigned char byte; @@ -2145,14 +2145,14 @@ ParseLexeme( */ if (!TclIsBareword(*start) || *start == '_') { - if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfToUniChar(start, &ch); + if (TclUCS4Complete(start, numBytes)) { + scanned = TclUtfToUCS4(start, &ch); } else { - char utfBytes[4]; + char utfBytes[8]; memcpy(utfBytes, start, numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfToUniChar(utfBytes, &ch); + scanned = TclUtfToUCS4(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); diff --git a/generic/tclInt.h b/generic/tclInt.h index 9dde88b..8088d0e 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3252,12 +3252,14 @@ MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar # define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) +# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src)) # define TclUCS4Complete Tcl_UtfCharComplete # define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length))) #else - MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); - MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr); + MODULE_SCOPE int TclUtfToUCS4(const char *, int *); + MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *); + MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *); # define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) # define TclChar16Complete Tcl_UtfCharComplete diff --git a/generic/tclParse.c b/generic/tclParse.c index daad31d..b863ff2 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -789,7 +789,7 @@ TclParseBackslash( * written. At most 4 bytes will be written there. */ { const char *p = src+1; - Tcl_UniChar unichar = 0; + int unichar; int result; int count; char buf[4] = ""; @@ -935,14 +935,14 @@ TclParseBackslash( * #217987] test subst-3.2 */ - if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ + if (TclUCS4Complete(p, numBytes - 1)) { + count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */ } else { - char utfBytes[4]; + char utfBytes[8]; memcpy(utfBytes, p, numBytes - 1); utfBytes[numBytes - 1] = '\0'; - count = TclUtfToUniChar(utfBytes, &unichar) + 1; + count = TclUtfToUCS4(utfBytes, &unichar) + 1; } result = unichar; break; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 11bde5c..525cd50 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2629,12 +2629,25 @@ TclUniCharToUCS4( * by the Tcl_UniChar string. */ { if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { - *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000; + *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000; return 2; } *ucs4Ptr = src[0]; return 1; } + +const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) { + if (src <= ptr + 1) { + return ptr; + } + if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) { + return src - 2; + } + return src - 1; +} + + + #endif /* |