diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-11-18 14:29:56 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-11-18 14:29:56 (GMT) |
commit | 0ebd62435dc4cc8b5ba2050d485dc8e5ae794082 (patch) | |
tree | 58a504a96fab64835f007ce868e32c6cd4e8bf0a /generic | |
parent | 66ed43d1fe6eee5083e335459b4007b4c7d383cc (diff) | |
parent | 8e1f957a669b2f4b84dca7e8a27f2985c0625172 (diff) | |
download | tcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.zip tcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.tar.gz tcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.tar.bz2 |
Merge 8.7
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclCmdMZ.c | 42 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 10 | ||||
-rw-r--r-- | generic/tclInt.h | 6 | ||||
-rw-r--r-- | generic/tclParse.c | 10 | ||||
-rw-r--r-- | generic/tclUtf.c | 15 |
5 files changed, 47 insertions, 36 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index d5828cd..4ffe48b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2505,8 +2505,8 @@ StringStartCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; - const char *p, *string; - size_t numChars, length, cur, index; + const Tcl_UniChar *p, *string; + size_t cur, index, length; Tcl_Obj *obj; if (objc != 3) { @@ -2514,32 +2514,30 @@ StringStartCmd( return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); - numChars = Tcl_NumUtfChars(string, length) - 1; - if (TclGetIntForIndexM(interp, objv[2], numChars, &index) != TCL_OK) { + string = TclGetUnicodeFromObj(objv[1], &length); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } - string = TclGetString(objv[1]); - if (index + 1 > numChars + 1) { - index = numChars; + if (index + 1 >= length + 1) { + index = length - 1; } cur = 0; if (index + 1 > 1) { - p = Tcl_UtfAtIndex(string, index); + p = &string[index]; - TclUtfToUCS4(p, &ch); + (void)TclUniCharToUCS4(p, &ch); for (cur = index; cur != TCL_INDEX_NONE; cur--) { int delta = 0; - const char *next; + const Tcl_UniChar *next; if (!Tcl_UniCharIsWordChar(ch)) { break; } - next = TclUtfPrev(p, string); + next = TclUCS4Prev(p, string); do { next += delta; - delta = TclUtfToUCS4(next, &ch); + delta = TclUniCharToUCS4(next, &ch); } while (next + delta < p); p = next; } @@ -2577,8 +2575,8 @@ StringEndCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; - const char *p, *end, *string; - size_t length, numChars, cur, index; + const Tcl_UniChar *p, *end, *string; + size_t cur, index, length; Tcl_Obj *obj; if (objc != 3) { @@ -2586,20 +2584,18 @@ StringEndCmd( return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); - numChars = Tcl_NumUtfChars(string, length) - 1; - if (TclGetIntForIndexM(interp, objv[2], numChars, &index) != TCL_OK) { + string = TclGetUnicodeFromObj(objv[1], &length); + if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } - string = TclGetStringFromObj(objv[1], &length); if (index == TCL_INDEX_NONE) { index = TCL_INDEX_START; } - if (index + 1 <= numChars + 1) { - p = Tcl_UtfAtIndex(string, index); + if (index + 1 <= length + 1) { + p = &string[index]; end = string+length; for (cur = index; p < end; cur++) { - p += TclUtfToUCS4(p, &ch); + p += TclUniCharToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } @@ -2608,7 +2604,7 @@ StringEndCmd( cur++; } } else { - cur = numChars + 1; + cur = length; } TclNewIndexObj(obj, cur); Tcl_SetObjResult(interp, obj); diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 2db2c8a..2404916 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1927,7 +1927,7 @@ ParseLexeme( storage, if non-NULL. */ { const char *end; - Tcl_UniChar ch = 0; + int ch; Tcl_Obj *literal = NULL; unsigned char byte; @@ -2148,14 +2148,14 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { size_t scanned; - if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = TclUtfToUniChar(start, &ch); + if (TclUCS4Complete(start, numBytes)) { + scanned = TclUtfToUCS4(start, &ch); } else { - char utfBytes[4]; + char utfBytes[8]; memcpy(utfBytes, start, numBytes); utfBytes[numBytes] = '\0'; - scanned = TclUtfToUniChar(utfBytes, &ch); + scanned = TclUtfToUCS4(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); diff --git a/generic/tclInt.h b/generic/tclInt.h index 2a0dfa6..3a759ca 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3183,12 +3183,14 @@ MODULE_SCOPE size_t TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar # define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) +# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src)) # define TclUCS4Complete Tcl_UtfCharComplete # define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length))) #else - MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); - MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr); + MODULE_SCOPE int TclUtfToUCS4(const char *, int *); + MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *); + MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *); # define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) # define TclChar16Complete Tcl_UtfCharComplete diff --git a/generic/tclParse.c b/generic/tclParse.c index 7bb92d5..c7404c3 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -790,7 +790,7 @@ TclParseBackslash( * written. At most 4 bytes will be written there. */ { const char *p = src+1; - Tcl_UniChar unichar = 0; + int unichar; int result; size_t count; char buf[4] = ""; @@ -936,14 +936,14 @@ TclParseBackslash( * #217987] test subst-3.2 */ - if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ + if (TclUCS4Complete(p, numBytes - 1)) { + count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */ } else { - char utfBytes[4]; + char utfBytes[8]; memcpy(utfBytes, p, numBytes - 1); utfBytes[numBytes - 1] = '\0'; - count = TclUtfToUniChar(utfBytes, &unichar) + 1; + count = TclUtfToUCS4(utfBytes, &unichar) + 1; } result = unichar; break; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ae87e1b..04a47c8 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2636,12 +2636,25 @@ TclUniCharToUCS4( * by the Tcl_UniChar string. */ { if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { - *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000; + *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000; return 2; } *ucs4Ptr = src[0]; return 1; } + +const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) { + if (src <= ptr + 1) { + return ptr; + } + if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) { + return src - 2; + } + return src - 1; +} + + + #endif /* |