summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-11-18 14:29:56 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-11-18 14:29:56 (GMT)
commit0ebd62435dc4cc8b5ba2050d485dc8e5ae794082 (patch)
tree58a504a96fab64835f007ce868e32c6cd4e8bf0a /generic
parent66ed43d1fe6eee5083e335459b4007b4c7d383cc (diff)
parent8e1f957a669b2f4b84dca7e8a27f2985c0625172 (diff)
downloadtcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.zip
tcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.tar.gz
tcl-0ebd62435dc4cc8b5ba2050d485dc8e5ae794082.tar.bz2
Merge 8.7
Diffstat (limited to 'generic')
-rw-r--r--generic/tclCmdMZ.c42
-rw-r--r--generic/tclCompExpr.c10
-rw-r--r--generic/tclInt.h6
-rw-r--r--generic/tclParse.c10
-rw-r--r--generic/tclUtf.c15
5 files changed, 47 insertions, 36 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d5828cd..4ffe48b 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2505,8 +2505,8 @@ StringStartCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *string;
- size_t numChars, length, cur, index;
+ const Tcl_UniChar *p, *string;
+ size_t cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2514,32 +2514,30 @@ StringStartCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length) - 1;
- if (TclGetIntForIndexM(interp, objv[2], numChars, &index) != TCL_OK) {
+ string = TclGetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetString(objv[1]);
- if (index + 1 > numChars + 1) {
- index = numChars;
+ if (index + 1 >= length + 1) {
+ index = length - 1;
}
cur = 0;
if (index + 1 > 1) {
- p = Tcl_UtfAtIndex(string, index);
+ p = &string[index];
- TclUtfToUCS4(p, &ch);
+ (void)TclUniCharToUCS4(p, &ch);
for (cur = index; cur != TCL_INDEX_NONE; cur--) {
int delta = 0;
- const char *next;
+ const Tcl_UniChar *next;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
- next = TclUtfPrev(p, string);
+ next = TclUCS4Prev(p, string);
do {
next += delta;
- delta = TclUtfToUCS4(next, &ch);
+ delta = TclUniCharToUCS4(next, &ch);
} while (next + delta < p);
p = next;
}
@@ -2577,8 +2575,8 @@ StringEndCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *end, *string;
- size_t length, numChars, cur, index;
+ const Tcl_UniChar *p, *end, *string;
+ size_t cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2586,20 +2584,18 @@ StringEndCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length) - 1;
- if (TclGetIntForIndexM(interp, objv[2], numChars, &index) != TCL_OK) {
+ string = TclGetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
if (index == TCL_INDEX_NONE) {
index = TCL_INDEX_START;
}
- if (index + 1 <= numChars + 1) {
- p = Tcl_UtfAtIndex(string, index);
+ if (index + 1 <= length + 1) {
+ p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUtfToUCS4(p, &ch);
+ p += TclUniCharToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2608,7 +2604,7 @@ StringEndCmd(
cur++;
}
} else {
- cur = numChars + 1;
+ cur = length;
}
TclNewIndexObj(obj, cur);
Tcl_SetObjResult(interp, obj);
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 2db2c8a..2404916 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1927,7 +1927,7 @@ ParseLexeme(
storage, if non-NULL. */
{
const char *end;
- Tcl_UniChar ch = 0;
+ int ch;
Tcl_Obj *literal = NULL;
unsigned char byte;
@@ -2148,14 +2148,14 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
size_t scanned;
- if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUniChar(start, &ch);
+ if (TclUCS4Complete(start, numBytes)) {
+ scanned = TclUtfToUCS4(start, &ch);
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUCS4(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 2a0dfa6..3a759ca 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3183,12 +3183,14 @@ MODULE_SCOPE size_t TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
+# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
# define TclUCS4Complete Tcl_UtfCharComplete
# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
- MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
+ MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
# define TclChar16Complete Tcl_UtfCharComplete
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 7bb92d5..c7404c3 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -790,7 +790,7 @@ TclParseBackslash(
* written. At most 4 bytes will be written there. */
{
const char *p = src+1;
- Tcl_UniChar unichar = 0;
+ int unichar;
int result;
size_t count;
char buf[4] = "";
@@ -936,14 +936,14 @@ TclParseBackslash(
* #217987] test subst-3.2
*/
- if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ if (TclUCS4Complete(p, numBytes - 1)) {
+ count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUCS4(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index ae87e1b..04a47c8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2636,12 +2636,25 @@ TclUniCharToUCS4(
* by the Tcl_UniChar string. */
{
if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
return 2;
}
*ucs4Ptr = src[0];
return 1;
}
+
+const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
+ if (src <= ptr + 1) {
+ return ptr;
+ }
+ if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
+ return src - 2;
+ }
+ return src - 1;
+}
+
+
+
#endif
/*