summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclCmdMZ.c42
-rw-r--r--generic/tclCompExpr.c10
-rw-r--r--generic/tclInt.h6
-rw-r--r--generic/tclParse.c10
-rw-r--r--generic/tclUtf.c15
5 files changed, 47 insertions, 36 deletions
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index c47490a..0764c60 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -2500,8 +2500,8 @@ StringStartCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2509,32 +2509,30 @@ StringStartCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- if (index >= numChars) {
- index = numChars - 1;
+ if (index >= length) {
+ index = length - 1;
}
cur = 0;
if (index > 0) {
- p = Tcl_UtfAtIndex(string, index);
+ p = &string[index];
- TclUtfToUCS4(p, &ch);
+ (void)TclUniCharToUCS4(p, &ch);
for (cur = index; cur >= 0; cur--) {
int delta = 0;
- const char *next;
+ const Tcl_UniChar *next;
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
- next = TclUtfPrev(p, string);
+ next = TclUCS4Prev(p, string);
do {
next += delta;
- delta = TclUtfToUCS4(next, &ch);
+ delta = TclUniCharToUCS4(next, &ch);
} while (next + delta < p);
p = next;
}
@@ -2572,8 +2570,8 @@ StringEndCmd(
Tcl_Obj *const objv[]) /* Argument objects. */
{
int ch;
- const char *p, *end, *string;
- int cur, index, length, numChars;
+ const Tcl_UniChar *p, *end, *string;
+ int cur, index, length;
Tcl_Obj *obj;
if (objc != 3) {
@@ -2581,20 +2579,18 @@ StringEndCmd(
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
- numChars = Tcl_NumUtfChars(string, length);
- if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
+ string = Tcl_GetUnicodeFromObj(objv[1], &length);
+ if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) {
return TCL_ERROR;
}
- string = TclGetStringFromObj(objv[1], &length);
if (index < 0) {
index = 0;
}
- if (index < numChars) {
- p = Tcl_UtfAtIndex(string, index);
+ if (index < length) {
+ p = &string[index];
end = string+length;
for (cur = index; p < end; cur++) {
- p += TclUtfToUCS4(p, &ch);
+ p += TclUniCharToUCS4(p, &ch);
if (!Tcl_UniCharIsWordChar(ch)) {
break;
}
@@ -2603,7 +2599,7 @@ StringEndCmd(
cur++;
}
} else {
- cur = numChars;
+ cur = length;
}
TclNewIntObj(obj, cur);
Tcl_SetObjResult(interp, obj);
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 41938e3..fa15fba 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -1928,7 +1928,7 @@ ParseLexeme(
{
const char *end;
int scanned, size;
- Tcl_UniChar ch = 0;
+ int ch;
Tcl_Obj *literal = NULL;
unsigned char byte;
@@ -2145,14 +2145,14 @@ ParseLexeme(
*/
if (!TclIsBareword(*start) || *start == '_') {
- if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = TclUtfToUniChar(start, &ch);
+ if (TclUCS4Complete(start, numBytes)) {
+ scanned = TclUtfToUCS4(start, &ch);
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, start, numBytes);
utfBytes[numBytes] = '\0';
- scanned = TclUtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUCS4(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 9dde88b..8088d0e 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,12 +3252,14 @@ MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
+# define TclUCS4Prev(src, ptr) (((src) > (ptr)) ? ((src) - 1) : (src))
# define TclUCS4Complete Tcl_UtfCharComplete
# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
- MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
- MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUtfToUCS4(const char *, int *);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *, int *);
+ MODULE_SCOPE const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *, const Tcl_UniChar *);
# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
# define TclChar16Complete Tcl_UtfCharComplete
diff --git a/generic/tclParse.c b/generic/tclParse.c
index daad31d..b863ff2 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -789,7 +789,7 @@ TclParseBackslash(
* written. At most 4 bytes will be written there. */
{
const char *p = src+1;
- Tcl_UniChar unichar = 0;
+ int unichar;
int result;
int count;
char buf[4] = "";
@@ -935,14 +935,14 @@ TclParseBackslash(
* #217987] test subst-3.2
*/
- if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ if (TclUCS4Complete(p, numBytes - 1)) {
+ count = TclUtfToUCS4(p, &unichar) + 1; /* +1 for '\' */
} else {
- char utfBytes[4];
+ char utfBytes[8];
memcpy(utfBytes, p, numBytes - 1);
utfBytes[numBytes - 1] = '\0';
- count = TclUtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUCS4(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 11bde5c..525cd50 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2629,12 +2629,25 @@ TclUniCharToUCS4(
* by the Tcl_UniChar string. */
{
if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
- *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[1] & 0x3FF)) + 0x10000;
return 2;
}
*ucs4Ptr = src[0];
return 1;
}
+
+const Tcl_UniChar *TclUCS4Prev(const Tcl_UniChar *src, const Tcl_UniChar *ptr) {
+ if (src <= ptr + 1) {
+ return ptr;
+ }
+ if (((src[-1] & 0xFC00) == 0xDC00) && ((src[-2] & 0xFC00) == 0xD800)) {
+ return src - 2;
+ }
+ return src - 1;
+}
+
+
+
#endif
/*