summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2017-05-29 15:38:04 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2017-05-29 15:38:04 (GMT)
commit5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab (patch)
tree558eb66988703f183b84df65ce26e773d0fe71e3
parentd4b7f993738502e5111a8c56fb5ba5e7db33785e (diff)
parentd97d4ac534c4858f49230be22cdb9cb429b939fa (diff)
downloadtcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.zip
tcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.tar.gz
tcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.tar.bz2
merge trunk
-rw-r--r--generic/tclBinary.c6
-rw-r--r--generic/tclCmdIL.c4
-rw-r--r--generic/tclCompExpr.c4
-rw-r--r--generic/tclEncoding.c4
-rw-r--r--generic/tclLoad.c2
-rw-r--r--generic/tclParse.c4
-rw-r--r--generic/tclPkg.c1
-rw-r--r--generic/tclScan.c66
-rw-r--r--generic/tclStringObj.c26
-rw-r--r--generic/tclUtf.c32
-rw-r--r--win/tclWinPipe.c2
11 files changed, 88 insertions, 63 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 1a0e8c8..a693894 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -551,7 +551,7 @@ SetByteArrayFromAny(
byteArrayPtr = ckalloc(BYTEARRAY_SIZE(length));
for (dst = byteArrayPtr->bytes; src < srcEnd; ) {
- src += Tcl_UtfToUniChar(src, &ch);
+ src += TclUtfToUniChar(src, &ch);
improper = improper || (ch > 255);
*dst++ = UCHAR(ch);
}
@@ -1303,7 +1303,7 @@ BinaryFormatCmd(
Tcl_UniChar ch = 0;
char buf[TCL_UTF_MAX + 1];
- Tcl_UtfToUniChar(errorString, &ch);
+ TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"bad field specifier \"%s\"", buf));
@@ -1673,7 +1673,7 @@ BinaryScanCmd(
Tcl_UniChar ch = 0;
char buf[TCL_UTF_MAX + 1];
- Tcl_UtfToUniChar(errorString, &ch);
+ TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
Tcl_SetObjResult(interp, Tcl_ObjPrintf(
"bad field specifier \"%s\"", buf));
diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c
index a7a5f43..e2093bb 100644
--- a/generic/tclCmdIL.c
+++ b/generic/tclCmdIL.c
@@ -4440,8 +4440,8 @@ DictionaryCompare(
*/
if ((*left != '\0') && (*right != '\0')) {
- left += Tcl_UtfToUniChar(left, &uniLeft);
- right += Tcl_UtfToUniChar(right, &uniRight);
+ left += TclUtfToUniChar(left, &uniLeft);
+ right += TclUtfToUniChar(right, &uniRight);
/*
* Convert both chars to lower for the comparison, because
diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c
index 83bb883..24c8896 100644
--- a/generic/tclCompExpr.c
+++ b/generic/tclCompExpr.c
@@ -2064,13 +2064,13 @@ ParseLexeme(
if (!TclIsBareword(*start) || *start == '_') {
if (Tcl_UtfCharComplete(start, numBytes)) {
- scanned = Tcl_UtfToUniChar(start, &ch);
+ scanned = TclUtfToUniChar(start, &ch);
} else {
char utfBytes[TCL_UTF_MAX];
memcpy(utfBytes, start, (size_t) numBytes);
utfBytes[numBytes] = '\0';
- scanned = Tcl_UtfToUniChar(utfBytes, &ch);
+ scanned = TclUtfToUniChar(utfBytes, &ch);
}
*lexemePtr = INVALID;
Tcl_DecrRefCount(literal);
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 1f8cd6e..35cbfc9 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2344,7 +2344,7 @@ UtfToUtfProc(
src += 2;
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
- * Always check before using Tcl_UtfToUniChar. Not doing can so
+ * Always check before using TclUtfToUniChar. Not doing can so
* cause it run beyond the endof the buffer! If we happen such an
* incomplete char its byts are made to represent themselves.
*/
@@ -2353,7 +2353,7 @@ UtfToUtfProc(
src += 1;
dst += Tcl_UniCharToUtf(*chPtr, dst);
} else {
- int n = Tcl_UtfToUniChar(src, chPtr);
+ int n = TclUtfToUniChar(src, chPtr);
src += n;
if (!n) numChars--;
dst += Tcl_UniCharToUtf(*chPtr, dst);
diff --git a/generic/tclLoad.c b/generic/tclLoad.c
index bcda420..66637da 100644
--- a/generic/tclLoad.c
+++ b/generic/tclLoad.c
@@ -336,7 +336,7 @@ Tcl_LoadObjCmd(
}
#endif /* __CYGWIN__ */
for (p = pkgGuess; *p != 0; p += offset) {
- offset = Tcl_UtfToUniChar(p, &ch);
+ offset = TclUtfToUniChar(p, &ch);
if ((ch > 0x100)
|| !(isalpha(UCHAR(ch)) /* INTL: ISO only */
|| (UCHAR(ch) == '_'))) {
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 25f9b3d..73de7e8 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -963,13 +963,13 @@ TclParseBackslash(
*/
if (Tcl_UtfCharComplete(p, numBytes - 1)) {
- count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
+ count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */
} else {
char utfBytes[TCL_UTF_MAX];
memcpy(utfBytes, p, (size_t) (numBytes - 1));
utfBytes[numBytes - 1] = '\0';
- count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1;
+ count = TclUtfToUniChar(utfBytes, &unichar) + 1;
}
result = unichar;
break;
diff --git a/generic/tclPkg.c b/generic/tclPkg.c
index 9ad3cb7..3b0554a 100644
--- a/generic/tclPkg.c
+++ b/generic/tclPkg.c
@@ -224,6 +224,7 @@ static void PkgFilesCleanupProc(ClientData clientData,
entry = Tcl_NextHashEntry(&search);
}
Tcl_DeleteHashTable(&pkgFiles->table);
+ ckfree(pkgFiles);
return;
}
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 96aeefa..59384e1 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -78,11 +78,11 @@ BuildCharSet(
memset(cset, 0, sizeof(CharSet));
- offset = Tcl_UtfToUniChar(format, &ch);
+ offset = TclUtfToUniChar(format, &ch);
if (ch == '^') {
cset->exclude = 1;
format += offset;
- offset = Tcl_UtfToUniChar(format, &ch);
+ offset = TclUtfToUniChar(format, &ch);
}
end = format + offset;
@@ -91,14 +91,14 @@ BuildCharSet(
*/
if (ch == ']') {
- end += Tcl_UtfToUniChar(end, &ch);
+ end += TclUtfToUniChar(end, &ch);
}
nranges = 0;
while (ch != ']') {
if (ch == '-') {
nranges++;
}
- end += Tcl_UtfToUniChar(end, &ch);
+ end += TclUtfToUniChar(end, &ch);
}
cset->chars = ckalloc(sizeof(Tcl_UniChar) * (end - format - 1));
@@ -113,11 +113,11 @@ BuildCharSet(
*/
cset->nchars = cset->nranges = 0;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
start = ch;
if (ch == ']' || ch == '-') {
cset->chars[cset->nchars++] = ch;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
while (ch != ']') {
if (*format == '-') {
@@ -138,7 +138,7 @@ BuildCharSet(
cset->chars[cset->nchars++] = start;
cset->chars[cset->nchars++] = ch;
} else {
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
/*
* Check to see if the range is in reverse order.
@@ -156,7 +156,7 @@ BuildCharSet(
} else {
cset->chars[cset->nchars++] = ch;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
return format;
}
@@ -279,20 +279,20 @@ ValidateFormat(
xpgSize = objIndex = gotXpg = gotSequential = 0;
while (*format != '\0') {
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
flags = 0;
if (ch != '%') {
continue;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
if (ch == '%') {
continue;
}
if (ch == '*') {
flags |= SCAN_SUPPRESS;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
goto xpgCheckDone;
}
@@ -308,7 +308,7 @@ ValidateFormat(
goto notXpg;
}
format = end+1;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
gotXpg = 1;
if (gotSequential) {
goto mixedXPG;
@@ -347,7 +347,7 @@ ValidateFormat(
if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
value = strtoul(format-1, (char **) &format, 10); /* INTL: "C" locale. */
flags |= SCAN_WIDTH;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
/*
@@ -359,13 +359,13 @@ ValidateFormat(
if (*format == 'l') {
flags |= SCAN_BIG;
format += 1;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
break;
}
case 'L':
flags |= SCAN_LONGER;
case 'h':
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
@@ -434,24 +434,24 @@ ValidateFormat(
if (*format == '\0') {
goto badSet;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
if (ch == '^') {
if (*format == '\0') {
goto badSet;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
if (ch == ']') {
if (*format == '\0') {
goto badSet;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
while (ch != ']') {
if (*format == '\0') {
goto badSet;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
break;
badSet:
@@ -630,7 +630,7 @@ Tcl_ScanObjCmd(
nconversions = 0;
while (*format != '\0') {
int parseFlag = TCL_PARSE_NO_WHITESPACE;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
flags = 0;
@@ -639,13 +639,13 @@ Tcl_ScanObjCmd(
*/
if (Tcl_UniCharIsSpace(ch)) {
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
while (Tcl_UniCharIsSpace(sch)) {
if (*string == '\0') {
goto done;
}
string += offset;
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
}
continue;
}
@@ -656,14 +656,14 @@ Tcl_ScanObjCmd(
underflow = 1;
goto done;
}
- string += Tcl_UtfToUniChar(string, &sch);
+ string += TclUtfToUniChar(string, &sch);
if (ch != sch) {
goto done;
}
continue;
}
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
if (ch == '%') {
goto literal;
}
@@ -675,13 +675,13 @@ Tcl_ScanObjCmd(
if (ch == '*') {
flags |= SCAN_SUPPRESS;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
} else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
char *formatEnd;
value = strtoul(format-1, &formatEnd, 10);/* INTL: "C" locale. */
if (*formatEnd == '$') {
format = formatEnd+1;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
objIndex = (int) value - 1;
}
}
@@ -692,7 +692,7 @@ Tcl_ScanObjCmd(
if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
width = (int) strtoul(format-1, (char **) &format, 10);/* INTL: "C" locale. */
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
} else {
width = 0;
}
@@ -706,7 +706,7 @@ Tcl_ScanObjCmd(
if (*format == 'l') {
flags |= SCAN_BIG;
format += 1;
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
break;
}
case 'L':
@@ -715,7 +715,7 @@ Tcl_ScanObjCmd(
* Fall through so we skip to the next character.
*/
case 'h':
- format += Tcl_UtfToUniChar(format, &ch);
+ format += TclUtfToUniChar(format, &ch);
}
/*
@@ -799,7 +799,7 @@ Tcl_ScanObjCmd(
if (!(flags & SCAN_NOSKIP)) {
while (*string != '\0') {
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
if (!Tcl_UniCharIsSpace(sch)) {
break;
}
@@ -826,7 +826,7 @@ Tcl_ScanObjCmd(
}
end = string;
while (*end != '\0') {
- offset = Tcl_UtfToUniChar(end, &sch);
+ offset = TclUtfToUniChar(end, &sch);
if (Tcl_UniCharIsSpace(sch)) {
break;
}
@@ -854,7 +854,7 @@ Tcl_ScanObjCmd(
format = BuildCharSet(&cset, format);
while (*end != '\0') {
- offset = Tcl_UtfToUniChar(end, &sch);
+ offset = TclUtfToUniChar(end, &sch);
if (!CharInSet(&cset, (int)sch)) {
break;
}
@@ -885,7 +885,7 @@ Tcl_ScanObjCmd(
* Scan a single Unicode character.
*/
- offset = Tcl_UtfToUniChar(string, &sch);
+ offset = TclUtfToUniChar(string, &sch);
i = (int)sch;
if (!offset) {
offset = Tcl_UtfToUniChar(string, &sch);
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 48805bd..983096b 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -1689,7 +1689,7 @@ Tcl_AppendFormatToObj(
#endif
int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes;
Tcl_Obj *segment;
- int step = Tcl_UtfToUniChar(format, &ch);
+ int step = TclUtfToUniChar(format, &ch);
format += step;
if (ch != '%') {
@@ -1713,7 +1713,7 @@ Tcl_AppendFormatToObj(
* Step 0. Handle special case of escaped format marker (i.e., %%).
*/
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == '%') {
span = format;
numBytes = step;
@@ -1733,7 +1733,7 @@ Tcl_AppendFormatToObj(
newXpg = 1;
objIndex = position - 1;
format = end + 1;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
}
if (newXpg) {
@@ -1784,7 +1784,7 @@ Tcl_AppendFormatToObj(
}
if (sawFlag) {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
} while (sawFlag);
@@ -1796,7 +1796,7 @@ Tcl_AppendFormatToObj(
if (isdigit(UCHAR(ch))) {
width = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1812,7 +1812,7 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (width > limit) {
msg = overflow;
@@ -1828,12 +1828,12 @@ Tcl_AppendFormatToObj(
if (ch == '.') {
gotPrecision = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
if (isdigit(UCHAR(ch))) {
precision = strtoul(format, &end, 10);
format = end;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == '*') {
if (objIndex >= objc - 1) {
msg = badIndex[gotXpg];
@@ -1854,7 +1854,7 @@ Tcl_AppendFormatToObj(
}
objIndex++;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
}
/*
@@ -1864,14 +1864,14 @@ Tcl_AppendFormatToObj(
if (ch == 'h') {
useShort = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
} else if (ch == 'l') {
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
if (ch == 'l') {
useBig = 1;
format += step;
- step = Tcl_UtfToUniChar(format, &ch);
+ step = TclUtfToUniChar(format, &ch);
#ifndef TCL_WIDE_INT_IS_LONG
} else {
useWide = 1;
@@ -3419,7 +3419,7 @@ TclStringObjReverse(
* It's part of the contract for objPtr->bytes values.
* Thus, we can skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = Tcl_UtfToUniChar(from, &ch);
+ int bytesInChar = TclUtfToUniChar(from, &ch);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5892e1f..a6716c1 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -302,7 +302,9 @@ Tcl_UtfToUniChar(
*/
*chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
- return 2;
+ if ((*chPtr == 0) || (*chPtr <= 0x3ff) && (*chPtr > 0x7f)) {
+ return 2;
+ }
}
/*
@@ -317,7 +319,9 @@ Tcl_UtfToUniChar(
*chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
- return 3;
+ if ((*chPtr <= 0xffff) && (*chPtr > 0x3ff)) {
+ return 3;
+ }
}
/*
@@ -331,10 +335,30 @@ Tcl_UtfToUniChar(
/*
* Four-byte-character lead byte followed by three trail bytes.
*/
-
+#if TCL_UTF_MAX == 4
+ Tcl_UniChar surrogate;
+
+ byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12)
+ | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000;
+ surrogate = 0xD800 + (byte >> 10);
+ if (byte & 0x100000) {
+ /* out of range, < 0x10000 or > 0x10ffff */
+ } else if (*chPtr != surrogate) {
+ /* produce high surrogate, but don't advance source pointer */
+ *chPtr = surrogate;
+ return 0;
+ } else {
+ /* produce low surrogate, and advance source pointer */
+ *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF));
+ return 4;
+ }
+#else
*chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
- return 4;
+ if ((*chPtr <= 0x10ffff) && (*chPtr > 0xffff)) {
+ return 4;
+ }
+#endif
}
/*
diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c
index 5246d53..fe0ed2d 100644
--- a/win/tclWinPipe.c
+++ b/win/tclWinPipe.c
@@ -1482,7 +1482,7 @@ BuildCommandLine(
Tcl_UniChar ch;
for (start = arg; *start != '\0'; start += count) {
- count = Tcl_UtfToUniChar(start, &ch);
+ count = TclUtfToUniChar(start, &ch);
if (Tcl_UniCharIsSpace(ch)) { /* INTL: ISO space. */
quote = 1;
break;