diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-05-29 15:38:04 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-05-29 15:38:04 (GMT) |
commit | 5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab (patch) | |
tree | 558eb66988703f183b84df65ce26e773d0fe71e3 | |
parent | d4b7f993738502e5111a8c56fb5ba5e7db33785e (diff) | |
parent | d97d4ac534c4858f49230be22cdb9cb429b939fa (diff) | |
download | tcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.zip tcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.tar.gz tcl-5ff6b263c67e2d7dbf71c0c1b983170cb5d28eab.tar.bz2 |
merge trunk
-rw-r--r-- | generic/tclBinary.c | 6 | ||||
-rw-r--r-- | generic/tclCmdIL.c | 4 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 4 | ||||
-rw-r--r-- | generic/tclEncoding.c | 4 | ||||
-rw-r--r-- | generic/tclLoad.c | 2 | ||||
-rw-r--r-- | generic/tclParse.c | 4 | ||||
-rw-r--r-- | generic/tclPkg.c | 1 | ||||
-rw-r--r-- | generic/tclScan.c | 66 | ||||
-rw-r--r-- | generic/tclStringObj.c | 26 | ||||
-rw-r--r-- | generic/tclUtf.c | 32 | ||||
-rw-r--r-- | win/tclWinPipe.c | 2 |
11 files changed, 88 insertions, 63 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 1a0e8c8..a693894 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -551,7 +551,7 @@ SetByteArrayFromAny( byteArrayPtr = ckalloc(BYTEARRAY_SIZE(length)); for (dst = byteArrayPtr->bytes; src < srcEnd; ) { - src += Tcl_UtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, &ch); improper = improper || (ch > 255); *dst++ = UCHAR(ch); } @@ -1303,7 +1303,7 @@ BinaryFormatCmd( Tcl_UniChar ch = 0; char buf[TCL_UTF_MAX + 1]; - Tcl_UtfToUniChar(errorString, &ch); + TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); @@ -1673,7 +1673,7 @@ BinaryScanCmd( Tcl_UniChar ch = 0; char buf[TCL_UTF_MAX + 1]; - Tcl_UtfToUniChar(errorString, &ch); + TclUtfToUniChar(errorString, &ch); buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; Tcl_SetObjResult(interp, Tcl_ObjPrintf( "bad field specifier \"%s\"", buf)); diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index a7a5f43..e2093bb 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -4440,8 +4440,8 @@ DictionaryCompare( */ if ((*left != '\0') && (*right != '\0')) { - left += Tcl_UtfToUniChar(left, &uniLeft); - right += Tcl_UtfToUniChar(right, &uniRight); + left += TclUtfToUniChar(left, &uniLeft); + right += TclUtfToUniChar(right, &uniRight); /* * Convert both chars to lower for the comparison, because diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 83bb883..24c8896 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -2064,13 +2064,13 @@ ParseLexeme( if (!TclIsBareword(*start) || *start == '_') { if (Tcl_UtfCharComplete(start, numBytes)) { - scanned = Tcl_UtfToUniChar(start, &ch); + scanned = TclUtfToUniChar(start, &ch); } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, start, (size_t) numBytes); utfBytes[numBytes] = '\0'; - scanned = Tcl_UtfToUniChar(utfBytes, &ch); + scanned = TclUtfToUniChar(utfBytes, &ch); } *lexemePtr = INVALID; Tcl_DecrRefCount(literal); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1f8cd6e..35cbfc9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2344,7 +2344,7 @@ UtfToUtfProc( src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* - * Always check before using Tcl_UtfToUniChar. Not doing can so + * Always check before using TclUtfToUniChar. Not doing can so * cause it run beyond the endof the buffer! If we happen such an * incomplete char its byts are made to represent themselves. */ @@ -2353,7 +2353,7 @@ UtfToUtfProc( src += 1; dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - int n = Tcl_UtfToUniChar(src, chPtr); + int n = TclUtfToUniChar(src, chPtr); src += n; if (!n) numChars--; dst += Tcl_UniCharToUtf(*chPtr, dst); diff --git a/generic/tclLoad.c b/generic/tclLoad.c index bcda420..66637da 100644 --- a/generic/tclLoad.c +++ b/generic/tclLoad.c @@ -336,7 +336,7 @@ Tcl_LoadObjCmd( } #endif /* __CYGWIN__ */ for (p = pkgGuess; *p != 0; p += offset) { - offset = Tcl_UtfToUniChar(p, &ch); + offset = TclUtfToUniChar(p, &ch); if ((ch > 0x100) || !(isalpha(UCHAR(ch)) /* INTL: ISO only */ || (UCHAR(ch) == '_'))) { diff --git a/generic/tclParse.c b/generic/tclParse.c index 25f9b3d..73de7e8 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -963,13 +963,13 @@ TclParseBackslash( */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { - count = Tcl_UtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ + count = TclUtfToUniChar(p, &unichar) + 1; /* +1 for '\' */ } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; - count = Tcl_UtfToUniChar(utfBytes, &unichar) + 1; + count = TclUtfToUniChar(utfBytes, &unichar) + 1; } result = unichar; break; diff --git a/generic/tclPkg.c b/generic/tclPkg.c index 9ad3cb7..3b0554a 100644 --- a/generic/tclPkg.c +++ b/generic/tclPkg.c @@ -224,6 +224,7 @@ static void PkgFilesCleanupProc(ClientData clientData, entry = Tcl_NextHashEntry(&search); } Tcl_DeleteHashTable(&pkgFiles->table); + ckfree(pkgFiles); return; } diff --git a/generic/tclScan.c b/generic/tclScan.c index 96aeefa..59384e1 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -78,11 +78,11 @@ BuildCharSet( memset(cset, 0, sizeof(CharSet)); - offset = Tcl_UtfToUniChar(format, &ch); + offset = TclUtfToUniChar(format, &ch); if (ch == '^') { cset->exclude = 1; format += offset; - offset = Tcl_UtfToUniChar(format, &ch); + offset = TclUtfToUniChar(format, &ch); } end = format + offset; @@ -91,14 +91,14 @@ BuildCharSet( */ if (ch == ']') { - end += Tcl_UtfToUniChar(end, &ch); + end += TclUtfToUniChar(end, &ch); } nranges = 0; while (ch != ']') { if (ch == '-') { nranges++; } - end += Tcl_UtfToUniChar(end, &ch); + end += TclUtfToUniChar(end, &ch); } cset->chars = ckalloc(sizeof(Tcl_UniChar) * (end - format - 1)); @@ -113,11 +113,11 @@ BuildCharSet( */ cset->nchars = cset->nranges = 0; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); start = ch; if (ch == ']' || ch == '-') { cset->chars[cset->nchars++] = ch; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } while (ch != ']') { if (*format == '-') { @@ -138,7 +138,7 @@ BuildCharSet( cset->chars[cset->nchars++] = start; cset->chars[cset->nchars++] = ch; } else { - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); /* * Check to see if the range is in reverse order. @@ -156,7 +156,7 @@ BuildCharSet( } else { cset->chars[cset->nchars++] = ch; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } return format; } @@ -279,20 +279,20 @@ ValidateFormat( xpgSize = objIndex = gotXpg = gotSequential = 0; while (*format != '\0') { - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); flags = 0; if (ch != '%') { continue; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); if (ch == '%') { continue; } if (ch == '*') { flags |= SCAN_SUPPRESS; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); goto xpgCheckDone; } @@ -308,7 +308,7 @@ ValidateFormat( goto notXpg; } format = end+1; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); gotXpg = 1; if (gotSequential) { goto mixedXPG; @@ -347,7 +347,7 @@ ValidateFormat( if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ value = strtoul(format-1, (char **) &format, 10); /* INTL: "C" locale. */ flags |= SCAN_WIDTH; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } /* @@ -359,13 +359,13 @@ ValidateFormat( if (*format == 'l') { flags |= SCAN_BIG; format += 1; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); break; } case 'L': flags |= SCAN_LONGER; case 'h': - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { @@ -434,24 +434,24 @@ ValidateFormat( if (*format == '\0') { goto badSet; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); if (ch == '^') { if (*format == '\0') { goto badSet; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } if (ch == ']') { if (*format == '\0') { goto badSet; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } while (ch != ']') { if (*format == '\0') { goto badSet; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } break; badSet: @@ -630,7 +630,7 @@ Tcl_ScanObjCmd( nconversions = 0; while (*format != '\0') { int parseFlag = TCL_PARSE_NO_WHITESPACE; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); flags = 0; @@ -639,13 +639,13 @@ Tcl_ScanObjCmd( */ if (Tcl_UniCharIsSpace(ch)) { - offset = Tcl_UtfToUniChar(string, &sch); + offset = TclUtfToUniChar(string, &sch); while (Tcl_UniCharIsSpace(sch)) { if (*string == '\0') { goto done; } string += offset; - offset = Tcl_UtfToUniChar(string, &sch); + offset = TclUtfToUniChar(string, &sch); } continue; } @@ -656,14 +656,14 @@ Tcl_ScanObjCmd( underflow = 1; goto done; } - string += Tcl_UtfToUniChar(string, &sch); + string += TclUtfToUniChar(string, &sch); if (ch != sch) { goto done; } continue; } - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); if (ch == '%') { goto literal; } @@ -675,13 +675,13 @@ Tcl_ScanObjCmd( if (ch == '*') { flags |= SCAN_SUPPRESS; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ char *formatEnd; value = strtoul(format-1, &formatEnd, 10);/* INTL: "C" locale. */ if (*formatEnd == '$') { format = formatEnd+1; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); objIndex = (int) value - 1; } } @@ -692,7 +692,7 @@ Tcl_ScanObjCmd( if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ width = (int) strtoul(format-1, (char **) &format, 10);/* INTL: "C" locale. */ - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } else { width = 0; } @@ -706,7 +706,7 @@ Tcl_ScanObjCmd( if (*format == 'l') { flags |= SCAN_BIG; format += 1; - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); break; } case 'L': @@ -715,7 +715,7 @@ Tcl_ScanObjCmd( * Fall through so we skip to the next character. */ case 'h': - format += Tcl_UtfToUniChar(format, &ch); + format += TclUtfToUniChar(format, &ch); } /* @@ -799,7 +799,7 @@ Tcl_ScanObjCmd( if (!(flags & SCAN_NOSKIP)) { while (*string != '\0') { - offset = Tcl_UtfToUniChar(string, &sch); + offset = TclUtfToUniChar(string, &sch); if (!Tcl_UniCharIsSpace(sch)) { break; } @@ -826,7 +826,7 @@ Tcl_ScanObjCmd( } end = string; while (*end != '\0') { - offset = Tcl_UtfToUniChar(end, &sch); + offset = TclUtfToUniChar(end, &sch); if (Tcl_UniCharIsSpace(sch)) { break; } @@ -854,7 +854,7 @@ Tcl_ScanObjCmd( format = BuildCharSet(&cset, format); while (*end != '\0') { - offset = Tcl_UtfToUniChar(end, &sch); + offset = TclUtfToUniChar(end, &sch); if (!CharInSet(&cset, (int)sch)) { break; } @@ -885,7 +885,7 @@ Tcl_ScanObjCmd( * Scan a single Unicode character. */ - offset = Tcl_UtfToUniChar(string, &sch); + offset = TclUtfToUniChar(string, &sch); i = (int)sch; if (!offset) { offset = Tcl_UtfToUniChar(string, &sch); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 48805bd..983096b 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1689,7 +1689,7 @@ Tcl_AppendFormatToObj( #endif int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes; Tcl_Obj *segment; - int step = Tcl_UtfToUniChar(format, &ch); + int step = TclUtfToUniChar(format, &ch); format += step; if (ch != '%') { @@ -1713,7 +1713,7 @@ Tcl_AppendFormatToObj( * Step 0. Handle special case of escaped format marker (i.e., %%). */ - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); if (ch == '%') { span = format; numBytes = step; @@ -1733,7 +1733,7 @@ Tcl_AppendFormatToObj( newXpg = 1; objIndex = position - 1; format = end + 1; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } } if (newXpg) { @@ -1784,7 +1784,7 @@ Tcl_AppendFormatToObj( } if (sawFlag) { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } } while (sawFlag); @@ -1796,7 +1796,7 @@ Tcl_AppendFormatToObj( if (isdigit(UCHAR(ch))) { width = strtoul(format, &end, 10); format = end; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; @@ -1812,7 +1812,7 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } if (width > limit) { msg = overflow; @@ -1828,12 +1828,12 @@ Tcl_AppendFormatToObj( if (ch == '.') { gotPrecision = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } if (isdigit(UCHAR(ch))) { precision = strtoul(format, &end, 10); format = end; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == '*') { if (objIndex >= objc - 1) { msg = badIndex[gotXpg]; @@ -1854,7 +1854,7 @@ Tcl_AppendFormatToObj( } objIndex++; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } /* @@ -1864,14 +1864,14 @@ Tcl_AppendFormatToObj( if (ch == 'h') { useShort = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); } else if (ch == 'l') { format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); if (ch == 'l') { useBig = 1; format += step; - step = Tcl_UtfToUniChar(format, &ch); + step = TclUtfToUniChar(format, &ch); #ifndef TCL_WIDE_INT_IS_LONG } else { useWide = 1; @@ -3419,7 +3419,7 @@ TclStringObjReverse( * It's part of the contract for objPtr->bytes values. * Thus, we can skip calling Tcl_UtfCharComplete() here. */ - int bytesInChar = Tcl_UtfToUniChar(from, &ch); + int bytesInChar = TclUtfToUniChar(from, &ch); ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5892e1f..a6716c1 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -302,7 +302,9 @@ Tcl_UtfToUniChar( */ *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F)); - return 2; + if ((*chPtr == 0) || (*chPtr <= 0x3ff) && (*chPtr > 0x7f)) { + return 2; + } } /* @@ -317,7 +319,9 @@ Tcl_UtfToUniChar( *chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); - return 3; + if ((*chPtr <= 0xffff) && (*chPtr > 0x3ff)) { + return 3; + } } /* @@ -331,10 +335,30 @@ Tcl_UtfToUniChar( /* * Four-byte-character lead byte followed by three trail bytes. */ - +#if TCL_UTF_MAX == 4 + Tcl_UniChar surrogate; + + byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; + surrogate = 0xD800 + (byte >> 10); + if (byte & 0x100000) { + /* out of range, < 0x10000 or > 0x10ffff */ + } else if (*chPtr != surrogate) { + /* produce high surrogate, but don't advance source pointer */ + *chPtr = surrogate; + return 0; + } else { + /* produce low surrogate, and advance source pointer */ + *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF)); + return 4; + } +#else *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); - return 4; + if ((*chPtr <= 0x10ffff) && (*chPtr > 0xffff)) { + return 4; + } +#endif } /* diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index 5246d53..fe0ed2d 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -1482,7 +1482,7 @@ BuildCommandLine( Tcl_UniChar ch; for (start = arg; *start != '\0'; start += count) { - count = Tcl_UtfToUniChar(start, &ch); + count = TclUtfToUniChar(start, &ch); if (Tcl_UniCharIsSpace(ch)) { /* INTL: ISO space. */ quote = 1; break; |