diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-08-18 22:06:54 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-08-18 22:06:54 (GMT) |
commit | 2b886b868c8231e01906b7435b9b71f301f91e90 (patch) | |
tree | 56fbdeda6cb9713267ccfc74c96ee601243f1d68 /generic | |
parent | a61edf450cec98aaa1d893e59c465f4eb181301c (diff) | |
parent | b916fb2e6064a68a3848169f1d6263396112d330 (diff) | |
download | tcl-2b886b868c8231e01906b7435b9b71f301f91e90.zip tcl-2b886b868c8231e01906b7435b9b71f301f91e90.tar.gz tcl-2b886b868c8231e01906b7435b9b71f301f91e90.tar.bz2 |
merge core-8-6-branch
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclBinary.c | 6 | ||||
-rw-r--r-- | generic/tclCmdIL.c | 2 | ||||
-rw-r--r-- | generic/tclCmdMZ.c | 10 | ||||
-rw-r--r-- | generic/tclCompExpr.c | 2 | ||||
-rw-r--r-- | generic/tclEncoding.c | 20 | ||||
-rw-r--r-- | generic/tclLoad.c | 2 | ||||
-rw-r--r-- | generic/tclParse.c | 2 | ||||
-rw-r--r-- | generic/tclScan.c | 2 | ||||
-rw-r--r-- | generic/tclStringObj.c | 4 | ||||
-rw-r--r-- | generic/tclUtf.c | 78 | ||||
-rw-r--r-- | generic/tclUtil.c | 2 |
11 files changed, 79 insertions, 51 deletions
diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 5772bc6..a693894 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -536,7 +536,7 @@ SetByteArrayFromAny( const char *src, *srcEnd; unsigned char *dst; ByteArray *byteArrayPtr; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (objPtr->typePtr == &properByteArrayType) { return TCL_OK; @@ -1300,7 +1300,7 @@ BinaryFormatCmd( badField: { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; char buf[TCL_UTF_MAX + 1]; TclUtfToUniChar(errorString, &ch); @@ -1670,7 +1670,7 @@ BinaryScanCmd( badField: { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; char buf[TCL_UTF_MAX + 1]; TclUtfToUniChar(errorString, &ch); diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 4a11c3b..47076ec 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -4370,7 +4370,7 @@ static int DictionaryCompare( const char *left, const char *right) /* The strings to compare. */ { - Tcl_UniChar uniLeft, uniRight, uniLeftLower, uniRightLower; + Tcl_UniChar uniLeft = 0, uniRight = 0, uniLeftLower, uniRightLower; int diff, zeros; int secondaryDiff = 0; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 83382a7..2195aa1 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -1173,7 +1173,7 @@ Tcl_SplitObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int len; const char *splitChars; const char *stringPtr; @@ -1258,7 +1258,7 @@ Tcl_SplitObjCmd( } else { const char *element, *p, *splitEnd; int splitLen; - Tcl_UniChar splitChar; + Tcl_UniChar splitChar = 0; /* * Normal case: split on any of a given set of characters. Discard @@ -1482,7 +1482,7 @@ StringIsCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { const char *string1, *end, *stop; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ int i, failat = 0, result = 1, strict = 0, index, length1, length2; Tcl_Obj *objPtr, *failVarObj = NULL; @@ -2410,7 +2410,7 @@ StringStartCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const char *p, *string; int cur, index, length, numChars; @@ -2471,7 +2471,7 @@ StringEndCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const char *p, *end, *string; int cur, index, length, numChars; diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index 24c8896..b854b0f 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -1885,7 +1885,7 @@ ParseLexeme( { const char *end; int scanned; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; Tcl_Obj *literal = NULL; unsigned char byte; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6c21287..7ab33f8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2293,7 +2293,7 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; result = TCL_OK; @@ -2342,8 +2342,8 @@ UtfToUtfProc( } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUniChar. Not doing can so - * cause it run beyond the endof the buffer! If we happen such an - * incomplete char its byts are made to represent themselves. + * cause it run beyond the end of the buffer! If we happen such an + * incomplete char its bytes are made to represent themselves. */ ch = (unsigned char) *src; @@ -2407,7 +2407,7 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2497,7 +2497,7 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; srcStart = src; srcEnd = src + srcLen; @@ -2607,7 +2607,7 @@ TableToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart, *prefixBytes; int result, byte, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const unsigned short *const *toUnicode; const unsigned short *pageZero; TableEncodingData *dataPtr = clientData; @@ -2719,7 +2719,7 @@ TableFromUtfProc( { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd, *prefixBytes; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int result, len, word, numChars; TableEncodingData *dataPtr = clientData; const unsigned short *const *fromUnicode; @@ -2853,7 +2853,7 @@ Iso88591ToUtfProc( result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; @@ -2939,7 +2939,7 @@ Iso88591FromUtfProc( dstEnd = dst + dstLen - 1; for (numChars = 0; src < srcEnd; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int len; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -3326,7 +3326,7 @@ EscapeFromUtfProc( for (numChars = 0; src < srcEnd; numChars++) { unsigned len; int word; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* diff --git a/generic/tclLoad.c b/generic/tclLoad.c index 66637da..e0bb5ef 100644 --- a/generic/tclLoad.c +++ b/generic/tclLoad.c @@ -130,7 +130,7 @@ Tcl_LoadObjCmd( Tcl_PackageInitProc *initProc; const char *p, *fullFileName, *packageName; Tcl_LoadHandle loadHandle; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; unsigned len; int index, flags = 0; Tcl_Obj *const *savedobjv = objv; diff --git a/generic/tclParse.c b/generic/tclParse.c index 5d640dc..a2227f7 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -829,7 +829,7 @@ TclParseBackslash( * written there. */ { register const char *p = src+1; - Tcl_UniChar unichar; + Tcl_UniChar unichar = 0; int result; int count; char buf[TCL_UTF_MAX]; diff --git a/generic/tclScan.c b/generic/tclScan.c index 17069eb..7a6a8a2 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -257,7 +257,7 @@ ValidateFormat( { int gotXpg, gotSequential, value, i, flags; char *end; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int objIndex, xpgSize, nspace = numVars; int *nassign = TclStackAlloc(interp, nspace * sizeof(int)); char buf[TCL_UTF_MAX+1]; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 2305220..209f982 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1677,7 +1677,7 @@ Tcl_AppendFormatToObj( #endif int newXpg, numChars, allocSegment = 0, segmentLimit, segmentNumBytes; Tcl_Obj *segment; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int step = TclUtfToUniChar(format, &ch); format += step; @@ -3428,7 +3428,7 @@ TclStringObjReverse( Tcl_Obj *objPtr) { String *stringPtr; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (TclIsPureByteArray(objPtr)) { int numBytes; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 161a4bd..25cc2d1 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -253,6 +253,15 @@ Tcl_UniCharToUtfDString( * Tcl_UtfCharComplete() before calling this routine to ensure that * enough bytes remain in the string. * + * If TCL_UTF_MAX == 4, special handling of Surrogate pairs is done: + * For any UTF-8 string containing a character outside of the BMP, the + * first call to this function will fill *chPtr with the high surrogate + * and generate a return value of 0. Calling Tcl_UtfToUniChar again + * will produce the low surrogate and a return value of 4. Because *chPtr + * is used to remember whether the high surrogate is already produced, it + * is recommended to initialize the variable it points to as 0 before + * the first call to Tcl_UtfToUniChar is done. + * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the * number of bytes from the UTF-8 string that were consumed. @@ -272,7 +281,7 @@ Tcl_UtfToUniChar( register int byte; /* - * Unroll 1 to 3 byte UTF-8 sequences, use loop to handle longer ones. + * Unroll 1 to 3 (or 4) byte UTF-8 sequences. */ byte = *((unsigned char *) src); @@ -325,12 +334,30 @@ Tcl_UtfToUniChar( /* * Four-byte-character lead byte followed by three trail bytes. */ - +#if TCL_UTF_MAX == 4 + Tcl_UniChar surrogate; + + byte = (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) + | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)) - 0x10000; + surrogate = (Tcl_UniChar) (0xD800 + (byte >> 10)); + if (byte & 0x100000) { + /* out of range, < 0x10000 or > 0x10ffff */ + } else if (*chPtr != surrogate) { + /* produce high surrogate, but don't advance source pointer */ + *chPtr = surrogate; + return 0; + } else { + /* produce low surrogate, and advance source pointer */ + *chPtr = (Tcl_UniChar) (0xDC00 | (byte & 0x3FF)); + return 4; + } +#else *chPtr = (Tcl_UniChar) (((byte & 0x07) << 18) | ((src[1] & 0x3F) << 12) | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); if ((unsigned)(*chPtr - 0x10000) <= 0xFFFFF) { return 4; } +#endif } /* @@ -371,7 +398,7 @@ Tcl_UtfToUniCharDString( * appended to this previously initialized * DString. */ { - Tcl_UniChar *w, *wString; + Tcl_UniChar ch, *w, *wString; const char *p, *end; int oldLength; @@ -393,8 +420,8 @@ Tcl_UtfToUniCharDString( w = wString; end = src + length; for (p = src; p < end; ) { - p += TclUtfToUniChar(p, w); - w++; + p += TclUtfToUniChar(p, &ch); + *w++ = ch; } *w = '\0'; Tcl_DStringSetLength(dsPtr, @@ -428,10 +455,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - int ch; - - ch = *((unsigned char *) src); - return length >= totalBytes[ch]; + return length >= totalBytes[(unsigned char)*src]; } /* @@ -458,8 +482,8 @@ Tcl_NumUtfChars( int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { - Tcl_UniChar ch; - register int i; + Tcl_UniChar ch = 0; + register int i = 0; /* * The separate implementations are faster. @@ -468,7 +492,6 @@ Tcl_NumUtfChars( * single-byte char case specially. */ - i = 0; if (length < 0) { while (*src != '\0') { src += TclUtfToUniChar(src, &ch); @@ -519,7 +542,7 @@ Tcl_UtfFindFirst( int ch) /* The Tcl_UniChar to search for. */ { int len; - Tcl_UniChar find; + Tcl_UniChar find = 0; while (1) { len = TclUtfToUniChar(src, &find); @@ -558,7 +581,7 @@ Tcl_UtfFindLast( int ch) /* The Tcl_UniChar to search for. */ { int len; - Tcl_UniChar find; + Tcl_UniChar find = 0; const char *last; last = NULL; @@ -598,9 +621,15 @@ const char * Tcl_UtfNext( const char *src) /* The current location in the string. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; + int len = TclUtfToUniChar(src, &ch); - return src + TclUtfToUniChar(src, &ch); +#if TCL_UTF_MAX == 4 + if (len == 0) { + len = TclUtfToUniChar(src, &ch); + } +#endif + return src + len; } /* @@ -633,8 +662,7 @@ Tcl_UtfPrev( const char *look; int i, byte; - src--; - look = src; + look = --src; for (i = 0; i < TCL_UTF_MAX; i++) { if (look < start) { if (src < start) { @@ -707,7 +735,7 @@ Tcl_UtfAtIndex( register const char *src, /* The UTF-8 string. */ register int index) /* The position of the desired character. */ { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; while (index > 0) { index--; @@ -791,7 +819,7 @@ int Tcl_UtfToUpper( char *str) /* String to convert in place. */ { - Tcl_UniChar ch, upChar; + Tcl_UniChar ch = 0, upChar; char *src, *dst; int bytes; @@ -844,7 +872,7 @@ int Tcl_UtfToLower( char *str) /* String to convert in place. */ { - Tcl_UniChar ch, lowChar; + Tcl_UniChar ch = 0, lowChar; char *src, *dst; int bytes; @@ -898,7 +926,7 @@ int Tcl_UtfToTitle( char *str) /* String to convert in place. */ { - Tcl_UniChar ch, titleChar, lowChar; + Tcl_UniChar ch = 0, titleChar, lowChar; char *src, *dst; int bytes; @@ -1007,7 +1035,7 @@ Tcl_UtfNcmp( const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { - Tcl_UniChar ch1, ch2; + Tcl_UniChar ch1 = 0, ch2 = 0; /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the @@ -1055,7 +1083,7 @@ Tcl_UtfNcasecmp( const char *ct, /* UTF string cs is compared to. */ unsigned long numChars) /* Number of UTF chars to compare. */ { - Tcl_UniChar ch1, ch2; + Tcl_UniChar ch1 = 0, ch2 = 0; while (numChars-- > 0) { /* * n must be interpreted as chars, not bytes. @@ -1684,7 +1712,7 @@ Tcl_UniCharCaseMatch( * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - Tcl_UniChar ch1, p; + Tcl_UniChar ch1 = 0, p; while (1) { p = *uniPattern; diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 91cc3b4..608cd15 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1646,7 +1646,7 @@ Tcl_Backslash( * src, unless NULL. */ { char buf[TCL_UTF_MAX]; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; Tcl_UtfBackslash(src, readPtr, buf); TclUtfToUniChar(buf, &ch); |