diff options
-rw-r--r-- | generic/regc_lex.c | 10
-rw-r--r-- | generic/tclEncoding.c | 11
-rw-r--r-- | generic/tclInt.h | 2
-rw-r--r-- | generic/tclParse.c | 14
-rw-r--r-- | tests/encoding.test | 15
-rw-r--r-- | tests/reg.test | 1
6 files changed, 27 insertions, 26 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 1b00b71..2208c0e 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -843,12 +843,18 @@ lexescape( if (ISERR()) { FAILW(REG_EESCAPE); } - if (i > 0xFFFF) { +#if CHRBITS > 16 + if ((unsigned)i > 0x10FFFF) { + i = 0xFFFD; + } +#else + if ((unsigned)i & ~0xFFFF) { /* TODO: output a Surrogate pair */ i = 0xFFFD; } - RETV(PLAIN, (uchr) i); +#endif + RETV(PLAIN, (uchr)i); break; case CHR('v'): RETV(PLAIN, CHR('\v')); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index e012570..6ab0510 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2557,11 +2557,6 @@ UtfToUnicodeProc( } src += TclUtfToUniChar(src, chPtr); - /* - * Need to handle this in a way that won't cause misalignment by - * casting dst to a Tcl_UniChar. [Bug 1122671] - */ - if (clientData) { #if TCL_UTF_MAX > 4 if (*chPtr <= 0xFFFF) { @@ -2793,11 +2788,7 @@ TableFromUtfProc( len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX > 4 - /* - * This prevents a crash condition. More evaluation is required for - * full support of int Tcl_UniChar. 
[Bug 1004065] - */ - + /* Unicode chars > +U0FFFF cannot be represented in any table encoding */ if (ch & 0xFFFF0000) { word = 0; } else diff --git a/generic/tclInt.h b/generic/tclInt.h index 780ea30..5df9aac 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3065,8 +3065,6 @@ MODULE_SCOPE int TclObjUnsetVar2(Tcl_Interp *interp, Tcl_Obj *part1Ptr, Tcl_Obj *part2Ptr, int flags); MODULE_SCOPE int TclParseBackslash(const char *src, int numBytes, int *readPtr, char *dst); -MODULE_SCOPE int TclParseHex(const char *src, int numBytes, - int *resultPtr); MODULE_SCOPE int TclParseNumber(Tcl_Interp *interp, Tcl_Obj *objPtr, const char *expected, const char *bytes, int numBytes, const char **endPtrPtr, int flags); diff --git a/generic/tclParse.c b/generic/tclParse.c index 4d7e6b8..f834480 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -167,6 +167,8 @@ static int ParseTokens(const char *src, int numBytes, int mask, int flags, Tcl_Parse *parsePtr); static int ParseWhiteSpace(const char *src, int numBytes, int *incompletePtr, char *typePtr); +static int ParseHex(const char *src, int numBytes, + int *resultPtr); /* *---------------------------------------------------------------------- @@ -754,7 +756,7 @@ TclParseAllWhiteSpace( /* *---------------------------------------------------------------------- * - * TclParseHex -- + * ParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing * \x and \u escape sequences). At most numBytes bytes are scanned. @@ -774,7 +776,7 @@ TclParseAllWhiteSpace( */ int -TclParseHex( +ParseHex( const char *src, /* First character to parse. */ int numBytes, /* Max number of byes to scan */ int *resultPtr) /* Points to storage provided by caller where @@ -899,7 +901,7 @@ TclParseBackslash( result = 0xB; break; case 'x': - count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 3) ? 
2 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "x". @@ -914,7 +916,7 @@ TclParseBackslash( } break; case 'u': - count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "u". @@ -926,7 +928,7 @@ TclParseBackslash( /* If high surrogate is immediately followed by a low surrogate * escape, combine them into one character. */ int low; - int count2 = TclParseHex(p+7, 4, &low); + int count2 = ParseHex(p+7, 4, &low); if ((count2 == 4) && ((low & 0xDC00) == 0xDC00)) { result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000; count += count2 + 2; @@ -935,7 +937,7 @@ TclParseBackslash( } break; case 'U': - count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); + count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result); if (count == 2) { /* * No hexadigits -> This is just "U". diff --git a/tests/encoding.test b/tests/encoding.test index 6fef748..a8ce162 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -36,7 +36,7 @@ proc runtests {} { testConstraint testencoding [llength [info commands testencoding]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint teststringbytes [llength [info commands teststringbytes]] -testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}] +testConstraint fullutf [expr {[format %c 0x010000] ne "\ufffd"}] testConstraint exec [llength [info commands exec]] testConstraint testgetdefenc [llength [info commands testgetdefenc]] @@ -284,16 +284,16 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} # OpenEncodingFile is fully tested by the rest of the tests in this file. 
test encoding-12.1 {LoadTableEncoding: normal encoding} { - set x [encoding convertto iso8859-3 \u120] - append x [encoding convertto iso8859-3 \ud5] - append x [encoding convertfrom iso8859-3 \xd5] + set x [encoding convertto iso8859-3 \u0120] + append x [encoding convertto iso8859-3 \xD5] + append x [encoding convertfrom iso8859-3 \xD5] } "\xd5?\u120" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { set x [encoding convertto iso8859-3 ab\u0120g] - append x [encoding convertfrom iso8859-3 ab\xd5g] + append x [encoding convertfrom iso8859-3 ab\xD5g] } "ab\xd5gab\u120g" test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { - set x [encoding convertto shiftjis ab\u4e4eg] + set x [encoding convertto shiftjis ab\u4E4Eg] append x [encoding convertfrom shiftjis ab\x8c\xc1g] } "ab\x8c\xc1gab\u4e4eg" test encoding-12.4 {LoadTableEncoding: double-byte encoding} { @@ -305,6 +305,9 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} { append x [encoding convertto symbol \u67] append x [encoding convertfrom symbol \x67] } "\x67\x67\u3b3" +test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf { + encoding convertto iso8859-3 \U010000 +} "?" test encoding-13.1 {LoadEscapeTable} { viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] diff --git a/tests/reg.test b/tests/reg.test index d040632..a95d1e2 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -626,6 +626,7 @@ expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" +expectError 13.17.1 - {a\ux} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectMatch 13.20 P {a\U0000008x} "a\bx" "a\bx" |