6 files changed, 27 insertions, 26 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 1b00b71..2208c0e 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -843,12 +843,18 @@ lexescape(
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
-	if (i > 0xFFFF) {
+#if CHRBITS > 16
+	if ((unsigned)i > 0x10FFFF) {
+	    i = 0xFFFD;
+	}
+#else
+	if ((unsigned)i & ~0xFFFF) {
 	    /* TODO: output a Surrogate pair
 	     */
 	    i = 0xFFFD;
 	}
-	RETV(PLAIN, (uchr) i);
+#endif
+	RETV(PLAIN, (uchr)i);
 	break;
     case CHR('v'):
 	RETV(PLAIN, CHR('\v'));
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index e012570..6ab0510 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -2557,11 +2557,6 @@ UtfToUnicodeProc(
 	}
 	src += TclUtfToUniChar(src, chPtr);
 
-	/*
-	 * Need to handle this in a way that won't cause misalignment by
-	 * casting dst to a Tcl_UniChar. [Bug 1122671]
-	 */
-
 	if (clientData) {
 #if TCL_UTF_MAX > 4
 	    if (*chPtr <= 0xFFFF) {
@@ -2793,11 +2788,7 @@ TableFromUtfProc(
 	len = TclUtfToUniChar(src, &ch);
 
 #if TCL_UTF_MAX > 4
-	/*
-	 * This prevents a crash condition. More evaluation is required for
-	 * full support of int Tcl_UniChar. [Bug 1004065]
-	 */
-
+	/* Unicode chars > U+FFFF cannot be represented in any table encoding */
 	if (ch & 0xFFFF0000) {
 	    word = 0;
 	} else
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 780ea30..5df9aac 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3065,8 +3065,6 @@ MODULE_SCOPE int	TclObjUnsetVar2(Tcl_Interp *interp,
 			    Tcl_Obj *part1Ptr, Tcl_Obj *part2Ptr, int flags);
 MODULE_SCOPE int	TclParseBackslash(const char *src,
 			    int numBytes, int *readPtr, char *dst);
-MODULE_SCOPE int	TclParseHex(const char *src, int numBytes,
-			    int *resultPtr);
 MODULE_SCOPE int	TclParseNumber(Tcl_Interp *interp, Tcl_Obj *objPtr,
 			    const char *expected, const char *bytes,
 			    int numBytes, const char **endPtrPtr, int flags);
diff --git a/generic/tclParse.c b/generic/tclParse.c
index 4d7e6b8..f834480 100644
--- a/generic/tclParse.c
+++ b/generic/tclParse.c
@@ -167,6 +167,8 @@ static int		ParseTokens(const char *src, int numBytes, int mask,
 			    int flags, Tcl_Parse *parsePtr);
 static int		ParseWhiteSpace(const char *src, int numBytes,
 			    int *incompletePtr, char *typePtr);
+static int		ParseHex(const char *src, int numBytes,
+			    int *resultPtr);
 
 /*
  *----------------------------------------------------------------------
@@ -754,7 +756,7 @@ TclParseAllWhiteSpace(
 /*
  *----------------------------------------------------------------------
  *
- * TclParseHex --
+ * ParseHex --
  *
  *	Scans a hexadecimal number as a Tcl_UniChar value (e.g., for parsing
  *	\x and \u escape sequences). At most numBytes bytes are scanned.
@@ -774,7 +776,7 @@ TclParseAllWhiteSpace(
  */
 
 int
-TclParseHex(
+ParseHex(
     const char *src,		/* First character to parse. */
     int numBytes,		/* Max number of byes to scan */
     int *resultPtr)	/* Points to storage provided by caller where
@@ -899,7 +901,7 @@ TclParseBackslash(
 	result = 0xB;
 	break;
     case 'x':
-	count += TclParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);
+	count += ParseHex(p+1, (numBytes > 3) ? 2 : numBytes-2, &result);
 	if (count == 2) {
 	    /*
 	     * No hexadigits -> This is just "x".
@@ -914,7 +916,7 @@ TclParseBackslash(
 	}
 	break;
     case 'u':
-	count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);
+	count += ParseHex(p+1, (numBytes > 5) ? 4 : numBytes-2, &result);
 	if (count == 2) {
 	    /*
 	     * No hexadigits -> This is just "u".
@@ -926,7 +928,7 @@ TclParseBackslash(
 	    /* If high surrogate is immediately followed by a low surrogate
 	     * escape, combine them into one character. */
 	    int low;
-	    int count2 = TclParseHex(p+7, 4, &low);
+	    int count2 = ParseHex(p+7, 4, &low);
 	    if ((count2 == 4) && ((low & 0xDC00) == 0xDC00)) {
 		result = ((result & 0x3FF)<<10 | (low & 0x3FF)) + 0x10000;
 		count += count2 + 2;
@@ -935,7 +937,7 @@ TclParseBackslash(
 	}
 	break;
     case 'U':
-	count += TclParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
+	count += ParseHex(p+1, (numBytes > 9) ? 8 : numBytes-2, &result);
 	if (count == 2) {
 	    /*
 	     * No hexadigits -> This is just "U".
diff --git a/tests/encoding.test b/tests/encoding.test
index 6fef748..a8ce162 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -36,7 +36,7 @@ proc runtests {} {
 testConstraint testencoding [llength [info commands testencoding]]
 testConstraint testbytestring [llength [info commands testbytestring]]
 testConstraint teststringbytes [llength [info commands teststringbytes]]
-testConstraint fullutf [expr {[format %c 0x010000] != "\ufffd"}]
+testConstraint fullutf [expr {[format %c 0x010000] ne "\ufffd"}]
 testConstraint exec [llength [info commands exec]]
 testConstraint testgetdefenc [llength [info commands testgetdefenc]]
 
@@ -284,16 +284,16 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding}
 # OpenEncodingFile is fully tested by the rest of the tests in this file.
 
 test encoding-12.1 {LoadTableEncoding: normal encoding} {
-    set x [encoding convertto iso8859-3 \u120]
-    append x [encoding convertto iso8859-3 \ud5]
-    append x [encoding convertfrom iso8859-3 \xd5]
+    set x [encoding convertto iso8859-3 \u0120]
+    append x [encoding convertto iso8859-3 \xD5]
+    append x [encoding convertfrom iso8859-3 \xD5]
 } "\xd5?\u120"
 test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
     set x [encoding convertto iso8859-3 ab\u0120g]
-    append x [encoding convertfrom iso8859-3 ab\xd5g]
+    append x [encoding convertfrom iso8859-3 ab\xD5g]
 } "ab\xd5gab\u120g"
 test encoding-12.3 {LoadTableEncoding: multi-byte encoding} {
-    set x [encoding convertto shiftjis ab\u4e4eg]
+    set x [encoding convertto shiftjis ab\u4E4Eg] 
     append x [encoding convertfrom shiftjis ab\x8c\xc1g]
 } "ab\x8c\xc1gab\u4e4eg"
 test encoding-12.4 {LoadTableEncoding: double-byte encoding} {
@@ -305,6 +305,9 @@ test encoding-12.5 {LoadTableEncoding: symbol encoding} {
     append x [encoding convertto symbol \u67]
     append x [encoding convertfrom symbol \x67]
 } "\x67\x67\u3b3"
+test encoding-12.6 {LoadTableEncoding: overflow in char value} fullutf {
+    encoding convertto iso8859-3 \U010000
+} "?"
 
 test encoding-13.1 {LoadEscapeTable} {
     viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]]
diff --git a/tests/reg.test b/tests/reg.test
index d040632..a95d1e2 100644
--- a/tests/reg.test
+++ b/tests/reg.test
@@ -626,6 +626,7 @@ expectMatch	13.14 P		"a\\rb"		"a\rb"	"a\rb"
 expectMatch	13.15 P		"a\\tb"		"a\tb"	"a\tb"
 expectMatch	13.16 P		"a\\u0008x"	"a\bx"	"a\bx"
 expectMatch	13.17 P		{a\u008x}	"a\bx"	"a\bx"
+expectError	13.17.1 -	{a\ux}		EESCAPE
 expectMatch	13.18 P		"a\\u00088x"	"a\b8x"	"a\b8x"
 expectMatch	13.19 P		"a\\U00000008x"	"a\bx"	"a\bx"
 expectMatch	13.20 P		{a\U0000008x}	"a\bx"	"a\bx"