11 files changed, 182 insertions, 40 deletions
diff --git a/doc/UniCharIsAlpha.3 b/doc/UniCharIsAlpha.3
index 61490ed..20828e4 100644
--- a/doc/UniCharIsAlpha.3
+++ b/doc/UniCharIsAlpha.3
@@ -8,7 +8,7 @@
 .so man.macros
 .BS
 .SH NAME
-Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
+Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsUnicode, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
 .SH SYNOPSIS
 .nf
 \fB#include <tcl.h>\fR
@@ -44,6 +44,9 @@ int
 \fBTcl_UniCharIsUpper\fR(\fIch\fR)
 .sp
 int
+\fBTcl_UniCharIsUnicode\fR(\fIch\fR)
+.sp
+int
 \fBTcl_UniCharIsWordChar\fR(\fIch\fR)
 .SH ARGUMENTS
 .AS int ch
@@ -81,6 +84,8 @@ with the various routines.
 .PP
 \fBTcl_UniCharIsUpper\fR tests if the character is an uppercase Unicode character.
 .PP
+\fBTcl_UniCharIsUpper\fR tests if the character is a Unicode character.
+.PP
 \fBTcl_UniCharIsWordChar\fR tests if the character is alphanumeric or
 a connector punctuation mark.
 
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 51ece1a..03d43ce 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2412,8 +2412,8 @@ declare 652 {
 declare 653 {
     unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
 }
-declare 656 {
-    void TclUnusedStubEntry(void)
+declare 657 {
+    int Tcl_UniCharIsUnicode(int ch)
 }
 
 # ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d020a93..61d1010 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1533,16 +1533,16 @@ StringIsCmd(
 	"boolean",	"dict",		"digit",	"double",
 	"entier",	"false",	"graph",	"integer",
 	"list",		"lower",	"print",	"punct",
-	"space",	"true",		"upper",	"wideinteger",
-	"wordchar",	"xdigit",	NULL
+	"space",	"true",		"upper",	"unicode",
+	"wideinteger", "wordchar",	"xdigit",	NULL
     };
     enum isClassesEnum {
 	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
 	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
 	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
 	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
-	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_WIDE,
-	STR_IS_WORD,	STR_IS_XDIGIT
+	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
+	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
     };
     static const char *const isOptions[] = {
 	"-strict", "-failindex", NULL
@@ -1872,6 +1872,9 @@ StringIsCmd(
     case STR_IS_UPPER:
 	chcomp = Tcl_UniCharIsUpper;
 	break;
+    case STR_IS_UNICODE:
+	chcomp = Tcl_UniCharIsUnicode;
+	break;
     case STR_IS_WORD:
 	chcomp = Tcl_UniCharIsWordChar;
 	break;
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 0bac52b..a160625 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -505,19 +505,19 @@ TclCompileStringIsCmd(
     Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr);
     static const char *const isClasses[] = {
 	"alnum",	"alpha",	"ascii",	"control",
-	"boolean",	"dict", "digit",	"double",	"entier",
-	"false",	"graph",	"integer",	"list",
-	"lower",	"print",	"punct",	"space",
-	"true",		"upper",	"wideinteger",	"wordchar",
-	"xdigit",	NULL
+	"boolean",	"dict",		"digit",	"double",
+	"entier",	"false",	"graph",	"integer",
+	"list",		"lower",	"print",	"punct",
+	"space",	"true",		"upper",	"unicode",
+	"wideinteger", "wordchar",	"xdigit",	NULL
     };
     enum isClassesEnum {
 	STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,
-	STR_IS_BOOL,	STR_IS_DICT, STR_IS_DIGIT,	STR_IS_DOUBLE,	STR_IS_ENTIER,
-	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,	STR_IS_LIST,
-	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,	STR_IS_SPACE,
-	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_WIDE,	STR_IS_WORD,
-	STR_IS_XDIGIT
+	STR_IS_BOOL,	STR_IS_DICT,	STR_IS_DIGIT,	STR_IS_DOUBLE,
+	STR_IS_ENTIER,	STR_IS_FALSE,	STR_IS_GRAPH,	STR_IS_INT,
+	STR_IS_LIST,	STR_IS_LOWER,	STR_IS_PRINT,	STR_IS_PUNCT,
+	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,	STR_IS_UNICODE,
+	STR_IS_WIDE,	STR_IS_WORD,	STR_IS_XDIGIT
     };
     int t, range, allowEmpty = 0, end;
     InstStringClassType strClassType;
@@ -609,6 +609,9 @@ TclCompileStringIsCmd(
     case STR_IS_UPPER:
 	strClassType = STR_CLASS_UPPER;
 	goto compileStrClass;
+    case STR_IS_UNICODE:
+	strClassType = STR_CLASS_UNICODE;
+	goto compileStrClass;
     case STR_IS_WORD:
 	strClassType = STR_CLASS_WORD;
 	goto compileStrClass;
@@ -1415,6 +1418,7 @@ StringClassDesc const tclStringClassTable[] = {
     {"upper",	Tcl_UniCharIsUpper},
     {"word",	Tcl_UniCharIsWordChar},
     {"xdigit",	UniCharIsHexDigit},
+    {"unicode",	Tcl_UniCharIsUnicode},
     {"",	NULL}
 };
 
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index 21a27f7..6a5faaf 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -922,8 +922,9 @@ typedef enum InstStringClassType {
     STR_CLASS_UPPER,		/* Unicode upper-case alphabet characters. */
     STR_CLASS_WORD,		/* Unicode word (alphabetic, digit, connector
 				 * punctuation) characters. */
-    STR_CLASS_XDIGIT		/* Characters that can be used as digits in
+    STR_CLASS_XDIGIT,		/* Characters that can be used as digits in
 				 * hexadecimal numbers ([0-9A-Fa-f]). */
+    STR_CLASS_UNICODE		/* Unicode characters. */
 } InstStringClassType;
 
 typedef struct StringClassDesc {
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 4b91817..bb31355 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1933,8 +1933,9 @@ EXTERN unsigned char *	TclGetByteArrayFromObj(Tcl_Obj *objPtr,
 				size_t *lengthPtr);
 /* Slot 654 is reserved */
 /* Slot 655 is reserved */
-/* 656 */
-EXTERN void		TclUnusedStubEntry(void);
+/* Slot 656 is reserved */
+/* 657 */
+EXTERN int		Tcl_UniCharIsUnicode(int ch);
 
 typedef struct {
     const struct TclPlatStubs *tclPlatStubs;
@@ -2626,7 +2627,8 @@ typedef struct TclStubs {
     unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */
     void (*reserved654)(void);
     void (*reserved655)(void);
-    void (*tclUnusedStubEntry) (void); /* 656 */
+    void (*reserved656)(void);
+    int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
 } TclStubs;
 
 extern const TclStubs *tclStubsPtr;
@@ -3965,8 +3967,9 @@ extern const TclStubs *tclStubsPtr;
 	(tclStubsPtr->tclGetByteArrayFromObj) /* 653 */
 /* Slot 654 is reserved */
 /* Slot 655 is reserved */
-#define TclUnusedStubEntry \
-	(tclStubsPtr->tclUnusedStubEntry) /* 656 */
+/* Slot 656 is reserved */
+#define Tcl_UniCharIsUnicode \
+	(tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
 
 #endif /* defined(USE_TCL_STUBS) */
 
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index c4ef159..d994f10 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -517,6 +517,9 @@ FillEncodingFileMap(void)
  *---------------------------------------------------------------------------
  */
 
+#define FLAG_LE 1
+#define FLAG_WTF 2
+
 void
 TclInitEncodingSubsystem(void)
 {
@@ -559,13 +562,16 @@ TclInitEncodingSubsystem(void)
     type.nullSize	= 1;
     type.clientData	= NULL;
     Tcl_CreateEncoding(&type);
+    type.clientData	= INT2PTR(FLAG_WTF);
+    type.encodingName	= "wtf-8";
+    Tcl_CreateEncoding(&type);
 
     type.toUtfProc	= Utf16ToUtfProc;
     type.fromUtfProc    = UtfToUcs2Proc;
     type.freeProc	= NULL;
     type.nullSize	= 2;
     type.encodingName   = "ucs-2le";
-    type.clientData	= INT2PTR(1);
+    type.clientData	= INT2PTR(FLAG_LE);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "ucs-2be";
     type.clientData	= INT2PTR(0);
@@ -579,7 +585,7 @@ TclInitEncodingSubsystem(void)
     type.freeProc	= NULL;
     type.nullSize	= 2;
     type.encodingName   = "utf-16le";
-    type.clientData	= INT2PTR(1);
+    type.clientData	= INT2PTR(FLAG_LE);
     Tcl_CreateEncoding(&type);
     type.encodingName   = "utf-16be";
     type.clientData	= INT2PTR(0);
@@ -2271,7 +2277,7 @@ UtfExtToUtfIntProc(
 
 static int
 UtfToUtfProc(
-    TCL_UNUSED(ClientData),
+    void *clientData,	/* flags */
     const char *src,		/* Source string in UTF-8. */
     int srcLen,			/* Source string length in bytes. */
     int flags,			/* Conversion control flags. */
@@ -2302,6 +2308,7 @@ UtfToUtfProc(
     const char *srcStart, *srcEnd, *srcClose;
     const char *dstStart, *dstEnd;
     int result, numChars, charLimit = INT_MAX;
+    int encflags = PTR2INT(clientData);
     int *chPtr = (int *) statePtr;
 
     if (flags & TCL_ENCODING_START) {
@@ -2370,6 +2377,9 @@ UtfToUtfProc(
 		int low = *chPtr;
 		size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
 		if (((low & ~0x3FF) != 0xDC00) || (*chPtr & 0x400)) {
+		    if ((pureNullMode == 1) && !(encflags & FLAG_WTF)) {
+			*chPtr = 0xFFFD;
+		    }
 			*dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
 			*dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
 			*dst++ = (char) ((*chPtr | 0x80) & 0xBF);
@@ -2378,6 +2388,9 @@ UtfToUtfProc(
 		src += len;
 		dst += Tcl_UniCharToUtf(*chPtr, dst);
 		*chPtr = low;
+	    } else if ((pureNullMode == 1) && !(encflags & FLAG_WTF)
+		    && !Tcl_UniCharIsUnicode(*chPtr)) {
+		*chPtr = 0xFFFD;
 	    }
 	    dst += Tcl_UniCharToUtf(*chPtr, dst);
 	}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index fa5562d..cd1ce81 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1905,7 +1905,8 @@ const TclStubs tclStubs = {
     TclGetByteArrayFromObj, /* 653 */
     0, /* 654 */
     0, /* 655 */
-    TclUnusedStubEntry, /* 656 */
+    0, /* 656 */
+    Tcl_UniCharIsUnicode, /* 657 */
 };
 
 /* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e096c06..2687a1d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2187,6 +2187,36 @@ Tcl_UniCharIsUpper(
 /*
  *----------------------------------------------------------------------
  *
+ * Tcl_UniCharIsUnicode --
+ *
+ *	Test if a character is a Unicode character.
+ *
+ * Results:
+ *	Returns non-zero if character belongs to the Unicode set.
+ *
+ *	Excluded are:
+ *	  1) All characters > U+10FFFF
+ *	  2) Surrogates U+D800 - U+DFFF
+ *	  3) Last 2 characters of each plane, so U+??FFFE  and U+??FFFF
+ *	  4) The characters in the range U+FDD0 - U+FDEF
+ *
+ * Side effects:
+ *	None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsUnicode(
+    int ch)			/* Unicode character to test. */
+{
+    return ((unsigned int)ch <= 0x10FFFF) && ((ch & 0xFFF800) != 0xD800)
+	    && ((ch & 0xFFFE) != 0xFFFE) && ((unsigned int)(ch - 0xFDD0) >= 32);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharIsWordChar --
  *
  *	Test if a character is alphanumeric or a connector punctuation mark.
diff --git a/tests/encoding.test b/tests/encoding.test
index b1150c6..76b830d 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -340,61 +340,61 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
 } "4 😂"
 test encoding-15.6 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83D\uDE02\uD83D
-    set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+    set y [encoding convertto wtf-8 \uDE02\uD83D\uDE02\uD83D]
     binary scan $y H* z
     list [string length $y] $z
 } {10 edb882f09f9882eda0bd}
 test encoding-15.7 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83D\uD83D
-    set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+    set y [encoding convertto wtf-8 \uDE02\uD83D\uD83D]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {3 9 edb882eda0bdeda0bd}
 test encoding-15.8 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83D\xE9
-    set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+    set y [encoding convertto wtf-8 \uDE02\uD83D\xE9]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {3 8 edb882eda0bdc3a9}
 test encoding-15.9 {UtfToUtfProc emoji character output} {
     set x \uDE02\uD83DX
-    set y [encoding convertto utf-8 \uDE02\uD83DX]
+    set y [encoding convertto wtf-8 \uDE02\uD83DX]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {3 7 edb882eda0bd58}
 test encoding-15.10 {UtfToUtfProc high surrogate character output} {
     set x \uDE02\xE9
-    set y [encoding convertto utf-8 \uDE02\xE9]
+    set y [encoding convertto wtf-8 \uDE02\xE9]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {2 5 edb882c3a9}
 test encoding-15.11 {UtfToUtfProc low surrogate character output} {
     set x \uDA02\xE9
-    set y [encoding convertto utf-8 \uDA02\xE9]
+    set y [encoding convertto wtf-8 \uDA02\xE9]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {2 5 eda882c3a9}
 test encoding-15.12 {UtfToUtfProc high surrogate character output} {
     set x \uDE02Y
-    set y [encoding convertto utf-8 \uDE02Y]
+    set y [encoding convertto wtf-8 \uDE02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {2 4 edb88259}
 test encoding-15.13 {UtfToUtfProc low surrogate character output} {
     set x \uDA02Y
-    set y [encoding convertto utf-8 \uDA02Y]
+    set y [encoding convertto wtf-8 \uDA02Y]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {2 4 eda88259}
 test encoding-15.14 {UtfToUtfProc high surrogate character output} {
     set x \uDE02
-    set y [encoding convertto utf-8 \uDE02]
+    set y [encoding convertto wtf-8 \uDE02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {1 3 edb882}
 test encoding-15.15 {UtfToUtfProc low surrogate character output} {
     set x \uDA02
-    set y [encoding convertto utf-8 \uDA02]
+    set y [encoding convertto wtf-8 \uDA02]
     binary scan $y H* z
     list [string length $x] [string length $y] $z
 } {1 3 eda882}
@@ -409,6 +409,66 @@ test encoding-15.17 {UtfToUtfProc emoji character output} {
     binary scan $y H* z
     list [string length $y] $z
 } {4 f09f9882}
+test encoding-15.18 {UtfToUtfProc emoji character output} {
+    set x \uDE02\uD83D\uDE02\uD83D
+    set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+    binary scan $y H* z
+    list [string length $y] $z
+} {10 efbfbdf09f9882efbfbd}
+test encoding-15.19 {UtfToUtfProc emoji character output} {
+    set x \uDE02\uD83D\uD83D
+    set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {3 9 efbfbdefbfbdefbfbd}
+test encoding-15.20 {UtfToUtfProc emoji character output} {
+    set x \uDE02\uD83D\xE9
+    set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {3 8 efbfbdefbfbdc3a9}
+test encoding-15.21 {UtfToUtfProc emoji character output} {
+    set x \uDE02\uD83DX
+    set y [encoding convertto utf-8 \uDE02\uD83DX]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {3 7 efbfbdefbfbd58}
+test encoding-15.22 {UtfToUtfProc high surrogate character output} {
+    set x \uDE02\xE9
+    set y [encoding convertto utf-8 \uDE02\xE9]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {2 5 efbfbdc3a9}
+test encoding-15.23 {UtfToUtfProc low surrogate character output} {
+    set x \uDA02\xE9
+    set y [encoding convertto utf-8 \uDA02\xE9]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {2 5 efbfbdc3a9}
+test encoding-15.24 {UtfToUtfProc high surrogate character output} {
+    set x \uDE02Y
+    set y [encoding convertto utf-8 \uDE02Y]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {2 4 efbfbd59}
+test encoding-15.25 {UtfToUtfProc low surrogate character output} {
+    set x \uDA02Y
+    set y [encoding convertto utf-8 \uDA02Y]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {2 4 efbfbd59}
+test encoding-15.26 {UtfToUtfProc high surrogate character output} {
+    set x \uDE02
+    set y [encoding convertto utf-8 \uDE02]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {1 3 efbfbd}
+test encoding-15.27 {UtfToUtfProc low surrogate character output} {
+    set x \uDA02
+    set y [encoding convertto utf-8 \uDA02]
+    binary scan $y H* z
+    list [string length $x] [string length $y] $z
+} {1 3 efbfbd}
 
 test encoding-16.1 {Utf16ToUtfProc} -body {
     set val [encoding convertfrom utf-16 NN]
@@ -742,7 +802,7 @@ test encoding-28.0 {all encodings load} -body {
 		llength $name
 	}
 	return $count
-} -result [expr {[info exists ::tcl_precision] ? 86 : 85}]
+} -result [expr {[info exists ::tcl_precision] ? 87 : 86}]
 
 runtests
 
diff --git a/tests/string.test b/tests/string.test
index 0eaa3da..b3b278c 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -525,10 +525,10 @@ test string-6.4.$noComp {string is, too many args} {
 } {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
 test string-6.5.$noComp {string is, class check} {
     list [catch {run {string is bogus str}} msg] $msg
-} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
 test string-6.6.$noComp {string is, ambiguous class} {
     list [catch {run {string is al str}} msg] $msg
-} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
 test string-6.7.$noComp {string is alpha, all ok} {
     run {string is alpha -strict -failindex var abc}
 } 1
@@ -961,6 +961,28 @@ test string-6.130.1.$noComp {string is entier, false on bad octal} {
 test string-6.131.$noComp {string is entier, false on bad hex} {
     list [run {string is entier -fail var 0X12345611234123456123456562345612345612345612345612345612345612345612345612345612345345XYZ}] $var
 } {0 88}
+test string-6.132.$noComp {string is unicode} {
+    run {string is unicode \U10FFFD\uD7FF\uE000\uFDCF\uFDF0}
+} 1
+test string-6.133.$noComp {string is unicode, upper surrogate} {
+    run {string is unicode \uD800}
+} 0
+test string-6.134.$noComp {string is unicode, lower surrogate} {
+    run {string is unicode \uDFFF}
+} 0
+test string-6.135.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFFFE}
+} 0
+test string-6.136.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFFFF}
+} 0
+test string-6.137.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFDD0}
+} 0
+test string-6.138.$noComp {string is unicode, noncharacter} {
+    run {string is unicode \uFDEF}
+} 0
+
 
 test string-7.1.$noComp {string last, not enough args} {
     list [catch {run {string last a}} msg] $msg