summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/UniCharIsAlpha.37
-rw-r--r--generic/tcl.decls4
-rw-r--r--generic/tclCmdMZ.c11
-rw-r--r--generic/tclCompCmdsSZ.c24
-rw-r--r--generic/tclCompile.h3
-rw-r--r--generic/tclDecls.h13
-rw-r--r--generic/tclEncoding.c19
-rw-r--r--generic/tclStubInit.c3
-rw-r--r--generic/tclUtf.c30
-rw-r--r--tests/encoding.test82
-rw-r--r--tests/string.test26
11 files changed, 182 insertions, 40 deletions
diff --git a/doc/UniCharIsAlpha.3 b/doc/UniCharIsAlpha.3
index 61490ed..20828e4 100644
--- a/doc/UniCharIsAlpha.3
+++ b/doc/UniCharIsAlpha.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
+Tcl_UniCharIsAlnum, Tcl_UniCharIsAlpha, Tcl_UniCharIsControl, Tcl_UniCharIsDigit, Tcl_UniCharIsGraph, Tcl_UniCharIsLower, Tcl_UniCharIsPrint, Tcl_UniCharIsPunct, Tcl_UniCharIsSpace, Tcl_UniCharIsUpper, Tcl_UniCharIsUnicode, Tcl_UniCharIsWordChar \- routines for classification of Tcl_UniChar characters
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -44,6 +44,9 @@ int
\fBTcl_UniCharIsUpper\fR(\fIch\fR)
.sp
int
+\fBTcl_UniCharIsUnicode\fR(\fIch\fR)
+.sp
+int
\fBTcl_UniCharIsWordChar\fR(\fIch\fR)
.SH ARGUMENTS
.AS int ch
@@ -81,6 +84,8 @@ with the various routines.
.PP
\fBTcl_UniCharIsUpper\fR tests if the character is an uppercase Unicode character.
.PP
+\fBTcl_UniCharIsUpper\fR tests if the character is a Unicode character.
+.PP
\fBTcl_UniCharIsWordChar\fR tests if the character is alphanumeric or
a connector punctuation mark.
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 51ece1a..03d43ce 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2412,8 +2412,8 @@ declare 652 {
declare 653 {
unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *lengthPtr)
}
-declare 656 {
- void TclUnusedStubEntry(void)
+declare 657 {
+ int Tcl_UniCharIsUnicode(int ch)
}
# ----- BASELINE -- FOR -- 8.7.0 ----- #
diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c
index d020a93..61d1010 100644
--- a/generic/tclCmdMZ.c
+++ b/generic/tclCmdMZ.c
@@ -1533,16 +1533,16 @@ StringIsCmd(
"boolean", "dict", "digit", "double",
"entier", "false", "graph", "integer",
"list", "lower", "print", "punct",
- "space", "true", "upper", "wideinteger",
- "wordchar", "xdigit", NULL
+ "space", "true", "upper", "unicode",
+ "wideinteger", "wordchar", "xdigit", NULL
};
enum isClassesEnum {
STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
STR_IS_BOOL, STR_IS_DICT, STR_IS_DIGIT, STR_IS_DOUBLE,
STR_IS_ENTIER, STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT,
STR_IS_LIST, STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT,
- STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE,
- STR_IS_WORD, STR_IS_XDIGIT
+ STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, STR_IS_UNICODE,
+ STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
};
static const char *const isOptions[] = {
"-strict", "-failindex", NULL
@@ -1872,6 +1872,9 @@ StringIsCmd(
case STR_IS_UPPER:
chcomp = Tcl_UniCharIsUpper;
break;
+ case STR_IS_UNICODE:
+ chcomp = Tcl_UniCharIsUnicode;
+ break;
case STR_IS_WORD:
chcomp = Tcl_UniCharIsWordChar;
break;
diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c
index 0bac52b..a160625 100644
--- a/generic/tclCompCmdsSZ.c
+++ b/generic/tclCompCmdsSZ.c
@@ -505,19 +505,19 @@ TclCompileStringIsCmd(
Tcl_Token *tokenPtr = TokenAfter(parsePtr->tokenPtr);
static const char *const isClasses[] = {
"alnum", "alpha", "ascii", "control",
- "boolean", "dict", "digit", "double", "entier",
- "false", "graph", "integer", "list",
- "lower", "print", "punct", "space",
- "true", "upper", "wideinteger", "wordchar",
- "xdigit", NULL
+ "boolean", "dict", "digit", "double",
+ "entier", "false", "graph", "integer",
+ "list", "lower", "print", "punct",
+ "space", "true", "upper", "unicode",
+ "wideinteger", "wordchar", "xdigit", NULL
};
enum isClassesEnum {
STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL,
- STR_IS_BOOL, STR_IS_DICT, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_ENTIER,
- STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST,
- STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE,
- STR_IS_TRUE, STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD,
- STR_IS_XDIGIT
+ STR_IS_BOOL, STR_IS_DICT, STR_IS_DIGIT, STR_IS_DOUBLE,
+ STR_IS_ENTIER, STR_IS_FALSE, STR_IS_GRAPH, STR_IS_INT,
+ STR_IS_LIST, STR_IS_LOWER, STR_IS_PRINT, STR_IS_PUNCT,
+ STR_IS_SPACE, STR_IS_TRUE, STR_IS_UPPER, STR_IS_UNICODE,
+ STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT
};
int t, range, allowEmpty = 0, end;
InstStringClassType strClassType;
@@ -609,6 +609,9 @@ TclCompileStringIsCmd(
case STR_IS_UPPER:
strClassType = STR_CLASS_UPPER;
goto compileStrClass;
+ case STR_IS_UNICODE:
+ strClassType = STR_CLASS_UNICODE;
+ goto compileStrClass;
case STR_IS_WORD:
strClassType = STR_CLASS_WORD;
goto compileStrClass;
@@ -1415,6 +1418,7 @@ StringClassDesc const tclStringClassTable[] = {
{"upper", Tcl_UniCharIsUpper},
{"word", Tcl_UniCharIsWordChar},
{"xdigit", UniCharIsHexDigit},
+ {"unicode", Tcl_UniCharIsUnicode},
{"", NULL}
};
diff --git a/generic/tclCompile.h b/generic/tclCompile.h
index 21a27f7..6a5faaf 100644
--- a/generic/tclCompile.h
+++ b/generic/tclCompile.h
@@ -922,8 +922,9 @@ typedef enum InstStringClassType {
STR_CLASS_UPPER, /* Unicode upper-case alphabet characters. */
STR_CLASS_WORD, /* Unicode word (alphabetic, digit, connector
* punctuation) characters. */
- STR_CLASS_XDIGIT /* Characters that can be used as digits in
+ STR_CLASS_XDIGIT, /* Characters that can be used as digits in
* hexadecimal numbers ([0-9A-Fa-f]). */
+ STR_CLASS_UNICODE /* Unicode characters. */
} InstStringClassType;
typedef struct StringClassDesc {
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 4b91817..bb31355 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1933,8 +1933,9 @@ EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr,
size_t *lengthPtr);
/* Slot 654 is reserved */
/* Slot 655 is reserved */
-/* 656 */
-EXTERN void TclUnusedStubEntry(void);
+/* Slot 656 is reserved */
+/* 657 */
+EXTERN int Tcl_UniCharIsUnicode(int ch);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2626,7 +2627,8 @@ typedef struct TclStubs {
unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */
void (*reserved654)(void);
void (*reserved655)(void);
- void (*tclUnusedStubEntry) (void); /* 656 */
+ void (*reserved656)(void);
+ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3965,8 +3967,9 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tclGetByteArrayFromObj) /* 653 */
/* Slot 654 is reserved */
/* Slot 655 is reserved */
-#define TclUnusedStubEntry \
- (tclStubsPtr->tclUnusedStubEntry) /* 656 */
+/* Slot 656 is reserved */
+#define Tcl_UniCharIsUnicode \
+ (tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
#endif /* defined(USE_TCL_STUBS) */
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index c4ef159..d994f10 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -517,6 +517,9 @@ FillEncodingFileMap(void)
*---------------------------------------------------------------------------
*/
+#define FLAG_LE 1
+#define FLAG_WTF 2
+
void
TclInitEncodingSubsystem(void)
{
@@ -559,13 +562,16 @@ TclInitEncodingSubsystem(void)
type.nullSize = 1;
type.clientData = NULL;
Tcl_CreateEncoding(&type);
+ type.clientData = INT2PTR(FLAG_WTF);
+ type.encodingName = "wtf-8";
+ Tcl_CreateEncoding(&type);
type.toUtfProc = Utf16ToUtfProc;
type.fromUtfProc = UtfToUcs2Proc;
type.freeProc = NULL;
type.nullSize = 2;
type.encodingName = "ucs-2le";
- type.clientData = INT2PTR(1);
+ type.clientData = INT2PTR(FLAG_LE);
Tcl_CreateEncoding(&type);
type.encodingName = "ucs-2be";
type.clientData = INT2PTR(0);
@@ -579,7 +585,7 @@ TclInitEncodingSubsystem(void)
type.freeProc = NULL;
type.nullSize = 2;
type.encodingName = "utf-16le";
- type.clientData = INT2PTR(1);
+ type.clientData = INT2PTR(FLAG_LE);
Tcl_CreateEncoding(&type);
type.encodingName = "utf-16be";
type.clientData = INT2PTR(0);
@@ -2271,7 +2277,7 @@ UtfExtToUtfIntProc(
static int
UtfToUtfProc(
- TCL_UNUSED(ClientData),
+ void *clientData, /* flags */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2302,6 +2308,7 @@ UtfToUtfProc(
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
+ int encflags = PTR2INT(clientData);
int *chPtr = (int *) statePtr;
if (flags & TCL_ENCODING_START) {
@@ -2370,6 +2377,9 @@ UtfToUtfProc(
int low = *chPtr;
size_t len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (*chPtr & 0x400)) {
+ if ((pureNullMode == 1) && !(encflags & FLAG_WTF)) {
+ *chPtr = 0xFFFD;
+ }
*dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF);
*dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF);
*dst++ = (char) ((*chPtr | 0x80) & 0xBF);
@@ -2378,6 +2388,9 @@ UtfToUtfProc(
src += len;
dst += Tcl_UniCharToUtf(*chPtr, dst);
*chPtr = low;
+ } else if ((pureNullMode == 1) && !(encflags & FLAG_WTF)
+ && !Tcl_UniCharIsUnicode(*chPtr)) {
+ *chPtr = 0xFFFD;
}
dst += Tcl_UniCharToUtf(*chPtr, dst);
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index fa5562d..cd1ce81 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1905,7 +1905,8 @@ const TclStubs tclStubs = {
TclGetByteArrayFromObj, /* 653 */
0, /* 654 */
0, /* 655 */
- TclUnusedStubEntry, /* 656 */
+ 0, /* 656 */
+ Tcl_UniCharIsUnicode, /* 657 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index e096c06..2687a1d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2187,6 +2187,36 @@ Tcl_UniCharIsUpper(
/*
*----------------------------------------------------------------------
*
+ * Tcl_UniCharIsUnicode --
+ *
+ * Test if a character is a Unicode character.
+ *
+ * Results:
+ * Returns non-zero if character belongs to the Unicode set.
+ *
+ * Excluded are:
+ * 1) All characters > U+10FFFF
+ * 2) Surrogates U+D800 - U+DFFF
+ * 3) Last 2 characters of each plane, so U+??FFFE and U+??FFFF
+ * 4) The characters in the range U+FDD0 - U+FDEF
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharIsUnicode(
+ int ch) /* Unicode character to test. */
+{
+ return ((unsigned int)ch <= 0x10FFFF) && ((ch & 0xFFF800) != 0xD800)
+ && ((ch & 0xFFFE) != 0xFFFE) && ((unsigned int)(ch - 0xFDD0) >= 32);
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharIsWordChar --
*
* Test if a character is alphanumeric or a connector punctuation mark.
diff --git a/tests/encoding.test b/tests/encoding.test
index b1150c6..76b830d 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -340,61 +340,61 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
} "4 😂"
test encoding-15.6 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ set y [encoding convertto wtf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {10 edb882f09f9882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ set y [encoding convertto wtf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\xE9
- set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+ set y [encoding convertto wtf-8 \uDE02\uD83D\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
- set y [encoding convertto utf-8 \uDE02\uD83DX]
+ set y [encoding convertto wtf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
set x \uDE02\xE9
- set y [encoding convertto utf-8 \uDE02\xE9]
+ set y [encoding convertto wtf-8 \uDE02\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
set x \uDA02\xE9
- set y [encoding convertto utf-8 \uDA02\xE9]
+ set y [encoding convertto wtf-8 \uDA02\xE9]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
- set y [encoding convertto utf-8 \uDE02Y]
+ set y [encoding convertto wtf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
- set y [encoding convertto utf-8 \uDA02Y]
+ set y [encoding convertto wtf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
- set y [encoding convertto utf-8 \uDE02]
+ set y [encoding convertto wtf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
- set y [encoding convertto utf-8 \uDA02]
+ set y [encoding convertto wtf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 eda882}
@@ -409,6 +409,66 @@ test encoding-15.17 {UtfToUtfProc emoji character output} {
binary scan $y H* z
list [string length $y] $z
} {4 f09f9882}
+test encoding-15.18 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\uDE02\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ binary scan $y H* z
+ list [string length $y] $z
+} {10 efbfbdf09f9882efbfbd}
+test encoding-15.19 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\uD83D
+ set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 9 efbfbdefbfbdefbfbd}
+test encoding-15.20 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83D\xE9
+ set y [encoding convertto utf-8 \uDE02\uD83D\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 8 efbfbdefbfbdc3a9}
+test encoding-15.21 {UtfToUtfProc emoji character output} {
+ set x \uDE02\uD83DX
+ set y [encoding convertto utf-8 \uDE02\uD83DX]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {3 7 efbfbdefbfbd58}
+test encoding-15.22 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02\xE9
+ set y [encoding convertto utf-8 \uDE02\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 5 efbfbdc3a9}
+test encoding-15.23 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02\xE9
+ set y [encoding convertto utf-8 \uDA02\xE9]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 5 efbfbdc3a9}
+test encoding-15.24 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02Y
+ set y [encoding convertto utf-8 \uDE02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 efbfbd59}
+test encoding-15.25 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02Y
+ set y [encoding convertto utf-8 \uDA02Y]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {2 4 efbfbd59}
+test encoding-15.26 {UtfToUtfProc high surrogate character output} {
+ set x \uDE02
+ set y [encoding convertto utf-8 \uDE02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 efbfbd}
+test encoding-15.27 {UtfToUtfProc low surrogate character output} {
+ set x \uDA02
+ set y [encoding convertto utf-8 \uDA02]
+ binary scan $y H* z
+ list [string length $x] [string length $y] $z
+} {1 3 efbfbd}
test encoding-16.1 {Utf16ToUtfProc} -body {
set val [encoding convertfrom utf-16 NN]
@@ -742,7 +802,7 @@ test encoding-28.0 {all encodings load} -body {
llength $name
}
return $count
-} -result [expr {[info exists ::tcl_precision] ? 86 : 85}]
+} -result [expr {[info exists ::tcl_precision] ? 87 : 86}]
runtests
diff --git a/tests/string.test b/tests/string.test
index 0eaa3da..b3b278c 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -525,10 +525,10 @@ test string-6.4.$noComp {string is, too many args} {
} {1 {wrong # args: should be "string is class ?-strict? ?-failindex var? str"}}
test string-6.5.$noComp {string is, class check} {
list [catch {run {string is bogus str}} msg] $msg
-} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {bad class "bogus": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
test string-6.6.$noComp {string is, ambiguous class} {
list [catch {run {string is al str}} msg] $msg
-} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, wideinteger, wordchar, or xdigit}}
+} {1 {ambiguous class "al": must be alnum, alpha, ascii, control, boolean, dict, digit, double, entier, false, graph, integer, list, lower, print, punct, space, true, upper, unicode, wideinteger, wordchar, or xdigit}}
test string-6.7.$noComp {string is alpha, all ok} {
run {string is alpha -strict -failindex var abc}
} 1
@@ -961,6 +961,28 @@ test string-6.130.1.$noComp {string is entier, false on bad octal} {
test string-6.131.$noComp {string is entier, false on bad hex} {
list [run {string is entier -fail var 0X12345611234123456123456562345612345612345612345612345612345612345612345612345612345345XYZ}] $var
} {0 88}
+test string-6.132.$noComp {string is unicode} {
+ run {string is unicode \U10FFFD\uD7FF\uE000\uFDCF\uFDF0}
+} 1
+test string-6.133.$noComp {string is unicode, upper surrogate} {
+ run {string is unicode \uD800}
+} 0
+test string-6.134.$noComp {string is unicode, lower surrogate} {
+ run {string is unicode \uDFFF}
+} 0
+test string-6.135.$noComp {string is unicode, noncharacter} {
+ run {string is unicode \uFFFE}
+} 0
+test string-6.136.$noComp {string is unicode, noncharacter} {
+ run {string is unicode \uFFFF}
+} 0
+test string-6.137.$noComp {string is unicode, noncharacter} {
+ run {string is unicode \uFDD0}
+} 0
+test string-6.138.$noComp {string is unicode, noncharacter} {
+ run {string is unicode \uFDEF}
+} 0
+
test string-7.1.$noComp {string last, not enough args} {
list [catch {run {string last a}} msg] $msg