summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclTest.c 35
-rw-r--r-- tests/utf.test 117
2 files changed, 148 insertions, 4 deletions
diff --git a/generic/tclTest.c b/generic/tclTest.c
index 1676bae..322d6b4 100644
--- a/generic/tclTest.c
+++ b/generic/tclTest.c
@@ -6713,7 +6713,8 @@ TestUtfNextCmd(
int objc,
Tcl_Obj *const objv[])
{
- int numBytes;
+ int numBytes; /* Number of bytes supplied in the test string */
+ int offset; /* Number of bytes we are permitted to read */
char *bytes;
const char *result, *first;
char buffer[32];
@@ -6721,20 +6722,46 @@ TestUtfNextCmd(
const char *p = tobetested;
(void)dummy;
- if (objc != 2) {
- Tcl_WrongNumArgs(interp, 1, objv, "bytes");
+ if (objc < 2 || objc > 3) {
+ Tcl_WrongNumArgs(interp, 1, objv, "bytes ?numBytes?");
return TCL_ERROR;
}
+
bytes = (char *) Tcl_GetByteArrayFromObj(objv[1], &numBytes);
+ offset = numBytes +TCL_UTF_MAX -1; /* If no constraint is given, allow
+ * the terminating NUL to limit
+ * operations. */
+
+ if (objc == 3) {
+ if (TCL_OK != TclGetIntForIndex(interp, objv[2], numBytes, &offset)) {
+ return TCL_ERROR;
+ }
+ if (offset < 0) {
+ offset = 0;
+ }
+ if (offset > numBytes +TCL_UTF_MAX -1) {
+ offset = numBytes +TCL_UTF_MAX -1;
+ }
+ }
+
if (numBytes > (int)sizeof(buffer)-2) {
- Tcl_AppendResult(interp, "\"testutfnext\" can only handle 30 bytes", NULL);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf(
+ "\"testutfnext\" can only handle %d bytes",
+ (int)(sizeof(buffer) - 2)));
return TCL_ERROR;
}
memcpy(buffer + 1, bytes, numBytes);
buffer[0] = buffer[numBytes + 1] = '\x00';
+ if (!Tcl_UtfCharComplete(buffer + 1, offset)) {
+ /* Cannot scan a complete sequence from the data */
+
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(0));
+ return TCL_OK;
+ }
+
first = TclUtfNext(buffer + 1);
while ((buffer[0] = *p++) != '\0') {
/* Run Tcl_UtfNext with many more possible bytes at src[-1], all should give the same result */
diff --git a/tests/utf.test b/tests/utf.test
index 63ae9ee..c739bb4 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -470,6 +470,123 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu
test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext {
testutfnext \x80\x80\x80
} 1
+test utf-6.94 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext G 0
+} 0
+test utf-6.95 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0 0
+} 0
+test utf-6.96 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext AG 1
+} 1
+test utf-6.97 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext A\xA0 1
+} 1
+test utf-6.98 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xD0\xA0G 1
+} 0
+test utf-6.99 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xD0\xA0G 2
+} 2
+test utf-6.100 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xD0\xA0\xA0 1
+} 0
+test utf-6.101 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xD0\xA0\xA0 2
+} 2
+test utf-6.102 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0G 1
+} 0
+test utf-6.103 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0G 2
+} 0
+test utf-6.104 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0G 3
+} 3
+test utf-6.105 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0\xA0 1
+} 0
+test utf-6.106 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0\xA0 2
+} 0
+test utf-6.107 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xE8\xA0\xA0\xA0 3
+} 3
+test utf-6.108.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0G 1
+} 1
+test utf-6.108.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0G 1
+} 0
+test utf-6.109.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0G 2
+} 1
+test utf-6.109.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0G 2
+} 0
+test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0G 3
+} 1
+test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0G 3
+} 0
+test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0G 4
+} 1
+test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0G 4
+} 4
+test utf-6.112.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 1
+} 1
+test utf-6.112.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 1
+} 0
+test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 2
+} 1
+test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 2
+} 0
+test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 3
+} 1
+test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 3
+} 0
+test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext ucs2} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 4
+} 1
+test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext fullutf} {
+ testutfnext \xF2\xA0\xA0\xA0\xA0 4
+} 4
+test utf-6.116 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0G 0
+} 0
+test utf-6.117 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0G 1
+} 1
+test utf-6.118 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0 1
+} 1
+test utf-6.119 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0G 2
+} 1
+test utf-6.120 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0\xA0 2
+} 1
+test utf-6.121 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0\xA0G 3
+} 1
+test utf-6.122 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0\xA0\xA0 3
+} 1
+test utf-6.123 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0\xA0\xA0G 4
+} 1
+test utf-6.124 {Tcl_UtfNext, read limits} testutfnext {
+ testutfnext \xA0\xA0\xA0\xA0\xA0 4
+} 1
test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext {
testutfnext \xA0\xA0\xA0\xA0
} 1