diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-17 10:32:52 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-17 10:32:52 (GMT) |
commit | 0c272ad2ce2e910b0642d1a2691ddb19e63291fb (patch) | |
tree | a612a54786dbdd46b99c7fde24a183fc5b1d93e6 | |
parent | 98700eaf2b0d54d262c427dd5dd30133e6a14fbd (diff) | |
parent | b4ecc6b747780c309fbf05d525ecb3c5f74f3ff2 (diff) | |
download | tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.zip tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.tar.gz tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.tar.bz2 |
More test cases. Mark test case utf-2.11 as "knownBug": it doesn't give the right answer for any TCL_UTF_MAX value. TODO: To be fixed! (Don ????)
Fix build/testcase for TCL_UTF_MAX=6 (testcase is OK, "string length" implementation was not!)
-rw-r--r-- | generic/tclUtf.c | 12 | ||||
-rw-r--r-- | tests/utf.test | 31 |
2 files changed, 41 insertions, 2 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 4b5d500..94fa5f6 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -598,7 +598,11 @@ Tcl_NumUtfChars( if (length < 0) { while ((*src != '\0') && (i < INT_MAX)) { next = TclUtfNext(src); +#if TCL_UTF_MAX > 4 + i++; +#else i += 1 + ((next - src) > 3); +#endif src = next; } } else { @@ -606,13 +610,21 @@ Tcl_NumUtfChars( while (src < endPtr) { next = TclUtfNext(src); +#if TCL_UTF_MAX > 4 + i++; +#else i += 1 + ((next - src) > 3); +#endif src = next; } endPtr += /*TCL_UTF_MAX*/ 4; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { next = TclUtfNext(src); +#if TCL_UTF_MAX > 4 + i++; +#else i += 1 + ((next - src) > 3); +#endif src = next; } if (src < endPtr) { diff --git a/tests/utf.test b/tests/utf.test index c584ea1..0ba2b85 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -94,9 +94,9 @@ test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { string length [testbytestring "\xF0\x8F\xBF\xBF"] } {4} -test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring { +test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value string length [testbytestring "\xF4\x90\x80\x80"] -} {2} +} {4} test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring { string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"] } {5} @@ -419,8 +419,17 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { testutfnext \xF0\x90\x80\x80 } 4 test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { + testutfnext \xA0\xA0 +} 1 +test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { testutfnext \xE8\xA0\xA0 1 } 2 +test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid 
sequence} {testutfnext} { + testutfnext \x80\x80 +} 1 +test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} { + testutfnext \xF0\x80\x80 1 +} 2 testConstraint testutfprev [llength [info commands testutfprev]] @@ -505,6 +514,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev { test utf-7.11.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xE8\xA0\xF8\xA0 3 } 1 +test utf-7.11.3 {Tcl_UtfPrev} testutfprev { + testutfprev A\xE8\xA0\xF8 3 +} 1 test utf-7.12 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0 } 1 @@ -593,9 +605,15 @@ test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 } 3 test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0\x80 +} 2 +test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 3 } 2 test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { + testutfprev A\xE0 +} 1 +test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -652,6 +670,15 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0\xA0 } 3 +test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} { + testutfprev \xE8\xA0 +} 0 +test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} { + testutfprev \xE8\xA0\xA0 2 +} 0 +test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} { + testutfprev \xE8\xA0\x00 2 +} 0 test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { string index abcd 0 |