diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-20 15:20:55 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-20 15:20:55 (GMT) |
| commit | 4f2621d9d59b2df9183fc4a90bb530dbccd18fc2 (patch) | |
| tree | 89d59c07d835e1eee3c358e7f8aca4f6915b807a | |
| parent | effab20437479dde3420488467f7ab773743d849 (diff) | |
| download | tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.zip tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.tar.gz tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.tar.bz2 | |
Proposed fix for [c11e0c5ce4]: Regression in Tcl_UtfCharComplete.
| -rw-r--r-- | generic/tclUtf.c | 32 |
| -rw-r--r-- | tests/utf.test | 136 |
2 files changed, 123 insertions, 45 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index aa949ca..842744d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,17 +64,6 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 -}; - -static const unsigned char complete[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, #if TCL_UTF_MAX > 4 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -84,7 +73,11 @@ static const unsigned char complete[256] = { #endif 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif 1,1,1,1,1,1,1,1,1,1,1 }; @@ -558,7 +551,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. 
*/ { - return length >= complete[(unsigned char)*src]; + return length >= totalBytes[(unsigned char)*src]; } /* @@ -606,7 +599,7 @@ Tcl_NumUtfChars( src = next; } } else { - register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4; + register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { next = TclUtfNext(src); @@ -617,7 +610,7 @@ Tcl_NumUtfChars( #endif src = next; } - endPtr += /*TCL_UTF_MAX*/ 4; + endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { next = TclUtfNext(src); #if TCL_UTF_MAX > 4 @@ -895,15 +888,18 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4); + } while (trailBytesSeen < TCL_UTF_MAX); /* - * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a + * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a * properly formed byte sequence to find, and we can stop looking, * accepting the fallback. */ - +#if TCL_UTF_MAX < 4 + return src - TCL_UTF_MAX; +#else return fallback; +#endif } /* diff --git a/tests/utf.test b/tests/utf.test index f56fabc..3301dde 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -21,6 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]] catch {unset x} # Some tests require support for 4-byte UTF-8 sequences +testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}] testConstraint tip389 [expr {[string length \U010000] == 2}] @@ -361,7 +362,10 @@ test utf-6.67 {Tcl_UtfNext} testutfnext { test utf-6.68 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0G } 1 -test utf-6.69 {Tcl_UtfNext} testutfnext { +test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0 +} 1 +test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0 } 4 test utf-6.70 {Tcl_UtfNext} testutfnext { @@ -376,22 +380,40 @@ test utf-6.71 
{Tcl_UtfNext} testutfnext { test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF4\xA0\xA0\xF8 } 1 -test utf-6.74 {Tcl_UtfNext} testutfnext { +test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0G +} 1 +test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0G } 4 -test utf-6.75 {Tcl_UtfNext} testutfnext { +test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xA0 +} 1 +test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xA0 } 4 -test utf-6.76 {Tcl_UtfNext} testutfnext { +test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xD0 +} 1 +test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xD0 } 4 -test utf-6.77 {Tcl_UtfNext} testutfnext { +test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xE8 +} 1 +test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xE8 } 4 -test utf-6.78 {Tcl_UtfNext} testutfnext { +test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0\xF4 +} 1 +test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0\xF4 } 4 -test utf-6.79 {Tcl_UtfNext} testutfnext { +test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} { + testutfnext \xF4\xA0\xA0\xA0G\xF8 +} 1 +test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} { testutfnext \xF4\xA0\xA0\xA0G\xF8 } 4 test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext { @@ -415,7 +437,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext { test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext { testutfnext \xF0\x80\x80\x80 } 1 -test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} { +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} { + testutfnext \xF0\x90\x80\x80 +} 1 +test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} { testutfnext \xF0\x90\x80\x80 } 4 
test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} { @@ -508,13 +533,22 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev { test utf-7.9.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xF8\xA0 3 } 2 -test utf-7.10 {Tcl_UtfPrev} testutfprev { +test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0 +} 2 +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 3 +} 2 +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xF8\xA0 3 +} 2 +test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0 } 1 -test utf-7.10.1 {Tcl_UtfPrev} testutfprev { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 3 } 1 -test utf-7.10.2 {Tcl_UtfPrev} testutfprev { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xF8\xA0 3 } 1 test utf-7.11 {Tcl_UtfPrev} testutfprev { @@ -556,13 +590,22 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev { test utf-7.14.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xF8\xA0\xA0\xF8 4 } 3 -test utf-7.15 {Tcl_UtfPrev} testutfprev { +test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0 +} 3 +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 4 +} 3 +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xF8 4 +} 3 +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0 } 1 -test utf-7.15.1 {Tcl_UtfPrev} testutfprev { +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 4 } 1 -test utf-7.15.2 {Tcl_UtfPrev} testutfprev { +test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xF8 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -583,28 +626,52 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev { test utf-7.17.2 {Tcl_UtfPrev} testutfprev { testutfprev A\xD0\xA0\xA0\xF8 4 } 3 -test utf-7.18 {Tcl_UtfPrev} testutfprev { 
+test utf-7.18 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0 +} 1 +test utf-7.18.1 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xA0 4 +} 1 +test utf-7.18.2 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xF8 4 +} 1 +test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0 } 3 -test utf-7.18.1 {Tcl_UtfPrev} testutfprev { +test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xA0 4 } 3 -test utf-7.18.2 {Tcl_UtfPrev} testutfprev { +test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xF8 4 } 3 -test utf-7.19 {Tcl_UtfPrev} testutfprev { +test utf-7.19 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF8\xA0\xA0\xA0 +} 2 +test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF8\xA0\xA0\xA0 } 4 -test utf-7.20 {Tcl_UtfPrev} testutfprev { +test utf-7.20 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xF4\xA0\xA0\xA0 +} 2 +test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xF4\xA0\xA0\xA0 } 1 -test utf-7.21 {Tcl_UtfPrev} testutfprev { +test utf-7.21 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xE8\xA0\xA0\xA0 +} 2 +test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xE8\xA0\xA0\xA0 } 4 -test utf-7.22 {Tcl_UtfPrev} testutfprev { +test utf-7.22 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xD0\xA0\xA0\xA0 +} 2 +test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xD0\xA0\xA0\xA0 } 4 -test utf-7.23 {Tcl_UtfPrev} testutfprev { +test utf-7.23 {Tcl_UtfPrev} {testutfprev smallutf} { + testutfprev A\xA0\xA0\xA0\xA0 +} 2 +test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} { testutfprev A\xA0\xA0\xA0\xA0 } 4 test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -628,7 +695,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\x80\x80 2 } 1 -test 
utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev { +test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x80\x80\x80 +} 2 +test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x80\x80\x80 } 4 test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -658,13 +728,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev { test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xE0\xA0\x80 2 } 1 -test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 +} 2 +test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 } 1 -test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 4 +} 3 +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 4 } 1 -test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} { +test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} { + testutfprev A\xF0\x90\x80\x80 3 +} 2 +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} { testutfprev A\xF0\x90\x80\x80 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev { @@ -679,7 +758,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev { test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev { testutfprev \xA0\xA0\xA0 } 2 -test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev { +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev smallutf} { + testutfprev \xA0\xA0\xA0\xA0 +} 1 +test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} { testutfprev \xA0\xA0\xA0\xA0 } 3 test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte 
valid sequence} {testutfprev} { |
