From 7d2cbe2aef563bac2e041eca4e8918de2bb66ccf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 20:39:05 +0000 Subject: Fix [cc4d805771]: reg-13.17.error fails in 8.5 tip --- generic/regc_lex.c | 2 +- tests/reg.test | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index f8fa772..039495a 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -832,7 +832,7 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = (uchr) lexdigits(v, 16, 1, 4); + c = (uchr) lexdigits(v, 16, 4, 4); if (ISERR()) { FAILW(REG_EESCAPE); } diff --git a/tests/reg.test b/tests/reg.test index 7e66f6d..6cd2eb3 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -622,8 +622,7 @@ expectMatch 13.13 P "a\\nb" "a\nb" "a\nb" expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" -expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" -expectError 13.17.1 - {a\ux} EESCAPE +expectError 13.17 - {a\u008x} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectError 13.20 - {a\U0000008x} EESCAPE -- cgit v0.12 From ba884cc1dd1d227fad00c29309a5add78a73b2ba Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 21:03:47 +0000 Subject: Backout [ 649f53b9f4]: This was not meant for 8.6 --- generic/regc_lex.c | 2 +- tests/reg.test | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/regc_lex.c b/generic/regc_lex.c index dc7a012..2208c0e 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -832,7 +832,7 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = (uchr) lexdigits(v, 16, 4, 4); + c = (uchr) lexdigits(v, 16, 1, 4); if (ISERR()) { FAILW(REG_EESCAPE); } diff --git a/tests/reg.test b/tests/reg.test index 12cea7e..a95d1e2 100644 --- a/tests/reg.test +++ b/tests/reg.test @@ -625,7 +625,8 @@ expectMatch 13.13 P "a\\nb" "a\nb" "a\nb" expectMatch 13.14 P "a\\rb" "a\rb" "a\rb" expectMatch 13.15 P "a\\tb" "a\tb" "a\tb" expectMatch 13.16 P "a\\u0008x" "a\bx" "a\bx" -expectError 13.17 - {a\u008x} EESCAPE +expectMatch 13.17 P {a\u008x} "a\bx" "a\bx" +expectError 13.17.1 - {a\ux} EESCAPE expectMatch 13.18 P "a\\u00088x" "a\b8x" "a\b8x" expectMatch 13.19 P "a\\U00000008x" "a\bx" "a\bx" expectMatch 13.20 P {a\U0000008x} "a\bx" "a\bx" -- cgit v0.12 From 3ac32056bc3f77e2f2d2af1c7f32ccef2dbf2fda Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 22:40:03 +0000 Subject: Quickfix to Tcl_NumUtfChars(). Barely used in Tcl core. Still needs a better look. Mark two new tests as knownBug. Needs a further look as well. --- generic/tclUtf.c | 16 ++++++++++++++-- tests/utf.test | 4 ++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1f526f1..ef3fd9e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -599,12 +599,24 @@ Tcl_NumUtfChars( register const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { - src += TclUtfToUniChar(src, &ch); + if (((unsigned)(unsigned char)*src - 0xF0) < 5) { + /* treat F0 - F4 as single character */ + ch = 0; + src++; + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { - src += TclUtfToUniChar(src, &ch); + if (((unsigned)(unsigned char)*src - 0xF0) < 5) { + /* treat F0 - F4 as single character */ + ch = 0; + src++; + } else { + src += TclUtfToUniChar(src, &ch); + } i++; } if (src < endPtr) { diff --git a/tests/utf.test b/tests/utf.test index acdd50e..1180bf2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -470,10 +470,10 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { testutfnext \xA0\xA0\xA0\xA0 } 1 -test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { testutfnext \x80\x80\x80\x80 } 1 -- cgit v0.12 From e598b0d32c0f6d2ba5356cbb58d8dcc56c1b2772 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Apr 2020 22:42:30 +0000 Subject: Found problem with utf-6.125/6.126: argument -bytestring was missing in test --- tests/utf.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/utf.test b/tests/utf.test index 1180bf2..c2bc896 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -470,11 +470,11 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext -bytestring \x80\x80\x80 } 1 -test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { - testutfnext \xA0\xA0\xA0\xA0 +test utf-6.125 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \xA0\xA0\xA0\xA0 } 1 -test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} {testutfnext knownBug} { - testutfnext \x80\x80\x80\x80 +test utf-6.126 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { + testutfnext -bytestring \x80\x80\x80\x80 } 1 test utf-7.1 {Tcl_UtfPrev} testutfprev { -- cgit v0.12