From 2ea2ef0609d7e306bf981672cda2e66782ed4db3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 11 Mar 2021 12:19:14 +0000 Subject: Backport Tcl_UtfCharComplete() functionality from 8.6 for TCL_UTF_MAX>3. This makes Tcl_UtfCharComplete() usable to protect Tcl_UtfNext() calls for overflow. No change for TCL_UTF_MAX=3 (default build) --- generic/tclUtf.c | 19 ++++++++++++++++++- tests/utf.test | 6 +++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 03d0f3a..efbd383 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -76,6 +76,23 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1 }; +#if TCL_UTF_MAX > 3 +static const unsigned char complete[256] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/* End of "continuation byte section" */ + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 +}; +#else +# define complete totalBytes +#endif + /* * Functions used only in this module. */ @@ -492,7 +509,7 @@ Tcl_UtfCharComplete( * a complete UTF-8 character. */ int length) /* Length of above string in bytes. */ { - return length >= totalBytes[UCHAR(*src)]; + return length >= complete[UCHAR(*src)]; } /* diff --git a/tests/utf.test b/tests/utf.test index 06ac329..76b6847 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -8,8 +8,8 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. -if {[lsearch [namespace children] ::tcltest] == -1} { - package require tcltest 2 +if {"::tcltest" ni [namespace children]} { + package require tcltest 2.5 namespace import -force ::tcltest::* } @@ -614,7 +614,7 @@ test utf-6.117.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} test utf-6.118 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { testutfnext [testbytestring \xA0]G 0 } 0 -test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext testbytestring} { +test utf-6.119 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { testutfnext [testbytestring \xA0]G 1 } 1 test utf-6.120 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} { -- cgit v0.12