summary | refs | log | tree | commit | diff | stats
path: root/generic/tclInt.h
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-01 14:20:21 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-05-01 14:20:21 (GMT)
commit: c0381fd712c458f3b4ee476e00d0857bec4cf5cc (patch)
tree: 073e28ab81f4331a3b1b9dceea126f1e278bce76 /generic/tclInt.h
parent: a3d900f322a935841622ac0c8d81a45c91a16e65 (diff)
parent: 9eaf82b745ac07bc55f7238813c449fc5a447cf8 (diff)
download: tcl-c0381fd712c458f3b4ee476e00d0857bec4cf5cc.zip
tcl-c0381fd712c458f3b4ee476e00d0857bec4cf5cc.tar.gz
tcl-c0381fd712c458f3b4ee476e00d0857bec4cf5cc.tar.bz2
Fix first part of [ed29806baf]: Tcl_UtfToUniChar reads more than TCL_UTF_MAX bytes.
Tcl_UtfToUniChar() now never reads more than TCL_UTF_MAX bytes. Since the UtfToUtf encoder/decoder now uses TclUtfToUCS4(), it no longer joins 2 surrogates encoded as 2 x 3-byte sequences. Actually, it shouldn't, because such sequences are invalid UTF-8. Therefore, added the ucs2 constraint to testcase encoding-15.4. Let's see how TIP #573 goes; this TIP should make this change official. Other callers of Tcl_UtfToUniChar() need to be revised for the same problem. Most callers will need to change Tcl_UtfToUniChar() -> TclUtfToUCS4() and Tcl_UtfCharComplete() -> TclUCS4Complete(), but that's not done yet.
Diffstat (limited to 'generic/tclInt.h')
-rw-r--r-- generic/tclInt.h 9
1 file changed, 6 insertions, 3 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 2ff644e..9ef1065 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,8 +3252,11 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
+# define TclUCS4Complete Tcl_UtfCharComplete
#else
- MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
+# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
+ ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
#endif
MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr);
@@ -4655,8 +4658,8 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[];
#if TCL_UTF_MAX > 3
#define TclUtfToUniChar(str, chPtr) \
- ((((unsigned char) *(str)) < 0x80) ? \
- ((*(chPtr) = (unsigned char) *(str)), 1) \
+ (((UCHAR(*(str))) < 0x80) ? \
+ ((*(chPtr) = UCHAR(*(str))), 1) \
: Tcl_UtfToUniChar(str, chPtr))
#else
#define TclUtfToUniChar(str, chPtr) \