diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-03 22:16:21 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-03 22:16:21 (GMT) |
commit | c17661c31e3f4fac5a70dd487b4c9b3372ee5e5b (patch) | |
tree | 851e3b06e06747b32ec58cbcc95d1061ebc60f1d /generic/tclInt.h | |
parent | 9501890b6c738830781eebe5d8bdcff2d6a0068c (diff) | |
download | tcl-c17661c31e3f4fac5a70dd487b4c9b3372ee5e5b.zip tcl-c17661c31e3f4fac5a70dd487b4c9b3372ee5e5b.tar.gz tcl-c17661c31e3f4fac5a70dd487b4c9b3372ee5e5b.tar.bz2 |
Re-join utf-6.93.0 and utf-6.93.1 (please disregard comment in previous commit, it was not correct).
Perfect TclUtfToUCS4()/TclUCS4Complete() and the new (internal) function TclUCS4ToUtf(). They can help prevent bugs when splitting/joining surrogates. Used them in a few more places.
Diffstat (limited to 'generic/tclInt.h')
-rw-r--r-- | generic/tclInt.h | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h index 593d878..6f024a6 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3184,8 +3184,15 @@ MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); +MODULE_SCOPE int TclUCS4ToUtf(int, char *); + +/* + * Bytes F0-F4 are start-bytes for 4-byte sequences. + * Byte 0xED can be the start-byte of an upper surrogate. In that case, + * TclUtfToUCS4() might read the lower surrogate following it too. + */ # define TclUCS4Complete(src, length) (((unsigned)(UCHAR(*(src)) - 0xF0) < 5) \ - ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) + ? ((length) >= 4) : (UCHAR(*(src)) == 0xED) ? ((length) >= 6) : Tcl_UtfCharComplete((src), (length))) MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, |