diff options
| author | sebres <sebres@users.sourceforge.net> | 2024-03-20 17:56:25 (GMT) |
|---|---|---|
| committer | sebres <sebres@users.sourceforge.net> | 2024-03-20 17:56:25 (GMT) |
| commit | 30f26cff8be85483cb7c90b15ce9acc2f4607583 (patch) | |
| tree | 7027a041707de66998975b2ebb200c47e67fcde1 | |
| parent | 4d5b75e119924242a8d98267802d7b57d396de43 (diff) | |
| download | tcl-30f26cff8be85483cb7c90b15ce9acc2f4607583.zip tcl-30f26cff8be85483cb7c90b15ce9acc2f4607583.tar.gz tcl-30f26cff8be85483cb7c90b15ce9acc2f4607583.tar.bz2 | |
Optimize TclUtfToUCS4 for single code units (i.e. anything other than a high surrogate), especially for ASCII; fixes performance regression [6811a0081940b76c]
| -rw-r--r-- | generic/tclInt.h | 8 | ||||
| -rw-r--r-- | generic/tclUtf.c | 6 |
2 files changed, 10 insertions, 4 deletions
diff --git a/generic/tclInt.h b/generic/tclInt.h index de92a7d..7efaf80 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3180,7 +3180,7 @@ MODULE_SCOPE int TclTrimLeft(const char *bytes, int numBytes, MODULE_SCOPE int TclTrimRight(const char *bytes, int numBytes, const char *trim, int numTrim); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); -MODULE_SCOPE int TclUtfToUCS4(const char *, int *); +MODULE_SCOPE int TclpUtfToUCS4(const char *, int *); MODULE_SCOPE int TclUCS4ToUtf(int, char *); MODULE_SCOPE int TclUCS4ToLower(int ch); #if TCL_UTF_MAX == 4 @@ -3995,6 +3995,7 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, * the result of Tcl_UtfToUniChar. The ANSI C "prototype" for this macro is: * * MODULE_SCOPE int TclUtfToUniChar(const char *string, Tcl_UniChar *ch); + * MODULE_SCOPE int TclpUtfToUCS4(const char *src, int *ucs4Ptr); *---------------------------------------------------------------- */ @@ -4003,6 +4004,11 @@ MODULE_SCOPE void TclDbInitNewObj(Tcl_Obj *objPtr, const char *file, ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToUniChar(str, chPtr)) +#define TclUtfToUCS4(src, ucs4Ptr) \ + (((UCHAR(*(src))) < 0x80) ? \ + ((*(ucs4Ptr) = UCHAR(*(src))), 1) \ + : TclpUtfToUCS4(src, ucs4Ptr)) + /* *---------------------------------------------------------------- * Macro counterpart of the Tcl_NumUtfChars() function. To be used in speed- diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 6fbeb36..04f7208 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2462,18 +2462,18 @@ TclUniCharMatch( */ int -TclUtfToUCS4( +TclpUtfToUCS4( const char *src, /* The UTF-8 string. */ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. 
*/ { Tcl_UniChar ch = 0; - int len = Tcl_UtfToUniChar(src, &ch); + int len = TclUtfToUniChar(src, &ch); #if TCL_UTF_MAX <= 4 if ((ch & ~0x3FF) == 0xD800) { Tcl_UniChar low = ch; - int len2 = Tcl_UtfToUniChar(src+len, &low); + int len2 = TclUtfToUniChar(src+len, &low); if ((low & ~0x3FF) == 0xDC00) { *ucs4Ptr = (((ch & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000; return len + len2; |
