diff options
author | hobbs <hobbs> | 2002-11-12 02:26:29 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2002-11-12 02:26:29 (GMT) |
commit | 3a517a945ca5d1107633812f169ad5b449535060 (patch) | |
tree | 4f3dd459e34eb75e65384d10e757a8a5bbde00fe /generic | |
parent | b6aa6101cc1c9d53944f03409d39a2310b475be4 (diff) | |
download | tcl-3a517a945ca5d1107633812f169ad5b449535060.zip tcl-3a517a945ca5d1107633812f169ad5b449535060.tar.gz tcl-3a517a945ca5d1107633812f169ad5b449535060.tar.bz2 |
* generic/tclUtf.c: make use of TclUtfToUniChar macro throughout
the functions, and add extra optimization to Tcl_NumUtfChars for
one-byte/char case.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 53 |
1 file changed, 31 insertions, 22 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0fccf95..72a23ca 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.28 2002/08/05 03:24:41 dgp Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.29 2002/11/12 02:26:29 hobbs Exp $ */ #include "tclInt.h" @@ -423,7 +423,7 @@ Tcl_UtfToUniCharDString(string, length, dsPtr) w = wString; end = string + length; for (p = string; p < end; ) { - p += Tcl_UtfToUniChar(p, w); + p += TclUtfToUniChar(p, w); w++; } *w = '\0'; @@ -490,27 +490,36 @@ Tcl_NumUtfChars(str, len) { Tcl_UniChar ch; register Tcl_UniChar *chPtr = &ch; - register int n; - int i; + register int i; /* * The separate implementations are faster. + * + * Since this is a time-sensitive function, we also do the check for + * the single-byte char case specially. */ - + i = 0; if (len < 0) { while (1) { - str += Tcl_UtfToUniChar(str, chPtr); + str += TclUtfToUniChar(str, chPtr); if (ch == '\0') { break; } i++; } } else { + register int n; + while (len > 0) { - n = Tcl_UtfToUniChar(str, chPtr); - len -= n; - str += n; + if (UCHAR(*str) < 0xC0) { + len--; + str++; + } else { + n = Tcl_UtfToUniChar(str, chPtr); + len -= n; + str += n; + } i++; } } @@ -545,7 +554,7 @@ Tcl_UtfFindFirst(string, ch) Tcl_UniChar find; while (1) { - len = Tcl_UtfToUniChar(string, &find); + len = TclUtfToUniChar(string, &find); if (find == ch) { return string; } @@ -587,7 +596,7 @@ Tcl_UtfFindLast(string, ch) last = NULL; while (1) { - len = Tcl_UtfToUniChar(string, &find); + len = TclUtfToUniChar(string, &find); if (find == ch) { last = string; } @@ -625,7 +634,7 @@ Tcl_UtfNext(str) { Tcl_UniChar ch; - return str + Tcl_UtfToUniChar(str, &ch); + return str + TclUtfToUniChar(str, &ch); } /* @@ -706,7 +715,7 @@ Tcl_UniCharAtIndex(src, index) while (index >= 0) { index--; - src += Tcl_UtfToUniChar(src, &ch); + src += 
TclUtfToUniChar(src, &ch); } return ch; } @@ -737,7 +746,7 @@ Tcl_UtfAtIndex(src, index) while (index > 0) { index--; - src += Tcl_UtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, &ch); } return src; } @@ -825,7 +834,7 @@ Tcl_UtfToUpper(str) src = dst = str; while (*src) { - bytes = Tcl_UtfToUniChar(src, &ch); + bytes = TclUtfToUniChar(src, &ch); upChar = Tcl_UniCharToUpper(ch); /* @@ -878,7 +887,7 @@ Tcl_UtfToLower(str) src = dst = str; while (*src) { - bytes = Tcl_UtfToUniChar(src, &ch); + bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* @@ -934,7 +943,7 @@ Tcl_UtfToTitle(str) src = dst = str; if (*src) { - bytes = Tcl_UtfToUniChar(src, &ch); + bytes = TclUtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); if (bytes < UtfCount(titleChar)) { @@ -946,7 +955,7 @@ Tcl_UtfToTitle(str) src += bytes; } while (*src) { - bytes = Tcl_UtfToUniChar(src, &ch); + bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); if (bytes < UtfCount(lowChar)) { @@ -1041,8 +1050,8 @@ Tcl_UtfNcmp(cs, ct, n) * This should be called only when both strings are of * at least n chars long (no need for \0 check) */ - cs += Tcl_UtfToUniChar(cs, &ch1); - ct += Tcl_UtfToUniChar(ct, &ch2); + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { return (ch1 - ch2); } @@ -1081,8 +1090,8 @@ Tcl_UtfNcasecmp(cs, ct, n) * This should be called only when both strings are of * at least n chars long (no need for \0 check) */ - cs += Tcl_UtfToUniChar(cs, &ch1); - ct += Tcl_UtfToUniChar(ct, &ch2); + cs += TclUtfToUniChar(cs, &ch1); + ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); |