summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2020-05-06 21:08:35 (GMT)
committer dgp <dgp@users.sourceforge.net> 2020-05-06 21:08:35 (GMT)
commit c2fa3a3fa94ad6516014f5376cb9d97e8b5550bb (patch)
tree 2b228714202155304bfbf26357f9a3c5ea8bb33c
parent 391bf996873721fdcb8d68003d96121b378d2654 (diff)
download tcl-c2fa3a3fa94ad6516014f5376cb9d97e8b5550bb.zip
tcl-c2fa3a3fa94ad6516014f5376cb9d97e8b5550bb.tar.gz
tcl-c2fa3a3fa94ad6516014f5376cb9d97e8b5550bb.tar.bz2
Tighten optimization in Tcl_NumUtfChars. Explain in comments.
-rw-r--r-- generic/tclUtf.c 39
1 files changed, 25 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 24ec3d2..6a142bc 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -512,38 +512,49 @@ Tcl_UtfCharComplete(
int
Tcl_NumUtfChars(
- register CONST char *src, /* The UTF-8 string to measure. */
- int length) /* The length of the string in bytes, or -1
- * for strlen(string). */
+ CONST char *src, /* The UTF-8 string to measure. */
+ int length) /* The length of the string in bytes, or -1
+ * for strlen(string). */
{
Tcl_UniChar ch;
- register int i = 0;
-
- /*
- * The separate implementations are faster.
- *
- * Since this is a time-sensitive function, we also do the check for the
- * single-byte char case specially.
- */
+ int i = 0;
if (length < 0) {
+ /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */
while ((*src != '\0') && (i < INT_MAX)) {
src += TclUtfToUniChar(src, &ch);
i++;
}
} else {
- register const char *endPtr = src + length - TCL_UTF_MAX;
+ /* Will return value between 0 and length. No overflow checks. */
+
+ /* Pointer to the end of string. Never read endPtr[0] */
+ const char *endPtr = src + length;
+ /* Pointer to breakpoint in scan where optimization is lost */
+ const char *optPtr = endPtr - TCL_UTF_MAX + 1;
- while (src < endPtr) {
+ /*
+ * Optimize away the call in this loop. Justified because...
+ * when (src < optPtr), (endPtr - src) > (endPtr - optPtr)
+ * By initialization above (endPtr - optPtr) = TCL_UTF_MAX - 1
+ * So (endPtr - src) >= TCL_UTF_MAX, and passing that to
+ * Tcl_UtfCharComplete we know will cause return of 1.
+ */
+ while ((src < optPtr)
+ /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) {
src += TclUtfToUniChar(src, &ch);
i++;
}
- endPtr += TCL_UTF_MAX;
+ /* Loop over the remaining string where call must happen */
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
src += TclUtfToUniChar(src, &ch);
i++;
}
if (src < endPtr) {
+ /*
+ * String ends in an incomplete UTF-8 sequence.
+ * Count every byte in it.
+ */
i += endPtr - src;
}
}