From 594b85ce18c43c1a0665f90f702fa3d0da4659cf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 10 May 2020 20:58:10 +0000 Subject: Demonstration of a documentation bug, and a suggestion for improved wording. More explanation will follow in the ticket. --- doc/Utf.3 | 4 +++- generic/tclUtf.c | 12 +----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index c8c6132..35f9327 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -290,7 +290,9 @@ characters. Behavior is undefined if a negative \fIindex\fR is given. \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR characters. This is equivalent to calling -\fBTcl_UtfNext\fR \fIindex\fR times. If a negative \fIindex\fR is given, +\fBTcl_UtfToUniChar\fR \fIindex\fR times, except when the index points to +a lower surrogate preceded by an upper surrogate; in that case, the returned +pointer will point just after the lower surrogate. If a negative \fIindex\fR is given, the return pointer points to the first character in the source string. .PP \fBTcl_UtfBackslash\fR is a utility procedure used by several of the Tcl diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 5951f68..dbcfd6d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1166,19 +1166,9 @@ Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; - int len = 0; - while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); - src += len; + src = Tcl_UtfNext(src); } -#if TCL_UTF_MAX <= 3 - if ((ch >= 0xD800) && (len < 3)) { - /* Index points at character following high Surrogate */ - src += TclUtfToUniChar(src, &ch); - } -#endif return src; } -- cgit v0.12