From 99c0854650e170e35db77c4984ab41fb6b398f26 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 12 May 2020 21:17:05 +0000 Subject: Little tweak to Tcl_UniCharAtIndex(): Protect against negative index, return -1 in that case. --- doc/Utf.3 | 3 ++- generic/tclUtf.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index c8c6132..1faab15 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -285,7 +285,8 @@ byte \fIsrc[0]\fR nor the byte \fIstart[-1]\fR nor the byte Pascal Ord() function. It returns the Unicode character represented at the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must contain at least \fIindex\fR -characters. Behavior is undefined if a negative \fIindex\fR is given. +characters. If a negative \fIindex\fR is given or \fIindex\fR points +to the second half of a surrogate pair, it returns -1. .PP \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not byte) \fIindex\fR in the UTF-8 string \fIsrc\fR. The source string must diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 281753c..309a344 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1145,6 +1145,9 @@ Tcl_UniCharAtIndex( Tcl_UniChar ch = 0; int i = 0; + if (index < 0) { + return -1; + } while (index-- > 0) { i = TclUtfToUniChar(src, &ch); src += i; -- cgit v0.12