From ec1723eeadcf9efe52b0f81a65d683dff9b160c5 Mon Sep 17 00:00:00 2001
From: "jan.nijtmans" <nijtmans@users.sourceforge.net>
Date: Sat, 18 Apr 2020 12:46:54 +0000
Subject: Update documentation of Tcl_UtfPrev/Tcl_UtfNext back to how it was.
 Will be updated later, when implementation is ready and agreed upon.

---
 doc/Utf.3        | 37 +++++++++++------------------
 generic/tclUtf.c | 72 ++++++++++++++++++++------------------------------------
 2 files changed, 39 insertions(+), 70 deletions(-)

diff --git a/doc/Utf.3 b/doc/Utf.3
index cb82699..334fa6f 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -3,7 +3,7 @@
 '\"
 '\" See the file "license.terms" for information on usage and redistribution
 '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
-'\" 
+'\"
 .TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
 .so man.macros
 .BS
@@ -13,7 +13,7 @@ Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_Ut
 .nf
 \fB#include <tcl.h>\fR
 .sp
-typedef ... Tcl_UniChar;
+typedef ... \fBTcl_UniChar\fR;
 .sp
 int
 \fBTcl_UniCharToUtf\fR(\fIch, buf\fR)
@@ -48,7 +48,7 @@ int
 int
 \fBTcl_UtfCharComplete\fR(\fIsrc, length\fR)
 .sp
-int 
+int
 \fBTcl_NumUtfChars\fR(\fIsrc, length\fR)
 .sp
 const char *
@@ -109,7 +109,7 @@ Pointer to the beginning of a UTF-8 string.
 .AP int index in
 The index of a character (not byte) in the UTF-8 string.
 .AP int *readPtr out
-If non-NULL, filled with the number of bytes in the backslash sequence, 
+If non-NULL, filled with the number of bytes in the backslash sequence,
 including the backslash character.
 .AP char *dst out
 Buffer in which the bytes represented by the backslash sequence are stored.
@@ -141,8 +141,8 @@ source buffer is long enough such that this routine does not run off the
 end and dereference non-existent or random memory; if the source buffer
 is known to be null-terminated, this will not happen.  If the input is
 not in proper UTF-8 format, \fBTcl_UtfToUniChar\fR will store the first
-byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0000 and
-0x00FF and return 1.  
+byte of \fIsrc\fR in \fI*chPtr\fR as a Tcl_UniChar between 0x0080 and
+0x00FF and return 1.
 .PP
 \fBTcl_UniCharToUtfDString\fR converts the given Unicode string
 to UTF-8, storing the result in a previously initialized \fBTcl_DString\fR.
@@ -210,27 +210,18 @@ length is negative, all bytes up to the first null byte are used.
 \fBTcl_UtfFindFirst\fR corresponds to \fBstrchr\fR for UTF-8 strings.  It
 returns a pointer to the first occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
 \fBTcl_UtfFindLast\fR corresponds to \fBstrrchr\fR for UTF-8 strings.  It
 returns a pointer to the last occurrence of the Tcl_UniChar \fIch\fR
 in the null-terminated UTF-8 string \fIsrc\fR.  The null terminator is
-considered part of the UTF-8 string.  
+considered part of the UTF-8 string.
 .PP
-\fBTcl_UtfNext\fR is used to step forward through a UTF-8 string.
-If the UTF-8 string is made up entirely of complete, well-formed, and
-valid character byte sequences, and \fIsrc\fR points to the lead byte
-of one of those sequences, then repeated calls of \fBTcl_UtfNext\fR will
-return pointers to the lead bytes of each character in the string, one
-character at a time. In any other circumstance, \fBTcl_UtfNext\fR
-returns \fIsrc\fR+1.  \fBTcl_UtfNext\fR will always read \fIsrc[0]\fR
-and may read as many following bytes (up to a total of \fBTCL_UTF_MAX\fR)
-as needed to find the end of the byte sequence. If the string is
-\fBNUL\fR-terminated, \fBTcl_UtfNext\fR will not read beyond the terminating
-\fBNUL\fR byte. If not, the caller must use the companion routine
-\fBTcl_UtfCharComplete\fR to determine whether there is any risk
-\fBTcl_UtfNext\fR might read beyond the readable memory occupied
-by the string.
+Given \fIsrc\fR, a pointer to some location in a UTF-8 string,
+\fBTcl_UtfNext\fR returns a pointer to the next UTF-8 character in the
+string.  The caller must not ask for the next character after the last
+character in the string if the string is not terminated by a null
+character.
 .PP
 \fBTcl_UtfPrev\fR is used to step backward through but not beyond the
 UTF-8 string that begins at \fIstart\fR.  If the UTF-8 string is made
@@ -262,7 +253,7 @@ characters.  Behavior is undefined if a negative \fIindex\fR is given.
 .PP
 \fBTcl_UtfAtIndex\fR returns a pointer to the specified character (not
 byte) \fIindex\fR in the UTF-8 string \fIsrc\fR.  The source string must
-contain at least \fIindex\fR characters.  This is equivalent to calling 
+contain at least \fIindex\fR characters.  This is equivalent to calling
 \fBTcl_UtfNext\fR \fIindex\fR times.  If a negative \fIindex\fR is given,
 the return pointer points to the first character in the source string.
 .PP
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1883804..64ee0a8 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -678,35 +678,13 @@ Tcl_UtfFindLast(
  *
  * Tcl_UtfNext --
  *
- *	The aim of this routine is to provide a way to iterate forward
- *	through a UTF-8 string. The caller is expected to pass a non-NULL
- *	pointer argument /src/ which points to a location within a string.
- *	(*src) will be read, so /src/ must not point to an unreadable
- *	location past the end of the string. If /src/ points to the
- *	beginning of a complete, well-formed and valid UTF_8 byte sequence
- *	of no more than TCL_UTF_MAX bytes, Tcl_UtfNext returns the pointer
- *	just past the end of that sequence. In any other circumstance,
- *	Tcl_UtfNext returns /src/+1.
- *
- *	Because this routine always returns a value > /src/, it is useful
- *	as a forward iterator that will always make progress. If the string
- *	is NUL-terminated, Tcl_UtfNext will not read beyond the terminating
- *	NUL character. If it is not NUL-terminated, the caller must make
- *	use of the companion routine Tcl_UtfCharComplete to test whether
- *	there is risk that Tcl_UtfNext will read beyond the end of the string.
- *	Tcl_UtfNext will never read more than TCL_UTF_MAX bytes.
- *
- *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfNext calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	Given a pointer to some current location in a UTF-8 string, move
+ *	forward one character. The caller must ensure that they are not asking
+ *	for the next character after the last character in the string.
  *
  * Results:
- *	A pointer to the start of the next character in the string (or to
- *	the end of the string) as described above.
+ *	The return value is the pointer to the next character in the UTF-8
+ *	string.
  *
  * Side effects:
  *	None.
@@ -747,37 +725,37 @@ Tcl_UtfNext(
  *
  *	The aim of this routine is to provide a way to move backward
  *	through a UTF-8 string. The caller is expected to pass non-NULL
- *	pointer arguments /start/ and /src/. /start/ points to the beginning
- *	of a string, and /src/ (>= /start/) points to a location within (or
- *	just past the end) of the string. This routine always returns a
- *	pointer within the string (>= /start/).  When (/src/ == /start/),
- *	it returns /start/. When (/src/ > /start/), it returns a pointer
- *	(< /src/) and (>= /src/ - TCL_UTF_MAX).  Subject to these constraints,
- *	the routine returns a pointer to the earliest byte in the string that
- *	starts a character when characters are read starting at /start/ and
+ *	pointer arguments start and src. start points to the beginning
+ *	of a string, and src (>= start) points to a location within (or just
+ *	past the end) of the string. This routine always returns a
+ *	pointer within the string (>= start).  When (src == start), it
+ *	returns start. When (src > start), it returns a pointer (< src)
+ *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
+ *	routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at start and
  *	that character might include the byte src[-1]. The routine will
  *	examine only those bytes in the range that might be returned.
- *	It will not examine the byte (*src), and because of that cannot
+ *	It will not examine the byte *src, and because of that cannot
  *	determine for certain in all circumstances whether the character
  *	that begins with the returned pointer will or will not include
- *	the byte src[-1]. In the scenario where /src/ points to the end of
- *	a buffer being filled, the returned pointer points to either the
+ *	the byte src[-1]. In the scenario where src points to the end of
+ *	a buffer being filled, the returned pointer points to either the
  *	final complete character in the string or to the earliest byte
  *	that might start an incomplete character waiting for more bytes to
  *	complete.
  *
- *	Because this routine always returns a value < /src/ until the point
- *	it is forced to return /start/, it is useful as a backward iterator
+ *	Because this routine always returns a value < src until the point
+ *	it is forced to return start, it is useful as a backward iterator
  *	through a string that will always make progress and always be
  *	prevented from running past the beginning of the string.
  *
  *	In a string where all characters are complete and properly formed,
- *	and /src/ points to the first byte of a character, repeated
- *	Tcl_UtfPrev calls will step to the starting bytes of characters, one
- *	character at a time. Within those limitations, Tcl_UtfPrev and
- *	Tcl_UtfNext are inverses. If either condition cannot be met,
- *	Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and the
- *	caller will have to take greater care.
+ *	and the value of src points to the first byte of a character,
+ *	repeated Tcl_UtfPrev calls will step to the starting bytes of
+ *	characters, one character at a time. Within those limitations,
+ *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ *	the caller will have to take greater care.
  *
  * Results:
  *	A pointer to the start of a character in the string as described
@@ -887,7 +865,7 @@ Tcl_UtfPrev(
  *
  * Tcl_UniCharAtIndex --
  *
- *	Returns the Unicode character represented at the specified character
+ *	Returns the Tcl_UniChar represented at the specified character
  *	(not byte) position in the UTF-8 string.
  *
  * Results:
-- 
cgit v0.12