1 files changed, 41 insertions, 11 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 12d764c..22e26d2 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -675,15 +675,43 @@ Tcl_UtfNext(
  *
  * Tcl_UtfPrev --
  *
- *	Given a pointer to some current location in a UTF-8 string, move
- *	backwards one character. This works correctly when the pointer is in
- *	the middle of a UTF-8 character.
+ *	The aim of this routine is to provide a way to move backward
+ *	through a UTF-8 string. The caller is expected to pass non-NULL
+ *	pointer arguments start and src. start points to the beginning
+ *	of a string, and src >= start points to a location within (or just
+ *	past the end) of the string. This routine always returns a
+ *	pointer within the string (>= start).  When (src == start), it
+ *	returns start. When (src > start), it returns a pointer (< src)
+ *	and (>= src - TCL_UTF_MAX).  Subject to these constraints, the
+ *	routine returns a pointer to the earliest byte in the string that
+ *	starts a character when characters are read starting at start and
+ *	that character might include the byte src[-1]. The routine will
+ *	examine only those bytes in the range that might be returned.
+ *	It will not examine the byte *src, and because of that cannot 
+ *	determine for certain in all circumstances whether the character
+ *	that begins with the returned pointer will or will not include
+ *	the byte src[-1]. In the scenario, where src points to the end of
+ *	a buffer being filled, the returned pointer point to either the
+ *	final complete character in the string or to the earliest byte
+ *	that might start an incomplete character waiting for more bytes to
+ *	complete.
+ *
+ *	Because this routine always returns a value < src until the point
+ *	it is forced to return start, it is useful as a backward iterator
+ *	through a string that will always make progress and always be
+ *	prevented from running past the beginning of the string.
+ *
+ *	In a string where all characters are complete and properly formed,
+ *	and the value of src points to the first byte of a character, 
+ *	repeated Tcl_UtfPrev calls will step to the starting bytes of
+ *	characters, one character at a time. Within those limitations,
+ *	Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ *	be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ *	the caller will have to take greater care.
  *
  * Results:
- *	The return value is a pointer to the previous character in the UTF-8
- *	string. If the current location was already at the beginning of the
- *	string, the return value will also be a pointer to the beginning of
- *	the string.
+ *	A pointer to the start of a character in the string as described
+ *	above.
  *
  * Side effects:
  *	None.
@@ -693,9 +721,8 @@ Tcl_UtfNext(
 
 const char *
 Tcl_UtfPrev(
-    const char *src,		/* The current location in the string. */
-    const char *start)		/* Pointer to the beginning of the string, to
-				 * avoid going backwards too far. */
+    const char *src,		/* A location in a UTF-8 string. */
+    const char *start)		/* Pointer to the beginning of the string */
 {
     const char *look;
     int i, byte;
@@ -713,6 +740,9 @@ Tcl_UtfPrev(
 	    break;
 	}
 	if (byte >= 0xC0) {
+	    if (totalBytes[byte] <= i) {
+		break;
+	    }
 	    return look;
 	}
 	look--;
@@ -1692,7 +1722,7 @@ Tcl_UniCharIsSpace(
      */
 
     if (ch < 0x80) {
-	return TclIsSpaceProc((char) ch);
+	return TclIsSpaceProcM((char) ch);
 #if TCL_UTF_MAX > 3
     } else if (UNICODE_OUT_OF_RANGE(ch)) {
 	return 0;