summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authordgp <dgp@users.sourceforge.net>2020-04-13 18:42:09 (GMT)
committerdgp <dgp@users.sourceforge.net>2020-04-13 18:42:09 (GMT)
commit4214a568d4ce47e17c79050848b51f51b2ffb8df (patch)
treecb8a274856e287b0e9ee9948156c5d544c3f8cc8 /generic
parent01fa998afeaf983e50cf0ab93936a53250a0fa4c (diff)
downloadtcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.zip
tcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.tar.gz
tcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.tar.bz2
Make the comments describing Tcl_UtfPrev more complete and precise.
Diffstat (limited to 'generic')
-rw-r--r--generic/tclUtf.c47
1 files changed, 37 insertions, 10 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 1a8d515..fbdba4c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -654,15 +654,43 @@ Tcl_UtfNext(
*
* Tcl_UtfPrev --
*
- * Given a pointer to some current location in a UTF-8 string, move
- * backwards one character. This works correctly when the pointer is in
- * the middle of a UTF-8 character.
+ * The aim of this routine is to provide a way to move backward
+ * through a UTF-8 string. The caller is expected to pass non-NULL
+ * pointer arguments start and src. start points to the beginning
+ * of a string, and src >= start points to a location within (or just
+ * past the end of) the string. This routine always returns a
+ * pointer within the string (>= start). When (src == start), it
+ * returns start. When (src > start), it returns a pointer (< src)
+ * and (>= src - TCL_UTF_MAX). Subject to these constraints, the
+ * routine returns a pointer to the earliest byte in the string that
+ * starts a character (when characters are read starting at start)
+ * such that the character might include the byte src[-1]. The routine will
+ * examine only those bytes in the range that might be returned.
+ * It will not examine the byte *src, and because of that cannot
+ * determine for certain in all circumstances whether the character
+ * that begins with the returned pointer will or will not include
+ * the byte src[-1]. In the scenario where src points to the end of
+ * a buffer being filled, the returned pointer points to either the
+ * final complete character in the string or to the earliest byte
+ * that might start an incomplete character waiting for more bytes to
+ * complete.
+ *
+ * Because this routine always returns a value < src until the point
+ * it is forced to return start, it is useful as a backward iterator
+ * through a string that will always make progress and always be
+ * prevented from running past the beginning of the string.
+ *
+ * In a string where all characters are complete and properly formed,
+ * and the value of src points to the first byte of a character,
+ * repeated Tcl_UtfPrev calls will step to the starting bytes of
+ * characters, one character at a time. Within those limitations,
+ * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot
+ * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and
+ * the caller will have to take greater care.
*
* Results:
- * The return value is a pointer to the previous character in the UTF-8
- * string. If the current location was already at the beginning of the
- * string, the return value will also be a pointer to the beginning of
- * the string.
+ * A pointer to the start of a character in the string as described
+ * above.
*
* Side effects:
* None.
@@ -672,9 +700,8 @@ Tcl_UtfNext(
CONST char *
Tcl_UtfPrev(
- CONST char *src, /* The current location in the string. */
- CONST char *start) /* Pointer to the beginning of the string, to
- * avoid going backwards too far. */
+ CONST char *src, /* A location in a UTF-8 string. */
+ CONST char *start) /* Pointer to the beginning of the string */
{
CONST char *look;
int i, byte;