diff options
author | dgp <dgp@users.sourceforge.net> | 2020-04-13 18:42:09 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-04-13 18:42:09 (GMT) |
commit | 4214a568d4ce47e17c79050848b51f51b2ffb8df (patch) | |
tree | cb8a274856e287b0e9ee9948156c5d544c3f8cc8 /generic | |
parent | 01fa998afeaf983e50cf0ab93936a53250a0fa4c (diff) | |
download | tcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.zip tcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.tar.gz tcl-4214a568d4ce47e17c79050848b51f51b2ffb8df.tar.bz2 |
Make the comments describing Tcl_UtfPrev more complete and precise.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclUtf.c | 47 |
1 files changed, 37 insertions, 10 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1a8d515..fbdba4c 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -654,15 +654,43 @@ Tcl_UtfNext( * * Tcl_UtfPrev -- * - * Given a pointer to some current location in a UTF-8 string, move - * backwards one character. This works correctly when the pointer is in - * the middle of a UTF-8 character. + * The aim of this routine is to provide a way to move backward + * through a UTF-8 string. The caller is expected to pass non-NULL + * pointer arguments start and src. start points to the beginning + * of a string, and src >= start points to a location within (or just + * past the end of) the string. This routine always returns a + * pointer within the string (>= start). When (src == start), it + * returns start. When (src > start), it returns a pointer (< src) + * and (>= src - TCL_UTF_MAX). Subject to these constraints, the + * routine returns a pointer to the earliest byte in the string that + * starts a character when characters are read starting at start and + * that character might include the byte src[-1]. The routine will + * examine only those bytes in the range that might be returned. + * It will not examine the byte *src, and because of that cannot + * determine for certain in all circumstances whether the character + * that begins with the returned pointer will or will not include + * the byte src[-1]. In the scenario where src points to the end of + * a buffer being filled, the returned pointer points to either the + * final complete character in the string or to the earliest byte + * that might start an incomplete character waiting for more bytes to + * complete. + * + * Because this routine always returns a value < src until the point + * it is forced to return start, it is useful as a backward iterator + * through a string that will always make progress and always be + * prevented from running past the beginning of the string. 
+ * + * In a string where all characters are complete and properly formed, + * and the value of src points to the first byte of a character, + * repeated Tcl_UtfPrev calls will step to the starting bytes of + * characters, one character at a time. Within those limitations, + * Tcl_UtfPrev and Tcl_UtfNext are inverses. If either condition cannot + * be met, Tcl_UtfPrev and Tcl_UtfNext may not function as inverses and + * the caller will have to take greater care. * * Results: - * The return value is a pointer to the previous character in the UTF-8 - * string. If the current location was already at the beginning of the - * string, the return value will also be a pointer to the beginning of - * the string. + * A pointer to the start of a character in the string as described + * above. * * Side effects: * None. @@ -672,9 +700,8 @@ Tcl_UtfNext( CONST char * Tcl_UtfPrev( - CONST char *src, /* The current location in the string. */ - CONST char *start) /* Pointer to the beginning of the string, to - * avoid going backwards too far. */ + CONST char *src, /* A location in a UTF-8 string. */ + CONST char *start) /* Pointer to the beginning of the string */ { CONST char *look; int i, byte; |