diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2005-07-21 14:38:31 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2005-07-21 14:38:31 (GMT) |
commit | be7cd35abf2f4421f8c0c70780675e4313589df3 (patch) | |
tree | f4e1f849d58fbb34a2a00e11e8f3286b0d65cf09 /generic/tclUtf.c | |
parent | 04b1bffa1cc7b07cafdb83dd3f39c271f6493f7b (diff) | |
download | tcl-be7cd35abf2f4421f8c0c70780675e4313589df3.zip tcl-be7cd35abf2f4421f8c0c70780675e4313589df3.tar.gz tcl-be7cd35abf2f4421f8c0c70780675e4313589df3.tar.bz2 |
Systematizing the formatting
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 415 |
1 files changed, 217 insertions, 198 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 7a5494a..fbd37e6 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -5,10 +5,10 @@ * * Copyright (c) 1997-1998 Sun Microsystems, Inc. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.34 2005/05/10 18:34:51 kennykb Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.35 2005/07/21 14:38:51 dkf Exp $ */ #include "tclInt.h" @@ -20,35 +20,35 @@ #include "tclUniData.c" /* - * The following macros are used for fast character category tests. The - * x_BITS values are shifted right by the category value to determine whether - * the given category is included in the set. + * The following macros are used for fast character category tests. The x_BITS + * values are shifted right by the category value to determine whether the + * given category is included in the set. */ #define ALPHA_BITS ((1 << UPPERCASE_LETTER) | (1 << LOWERCASE_LETTER) \ - | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1 << OTHER_LETTER)) + | (1 << TITLECASE_LETTER) | (1 << MODIFIER_LETTER) | (1<<OTHER_LETTER)) #define DIGIT_BITS (1 << DECIMAL_DIGIT_NUMBER) #define SPACE_BITS ((1 << SPACE_SEPARATOR) | (1 << LINE_SEPARATOR) \ - | (1 << PARAGRAPH_SEPARATOR)) + | (1 << PARAGRAPH_SEPARATOR)) #define CONNECTOR_BITS (1 << CONNECTOR_PUNCTUATION) #define PRINT_BITS (ALPHA_BITS | DIGIT_BITS | SPACE_BITS | \ - (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ - (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ - (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ - (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ - (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ - (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ - (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ - (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) + (1 << NON_SPACING_MARK) | (1 << ENCLOSING_MARK) | \ + (1 << COMBINING_SPACING_MARK) | (1 << LETTER_NUMBER) | \ + (1 << OTHER_NUMBER) | (1 << CONNECTOR_PUNCTUATION) | \ + (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ + (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ + (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION) | \ + (1 << MATH_SYMBOL) | (1 << CURRENCY_SYMBOL) | \ + (1 << MODIFIER_SYMBOL) | (1 << OTHER_SYMBOL)) #define PUNCT_BITS ((1 << CONNECTOR_PUNCTUATION) | \ - (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ - (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ - (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) + (1 << DASH_PUNCTUATION) | (1 << OPEN_PUNCTUATION) | \ + (1 << CLOSE_PUNCTUATION) | (1 << INITIAL_QUOTE_PUNCTUATION) | \ + (1 << FINAL_QUOTE_PUNCTUATION) | (1 << OTHER_PUNCTUATION)) /* * Unicode characters less than this value are represented by themselves @@ -93,7 +93,6 @@ static CONST unsigned char totalBytes[256] = { */ static int UtfCount _ANSI_ARGS_((int ch)); - /* *--------------------------------------------------------------------------- @@ -144,11 +143,11 @@ UtfCount(ch) * Tcl_UniCharToUtf -- * * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the - * provided buffer. Equivalent to Plan 9 runetochar(). + * provided buffer. Equivalent to Plan 9 runetochar(). * * Results: - * The return values is the number of bytes in the buffer that - * were consumed. + * The return values is the number of bytes in the buffer that were + * consumed. * * Side effects: * None. @@ -160,9 +159,9 @@ INLINE int Tcl_UniCharToUtf(ch, buf) int ch; /* The Tcl_UniChar to be stored in the * buffer. */ - char *buf; /* Buffer in which the UTF-8 representation - * of the Tcl_UniChar is stored. Buffer must - * be large enough to hold the UTF-8 character + char *buf; /* Buffer in which the UTF-8 representation of + * the Tcl_UniChar is stored. Buffer must be + * large enough to hold the UTF-8 character * (at most TCL_UTF_MAX bytes). */ { if ((ch > 0) && (ch < UNICODE_SELF)) { @@ -222,8 +221,8 @@ Tcl_UniCharToUtf(ch, buf) * * Results: * The return value is a pointer to the UTF-8 representation of the - * Unicode string. Storage for the return value is appended to the - * end of dsPtr. + * Unicode string. Storage for the return value is appended to the end of + * dsPtr. * * Side effects: * None. @@ -236,9 +235,8 @@ Tcl_UniCharToUtfDString(uniStr, uniLength, dsPtr) CONST Tcl_UniChar *uniStr; /* Unicode string to convert to UTF-8. */ int uniLength; /* Length of Unicode string in Tcl_UniChars * (must be >= 0). */ - Tcl_DString *dsPtr; /* UTF-8 representation of string is - * appended to this previously initialized - * DString. */ + Tcl_DString *dsPtr; /* UTF-8 representation of string is appended + * to this previously initialized DString. */ { CONST Tcl_UniChar *w, *wEnd; char *p, *string; @@ -269,16 +267,16 @@ Tcl_UniCharToUtfDString(uniStr, uniLength, dsPtr) * * Tcl_UtfToUniChar -- * - * Extract the Tcl_UniChar represented by the UTF-8 string. Bad - * UTF-8 sequences are converted to valid Tcl_UniChars and processing - * continues. Equivalent to Plan 9 chartorune(). + * Extract the Tcl_UniChar represented by the UTF-8 string. Bad UTF-8 + * sequences are converted to valid Tcl_UniChars and processing + * continues. Equivalent to Plan 9 chartorune(). * - * The caller must ensure that the source buffer is long enough that - * this routine does not run off the end and dereference non-existent - * memory looking for trail bytes. If the source buffer is known to - * be '\0' terminated, this cannot happen. Otherwise, the caller - * should call Tcl_UtfCharComplete() before calling this routine to - * ensure that enough bytes remain in the string. + * The caller must ensure that the source buffer is long enough that this + * routine does not run off the end and dereference non-existent memory + * looking for trail bytes. If the source buffer is known to be '\0' + * terminated, this cannot happen. Otherwise, the caller should call + * Tcl_UtfCharComplete() before calling this routine to ensure that + * enough bytes remain in the string. * * Results: * *chPtr is filled with the Tcl_UniChar, and the return value is the @@ -293,8 +291,8 @@ Tcl_UniCharToUtfDString(uniStr, uniLength, dsPtr) int Tcl_UtfToUniChar(src, chPtr) register CONST char *src; /* The UTF-8 string. */ - register Tcl_UniChar *chPtr; /* Filled with the Tcl_UniChar represented - * by the UTF-8 string. */ + register Tcl_UniChar *chPtr; /* Filled with the Tcl_UniChar represented by + * the UTF-8 string. */ { register int byte; @@ -321,6 +319,7 @@ Tcl_UtfToUniChar(src, chPtr) *chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F)); return 2; } + /* * A two-byte-character lead-byte not followed by trail-byte * represents itself. @@ -338,6 +337,7 @@ Tcl_UtfToUniChar(src, chPtr) | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); return 3; } + /* * A three-byte-character lead-byte not followed by two trail-bytes * represents itself. @@ -383,9 +383,8 @@ Tcl_UtfToUniChar(src, chPtr) * * Results: * The return value is a pointer to the Unicode representation of the - * UTF-8 string. Storage for the return value is appended to the - * end of dsPtr. The Unicode string is terminated with a Unicode - * NULL character. + * UTF-8 string. Storage for the return value is appended to the end of + * dsPtr. The Unicode string is terminated with a Unicode NULL character. * * Side effects: * None. @@ -396,8 +395,8 @@ Tcl_UtfToUniChar(src, chPtr) Tcl_UniChar * Tcl_UtfToUniCharDString(src, length, dsPtr) CONST char *src; /* UTF-8 string to convert to Unicode. */ - int length; /* Length of UTF-8 string in bytes, or -1 - * for strlen(). */ + int length; /* Length of UTF-8 string in bytes, or -1 for + * strlen(). */ Tcl_DString *dsPtr; /* Unicode representation of string is * appended to this previously initialized * DString. */ @@ -411,8 +410,8 @@ Tcl_UtfToUniCharDString(src, length, dsPtr) } /* - * Unicode string length in Tcl_UniChars will be <= UTF-8 string length - * in bytes. + * Unicode string length in Tcl_UniChars will be <= UTF-8 string length in + * bytes. */ oldLength = Tcl_DStringLength(dsPtr); @@ -438,9 +437,9 @@ Tcl_UtfToUniCharDString(src, length, dsPtr) * * Tcl_UtfCharComplete -- * - * Determine if the UTF-8 string of the given length is long enough - * to be decoded by Tcl_UtfToUniChar(). This does not ensure that the - * UTF-8 string is properly formed. Equivalent to Plan 9 fullrune(). + * Determine if the UTF-8 string of the given length is long enough to be + * decoded by Tcl_UtfToUniChar(). This does not ensure that the UTF-8 + * string is properly formed. Equivalent to Plan 9 fullrune(). * * Results: * The return value is 0 if the string is not long enough, non-zero @@ -454,8 +453,8 @@ Tcl_UtfToUniCharDString(src, length, dsPtr) int Tcl_UtfCharComplete(src, length) - CONST char *src; /* String to check if first few bytes - * contain a complete UTF-8 character. */ + CONST char *src; /* String to check if first few bytes contain + * a complete UTF-8 character. */ int length; /* Length of above string in bytes. */ { int ch; @@ -469,9 +468,9 @@ Tcl_UtfCharComplete(src, length) * * Tcl_NumUtfChars -- * - * Returns the number of characters (not bytes) in the UTF-8 string, - * not including the terminating NULL byte. This is equivalent to - * Plan 9 utflen() and utfnlen(). + * Returns the number of characters (not bytes) in the UTF-8 string, not + * including the terminating NULL byte. This is equivalent to Plan 9 + * utflen() and utfnlen(). * * Results: * As above. @@ -495,8 +494,8 @@ Tcl_NumUtfChars(src, length) /* * The separate implementations are faster. * - * Since this is a time-sensitive function, we also do the check for - * the single-byte char case specially. + * Since this is a time-sensitive function, we also do the check for the + * single-byte char case specially. */ i = 0; @@ -528,14 +527,13 @@ Tcl_NumUtfChars(src, length) * * Tcl_UtfFindFirst -- * - * Returns a pointer to the first occurance of the given Tcl_UniChar - * in the NULL-terminated UTF-8 string. The NULL terminator is - * considered part of the UTF-8 string. Equivalent to Plan 9 - * utfrune(). + * Returns a pointer to the first occurance of the given Tcl_UniChar in + * the NULL-terminated UTF-8 string. The NULL terminator is considered + * part of the UTF-8 string. Equivalent to Plan 9 utfrune(). * * Results: - * As above. If the Tcl_UniChar does not exist in the given string, - * the return value is NULL. + * As above. If the Tcl_UniChar does not exist in the given string, the + * return value is NULL. * * Side effects: * None. @@ -567,14 +565,13 @@ Tcl_UtfFindFirst(src, ch) * * Tcl_UtfFindLast -- * - * Returns a pointer to the last occurance of the given Tcl_UniChar - * in the NULL-terminated UTF-8 string. The NULL terminator is - * considered part of the UTF-8 string. Equivalent to Plan 9 - * utfrrune(). + * Returns a pointer to the last occurance of the given Tcl_UniChar in + * the NULL-terminated UTF-8 string. The NULL terminator is considered + * part of the UTF-8 string. Equivalent to Plan 9 utfrrune(). * * Results: - * As above. If the Tcl_UniChar does not exist in the given string, - * the return value is NULL. + * As above. If the Tcl_UniChar does not exist in the given string, the + * return value is NULL. * * Side effects: * None. @@ -610,14 +607,13 @@ Tcl_UtfFindLast(src, ch) * * Tcl_UtfNext -- * - * Given a pointer to some current location in a UTF-8 string, - * move forward one character. The caller must ensure that they - * are not asking for the next character after the last character - * in the string. + * Given a pointer to some current location in a UTF-8 string, move + * forward one character. The caller must ensure that they are not asking + * for the next character after the last character in the string. * * Results: - * The return value is the pointer to the next character in - * the UTF-8 string. + * The return value is the pointer to the next character in the UTF-8 + * string. * * Side effects: * None. @@ -639,15 +635,15 @@ Tcl_UtfNext(src) * * Tcl_UtfPrev -- * - * Given a pointer to some current location in a UTF-8 string, - * move backwards one character. This works correctly when the - * pointer is in the middle of a UTF-8 character. + * Given a pointer to some current location in a UTF-8 string, move + * backwards one character. This works correctly when the pointer is in + * the middle of a UTF-8 character. * * Results: - * The return value is a pointer to the previous character in the - * UTF-8 string. If the current location was already at the - * beginning of the string, the return value will also be a - * pointer to the beginning of the string. + * The return value is a pointer to the previous character in the UTF-8 + * string. If the current location was already at the beginning of the + * string, the return value will also be a pointer to the beginning of + * the string. * * Side effects: * None. @@ -657,10 +653,9 @@ Tcl_UtfNext(src) CONST char * Tcl_UtfPrev(src, start) - CONST char *src; /* The current location in the string. */ - CONST char *start; /* Pointer to the beginning of the - * string, to avoid going backwards too - * far. */ + CONST char *src; /* The current location in the string. */ + CONST char *start; /* Pointer to the beginning of the string, to + * avoid going backwards too far. */ { CONST char *look; int i, byte; @@ -691,8 +686,8 @@ Tcl_UtfPrev(src, start) * * Tcl_UniCharAtIndex -- * - * Returns the Unicode character represented at the specified - * character (not byte) position in the UTF-8 string. + * Returns the Unicode character represented at the specified character + * (not byte) position in the UTF-8 string. * * Results: * As above. @@ -722,8 +717,8 @@ Tcl_UniCharAtIndex(src, index) * * Tcl_UtfAtIndex -- * - * Returns a pointer to the specified character (not byte) position - * in the UTF-8 string. + * Returns a pointer to the specified character (not byte) position in + * the UTF-8 string. * * Results: * As above. @@ -757,30 +752,29 @@ Tcl_UtfAtIndex(src, index) * * Results: * Stores the bytes represented by the backslash sequence in dst and - * returns the number of bytes written to dst. At most TCL_UTF_MAX - * bytes are written to dst; dst must have been large enough to accept - * those bytes. If readPtr isn't NULL then it is filled in with a - * count of the number of bytes in the backslash sequence. + * returns the number of bytes written to dst. At most TCL_UTF_MAX bytes + * are written to dst; dst must have been large enough to accept those + * bytes. If readPtr isn't NULL then it is filled in with a count of the + * number of bytes in the backslash sequence. * * Side effects: - * The maximum number of bytes it takes to represent a Unicode - * character in UTF-8 is guaranteed to be less than the number of - * bytes used to express the backslash sequence that represents - * that Unicode character. If the target buffer into which the - * caller is going to store the bytes that represent the Unicode - * character is at least as large as the source buffer from which - * the backslashed sequence was extracted, no buffer overruns should - * occur. + * The maximum number of bytes it takes to represent a Unicode character + * in UTF-8 is guaranteed to be less than the number of bytes used to + * express the backslash sequence that represents that Unicode character. + * If the target buffer into which the caller is going to store the bytes + * that represent the Unicode character is at least as large as the + * source buffer from which the backslashed sequence was extracted, no + * buffer overruns should occur. * *--------------------------------------------------------------------------- */ int Tcl_UtfBackslash(src, readPtr, dst) - CONST char *src; /* Points to the backslash character of - * a backslash sequence. */ - int *readPtr; /* Fill in with number of characters read - * from src, unless NULL. */ + CONST char *src; /* Points to the backslash character of a + * backslash sequence. */ + int *readPtr; /* Fill in with number of characters read from + * src, unless NULL. */ char *dst; /* Filled with the bytes represented by the * backslash sequence. */ { @@ -804,12 +798,12 @@ Tcl_UtfBackslash(src, readPtr, dst) * * Tcl_UtfToUpper -- * - * Convert lowercase characters to uppercase characters in a UTF - * string in place. The conversion may shrink the UTF string. + * Convert lowercase characters to uppercase characters in a UTF string + * in place. The conversion may shrink the UTF string. * * Results: - * Returns the number of bytes in the resulting string - * excluding the trailing null. + * Returns the number of bytes in the resulting string excluding the + * trailing null. * * Side effects: * Writes a terminating null after the last converted character. @@ -831,13 +825,13 @@ Tcl_UtfToUpper(str) src = dst = str; while (*src) { - bytes = TclUtfToUniChar(src, &ch); + bytes = TclUtfToUniChar(src, &ch); upChar = Tcl_UniCharToUpper(ch); /* - * To keep badly formed Utf strings from getting inflated by - * the conversion (thereby causing a segfault), only copy the - * upper case char to dst if its size is <= the original char. + * To keep badly formed Utf strings from getting inflated by the + * conversion (thereby causing a segfault), only copy the upper case + * char to dst if its size is <= the original char. */ if (bytes < UtfCount(upChar)) { @@ -857,12 +851,12 @@ Tcl_UtfToUpper(str) * * Tcl_UtfToLower -- * - * Convert uppercase characters to lowercase characters in a UTF - * string in place. The conversion may shrink the UTF string. + * Convert uppercase characters to lowercase characters in a UTF string + * in place. The conversion may shrink the UTF string. * * Results: - * Returns the number of bytes in the resulting string - * excluding the trailing null. + * Returns the number of bytes in the resulting string excluding the + * trailing null. * * Side effects: * Writes a terminating null after the last converted character. @@ -888,9 +882,9 @@ Tcl_UtfToLower(str) lowChar = Tcl_UniCharToLower(ch); /* - * To keep badly formed Utf strings from getting inflated by - * the conversion (thereby causing a segfault), only copy the - * lower case char to dst if its size is <= the original char. + * To keep badly formed Utf strings from getting inflated by the + * conversion (thereby causing a segfault), only copy the lower case + * char to dst if its size is <= the original char. */ if (bytes < UtfCount(lowChar)) { @@ -910,13 +904,13 @@ Tcl_UtfToLower(str) * * Tcl_UtfToTitle -- * - * Changes the first character of a UTF string to title case or - * uppercase and the rest of the string to lowercase. The - * conversion happens in place and may shrink the UTF string. + * Changes the first character of a UTF string to title case or uppercase + * and the rest of the string to lowercase. The conversion happens in + * place and may shrink the UTF string. * * Results: - * Returns the number of bytes in the resulting string - * excluding the trailing null. + * Returns the number of bytes in the resulting string excluding the + * trailing null. * * Side effects: * Writes a terminating null after the last converted character. @@ -972,8 +966,8 @@ Tcl_UtfToTitle(str) * * TclpUtfNcmp2 -- * - * Compare at most n bytes of utf-8 strings cs and ct. Both cs - * and ct are assumed to be at least n bytes long. + * Compare at most n bytes of utf-8 strings cs and ct. Both cs and ct are + * assumed to be at least n bytes long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. @@ -991,10 +985,11 @@ TclpUtfNcmp2(cs, ct, n) unsigned long n; /* Number of *bytes* to compare. */ { /* - * We can't simply call 'memcmp(cs, ct, n);' because we need to check - * for Tcl's \xC0\x80 non-utf-8 null encoding. - * Otherwise utf-8 lexes fine in the strcmp manner. + * We can't simply call 'memcmp(cs, ct, n);' because we need to check for + * Tcl's \xC0\x80 non-utf-8 null encoding. Otherwise utf-8 lexes fine in + * the strcmp manner. */ + register int result = 0; for ( ; n != 0; n--, cs++, ct++) { @@ -1005,6 +1000,7 @@ TclpUtfNcmp2(cs, ct, n) } if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) { unsigned char c1, c2; + c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs); c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct); result = (c1 - c2); @@ -1017,8 +1013,8 @@ TclpUtfNcmp2(cs, ct, n) * * Tcl_UtfNcmp -- * - * Compare at most numChars UTF chars of string cs to string ct. - * Both cs and ct are assumed to be at least numChars UTF chars long. + * Compare at most numChars UTF chars of string cs to string ct. Both cs + * and ct are assumed to be at least numChars UTF chars long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. @@ -1036,17 +1032,20 @@ Tcl_UtfNcmp(cs, ct, numChars) unsigned long numChars; /* Number of UTF chars to compare. */ { Tcl_UniChar ch1, ch2; + /* - * Cannot use 'memcmp(cs, ct, n);' as byte representation of - * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte - * representation of \u0001 (the byte 0x01.) + * Cannot use 'memcmp(cs, ct, n);' as byte representation of \u0000 (the + * pair of bytes 0xc0,0x80) is larger than byte representation of \u0001 + * (the byte 0x01.) */ + while (numChars-- > 0) { /* - * n must be interpreted as chars, not bytes. - * This should be called only when both strings are of - * at least n chars long (no need for \0 check) + * n must be interpreted as chars, not bytes. This should be called + * only when both strings are of at least n chars long (no need for \0 + * check) */ + cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { @@ -1251,12 +1250,14 @@ Tcl_UniCharNcmp(ucs, uct, numChars) /* * We are definitely on a big-endian machine; memcmp() is safe */ + return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); #else /* !WORDS_BIGENDIAN */ /* * We can't simply call memcmp() because that is not lexically correct. */ + for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { return (*ucs - *uct); @@ -1556,8 +1557,7 @@ Tcl_UniCharIsUpper(ch) * * Tcl_UniCharIsWordChar -- * - * Test if a character is alphanumeric or a connector punctuation - * mark. + * Test if a character is alphanumeric or a connector punctuation mark. * * Results: * Returns 1 if character is a word character. @@ -1583,17 +1583,16 @@ Tcl_UniCharIsWordChar(ch) * Tcl_UniCharCaseMatch -- * * See if a particular Unicode string matches a particular pattern. - * Allows case insensitivity. This is the Unicode equivalent of - * the char* Tcl_StringCaseMatch. The UniChar strings must be - * NULL-terminated. This has no provision for counted UniChar - * strings, thus should not be used where NULLs are expected in the - * UniChar string. Use TclUniCharMatch where possible. + * Allows case insensitivity. This is the Unicode equivalent of the char* + * Tcl_StringCaseMatch. The UniChar strings must be NULL-terminated. + * This has no provision for counted UniChar strings, thus should not be + * used where NULLs are expected in the UniChar string. Use + * TclUniCharMatch where possible. * * Results: - * The return value is 1 if string matches pattern, and - * 0 otherwise. The matching operation permits the following - * special characters in the pattern: *?\[] (see the manual - * entry for details on what these mean). + * The return value is 1 if string matches pattern, and 0 otherwise. The + * matching operation permits the following special characters in the + * pattern: *?\[] (see the manual entry for details on what these mean). * * Side effects: * None. @@ -1603,9 +1602,10 @@ Tcl_UniCharIsWordChar(ch) int Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) - CONST Tcl_UniChar *uniStr; /* Unicode String. */ - CONST Tcl_UniChar *uniPattern; /* Pattern, which may contain special - * characters. */ + CONST Tcl_UniChar *uniStr; /* Unicode String. */ + CONST Tcl_UniChar *uniPattern; + /* Pattern, which may contain special + * characters. */ int nocase; /* 0 for case sensitive, 1 for insensitive */ { Tcl_UniChar ch1, p; @@ -1614,9 +1614,9 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) p = *uniPattern; /* - * See if we're at the end of both the pattern and the string. If - * so, we succeeded. If we're at the end of the pattern but not at - * the end of the string, we failed. + * See if we're at the end of both the pattern and the string. If so, + * we succeeded. If we're at the end of the pattern but not at the end + * of the string, we failed. */ if (p == 0) { @@ -1627,8 +1627,8 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) } /* - * Check for a "*" as the next pattern character. It matches any - * substring. We handle this by skipping all the characters up to the + * Check for a "*" as the next pattern character. It matches any + * substring. We handle this by skipping all the characters up to the * next matching one in the pattern, and then calling ourselves * recursively for each postfix of string, until either we match or we * reach the end of the string. @@ -1638,7 +1638,10 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) /* * Skip all successive *'s in the pattern */ - while (*(++uniPattern) == '*') {} + + while (*(++uniPattern) == '*') { + /* empty body */ + } p = *uniPattern; if (p == 0) { return 1; @@ -1652,6 +1655,7 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) * quickly if the next char in the pattern isn't a special * character */ + if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*uniStr && (p != *uniStr) @@ -1659,7 +1663,9 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) uniStr++; } } else { - while (*uniStr && (p != *uniStr)) { uniStr++; } + while (*uniStr && (p != *uniStr)) { + uniStr++; + } } } if (Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase)) { @@ -1673,8 +1679,8 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) } /* - * Check for a "?" as the next pattern character. It matches - * any single character. + * Check for a "?" as the next pattern character. It matches any + * single character. */ if (p == '?') { @@ -1684,9 +1690,9 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) } /* - * Check for a "[" as the next pattern character. It is followed - * by a list of characters that are acceptable, or by a range - * (two characters separated by "-"). + * Check for a "[" as the next pattern character. It is followed by a + * list of characters that are acceptable, or by a range (two + * characters separated by "-"). */ if (p == '[') { @@ -1699,7 +1705,8 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) if ((*uniPattern == ']') || (*uniPattern == 0)) { return 0; } - startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) : *uniPattern); + startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); uniPattern++; if (*uniPattern == '-') { uniPattern++; @@ -1732,8 +1739,8 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) } /* - * If the next pattern character is '\', just strip off the '\' - * so we do exact matching on the character that follows. + * If the next pattern character is '\', just strip off the '\' so we + * do exact matching on the character that follows. */ if (p == '\\') { @@ -1743,12 +1750,13 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) } /* - * There's no special character. Just make sure that the next - * bytes of each string match. + * There's no special character. Just make sure that the next bytes + * of each string match. */ if (nocase) { - if (Tcl_UniCharToLower(*uniStr) != Tcl_UniCharToLower(*uniPattern)) { + if (Tcl_UniCharToLower(*uniStr) != + Tcl_UniCharToLower(*uniPattern)) { return 0; } } else if (*uniStr != *uniPattern) { @@ -1765,15 +1773,14 @@ Tcl_UniCharCaseMatch(uniStr, uniPattern, nocase) * TclUniCharMatch -- * * See if a particular Unicode string matches a particular pattern. - * Allows case insensitivity. This is the Unicode equivalent of the - * char* Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch - * uses counted Strings, so embedded NULLs are allowed. + * Allows case insensitivity. This is the Unicode equivalent of the char* + * Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch uses counted + * Strings, so embedded NULLs are allowed. * * Results: - * The return value is 1 if string matches pattern, and - * 0 otherwise. The matching operation permits the following - * special characters in the pattern: *?\[] (see the manual - * entry for details on what these mean). + * The return value is 1 if string matches pattern, and 0 otherwise. The + * matching operation permits the following special characters in the + * pattern: *?\[] (see the manual entry for details on what these mean). * * Side effects: * None. @@ -1793,14 +1800,14 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) CONST Tcl_UniChar *stringEnd, *patternEnd; Tcl_UniChar p; - stringEnd = string + strLen; + stringEnd = string + strLen; patternEnd = pattern + ptnLen; while (1) { /* - * See if we're at the end of both the pattern and the string. If - * so, we succeeded. If we're at the end of the pattern but not at - * the end of the string, we failed. + * See if we're at the end of both the pattern and the string. If so, + * we succeeded. If we're at the end of the pattern but not at the end + * of the string, we failed. */ if (pattern == patternEnd) { @@ -1812,8 +1819,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) } /* - * Check for a "*" as the next pattern character. It matches any - * substring. We handle this by skipping all the characters up to the + * Check for a "*" as the next pattern character. It matches any + * substring. We handle this by skipping all the characters up to the * next matching one in the pattern, and then calling ourselves * recursively for each postfix of string, until either we match or we * reach the end of the string. @@ -1821,9 +1828,12 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) if (p == '*') { /* - * Skip all successive *'s in the pattern + * Skip all successive *'s in the pattern. */ - while (*(++pattern) == '*') {} + + while (*(++pattern) == '*') { + /* empty body */ + } if (pattern == patternEnd) { return 1; } @@ -1835,8 +1845,9 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special - * character + * character. */ + if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while ((string < stringEnd) && (p != *string) @@ -1861,8 +1872,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) } /* - * Check for a "?" as the next pattern character. It matches - * any single character. + * Check for a "?" as the next pattern character. It matches any + * single character. */ if (p == '?') { @@ -1872,9 +1883,9 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) } /* - * Check for a "[" as the next pattern character. It is followed - * by a list of characters that are acceptable, or by a range - * (two characters separated by "-"). + * Check for a "[" as the next pattern character. It is followed by a + * list of characters that are acceptable, or by a range (two + * characters separated by "-"). */ if (p == '[') { @@ -1920,8 +1931,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) } /* - * If the next pattern character is '\', just strip off the '\' - * so we do exact matching on the character that follows. + * If the next pattern character is '\', just strip off the '\' so we + * do exact matching on the character that follows. */ if (p == '\\') { @@ -1931,8 +1942,8 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) } /* - * There's no special character. Just make sure that the next - * bytes of each string match. + * There's no special character. Just make sure that the next bytes of + * each string match. */ if (nocase) { @@ -1946,3 +1957,11 @@ TclUniCharMatch(string, strLen, pattern, ptnLen, nocase) pattern++; } } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ |