From a23a8f73b3f2aba2722a1363e2d822018fbf504c Mon Sep 17 00:00:00 2001 From: stanton Date: Thu, 6 May 1999 18:46:23 +0000 Subject: * doc/Utf.3: * generic/tclStubInit.c: * generic/tclDecls.h: * generic/tclUtf.c: * generic/tcl.decls: Added Tcl_UtfNcmp and Tcl_UtfNcasecmp. --- doc/Utf.3 | 24 ++++++++++++-- generic/tcl.decls | 8 ++++- generic/tclDecls.h | 18 ++++++++++- generic/tclStubInit.c | 4 ++- generic/tclUtf.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 137 insertions(+), 6 deletions(-) diff --git a/doc/Utf.3 b/doc/Utf.3 index 12756bc..0c331a8 100644 --- a/doc/Utf.3 +++ b/doc/Utf.3 @@ -4,7 +4,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: Utf.3,v 1.3 1999/04/30 22:45:01 stanton Exp $ +'\" RCS: @(#) $Id: Utf.3,v 1.4 1999/05/06 18:46:23 stanton Exp $ '\" .so man.macros .TH Utf 3 "8.1" Tcl "Tcl Library Procedures" @@ -36,6 +36,12 @@ int \fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR) .sp int +\fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR) +.sp +int +\fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR) +.sp +int \fBTcl_UtfCharComplete\fR(\fIsrc, len\fR) .sp int @@ -83,7 +89,7 @@ equal to 0. .AP "Tcl_DString" *dstPtr in/out A pointer to a previously-initialized \fBTcl_DString\fR. .AP size_t n in -The number of Unicode characters to compare in \fBTcl_UniCharNcmp\fR. +The number of characters to compare. .AP "CONST char" *start in Pointer to the beginning of a UTF-8 string. .AP int index in @@ -151,6 +157,20 @@ greater than, equal to, or less than 0 if the first string is greater than, equal to, or less than the second string respectively. .PP +\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It +accepts two null-terminated UTF-8 strings and the number of characters +to compare. (Both strings are assumed to be at least \fInum\fR +characters long.) 
\fBTcl_UtfNcmp\fR compares the two strings +character-by-character according to the Unicode character ordering. +It returns an integer greater than, equal to, or less than 0 if the +first string is greater than, equal to, or less than the second string +respectively. +.PP +\fBTcl_UtfNcasecmp\fR corresponds to \fBstrncasecmp\fR for UTF-8 +strings. It is similar to \fBTcl_UtfNcmp\fR except comparisons ignore +differences in case when comparing upper, lower or title case +characters. +.PP \fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR of length \fIlen\fR bytes is long enough to be decoded by \fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee diff --git a/generic/tcl.decls b/generic/tcl.decls index 92d2bb2..c07db70 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -10,7 +10,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: tcl.decls,v 1.10 1999/04/21 21:50:23 rjohnson Exp $ +# RCS: @(#) $Id: tcl.decls,v 1.11 1999/05/06 18:46:24 stanton Exp $ library tcl @@ -1256,6 +1256,12 @@ declare 367 generic { declare 368 generic { int Tcl_Stat(CONST char *path, struct stat *bufPtr) } +declare 369 generic { + int Tcl_UtfNcmp(CONST char *s1, CONST char *s2, size_t n) +} +declare 370 generic { + int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n) +} ############################################################################## diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 5a2a70c..19428fe 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
* - * RCS: @(#) $Id: tclDecls.h,v 1.10 1999/04/30 22:45:01 stanton Exp $ + * RCS: @(#) $Id: tclDecls.h,v 1.11 1999/05/06 18:46:24 stanton Exp $ */ #ifndef _TCLDECLS @@ -1123,6 +1123,12 @@ EXTERN int Tcl_Access _ANSI_ARGS_((CONST char * path, int mode)); /* 368 */ EXTERN int Tcl_Stat _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); +/* 369 */ +EXTERN int Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1, + CONST char * s2, size_t n)); +/* 370 */ +EXTERN int Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1, + CONST char * s2, size_t n)); typedef struct TclStubHooks { struct TclPlatStubs *tclPlatStubs; @@ -1527,6 +1533,8 @@ typedef struct TclStubs { int (*tcl_Chdir) _ANSI_ARGS_((CONST char * dirName)); /* 366 */ int (*tcl_Access) _ANSI_ARGS_((CONST char * path, int mode)); /* 367 */ int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */ + int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */ + int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */ } TclStubs; #ifdef __cplusplus @@ -3007,6 +3015,14 @@ extern TclStubs *tclStubsPtr; #define Tcl_Stat \ (tclStubsPtr->tcl_Stat) /* 368 */ #endif +#ifndef Tcl_UtfNcmp +#define Tcl_UtfNcmp \ + (tclStubsPtr->tcl_UtfNcmp) /* 369 */ +#endif +#ifndef Tcl_UtfNcasecmp +#define Tcl_UtfNcasecmp \ + (tclStubsPtr->tcl_UtfNcasecmp) /* 370 */ +#endif #endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 2ac795b..632f4a8 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
* - * RCS: @(#) $Id: tclStubInit.c,v 1.12 1999/04/24 01:46:52 stanton Exp $ + * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $ */ #include "tclInt.h" @@ -684,6 +684,8 @@ TclStubs tclStubs = { Tcl_Chdir, /* 366 */ Tcl_Access, /* 367 */ Tcl_Stat, /* 368 */ + Tcl_UtfNcmp, /* 369 */ + Tcl_UtfNcasecmp, /* 370 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 0c46d26..9881f7d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.4 1999/04/30 16:22:24 hershey Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.5 1999/05/06 18:46:25 stanton Exp $ */ #include "tclInt.h" @@ -1048,6 +1048,93 @@ Tcl_UtfToTitle(str) /* *---------------------------------------------------------------------- * + * Tcl_UtfNcmp -- + * + * Compare at most n UTF chars of string cs to string ct. Both cs + * and ct are assumed to be at least n UTF chars long. + * + * Results: + * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_UtfNcmp(cs, ct, n) + CONST char *cs; /* UTF string to compare to ct. */ + CONST char *ct; /* UTF string cs is compared to. */ + size_t n; /* Number of UTF chars to compare. */ +{ + Tcl_UniChar ch1, ch2; + /* + * Another approach that should work is: + * return memcmp(cs, ct, (unsigned) (Tcl_UtfAtIndex(cs, n) - cs)); + * That assumes that ct is a properly formed UTF, so we will just + * be comparing the bytes that comprise those strings to the + * char length n. + */ + while (n-- > 0) { + /* + * n must be interpreted as chars, not bytes. 
+         * This should be called only when both strings are of
+         * at least n chars long (no need for \0 check)
+         */
+        cs += Tcl_UtfToUniChar(cs, &ch1);
+        ct += Tcl_UtfToUniChar(ct, &ch2);
+        if (ch1 != ch2) {
+            return (ch1 - ch2);
+        }
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UtfNcasecmp --
+ *
+ *      Compare at most n UTF chars of string cs to string ct case
+ *      insensitive.  Both cs and ct are assumed to be at least n
+ *      UTF chars long.
+ *
+ * Results:
+ *      Return <0, 0, or >0 as cs is <, ==, or > ct, ignoring case.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UtfNcasecmp(cs, ct, n)
+    CONST char *cs;             /* UTF string to compare to ct. */
+    CONST char *ct;             /* UTF string cs is compared to. */
+    size_t n;                   /* Number of UTF chars to compare. */
+{
+    Tcl_UniChar ch1, ch2;
+    while (n-- > 0) {
+        /*
+         * n counts chars, not bytes; both strings must hold at least
+         * n chars (no \0 check).  Chars that differ are ordered by
+         * their lowercase forms so the sign of the result ignores case.
+         */
+        cs += Tcl_UtfToUniChar(cs, &ch1);
+        ct += Tcl_UtfToUniChar(ct, &ch2);
+        if ((ch1 != ch2) &&
+                (Tcl_UniCharToLower(ch1) != Tcl_UniCharToLower(ch2))) {
+            return (Tcl_UniCharToLower(ch1) - Tcl_UniCharToLower(ch2));
+        }
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
  * Tcl_UniCharToUpper --
  *
  *      Compute the uppercase equivalent of the given Unicode character.
-- 
cgit v0.12