summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/Utf.324
-rw-r--r--generic/tcl.decls8
-rw-r--r--generic/tclDecls.h18
-rw-r--r--generic/tclStubInit.c4
-rw-r--r--generic/tclUtf.c89
5 files changed, 137 insertions, 6 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index 12756bc..0c331a8 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -4,7 +4,7 @@
'\" See the file "license.terms" for information on usage and redistribution
'\" of this file, and for a DISCLAIMER OF ALL WARRANTIES.
'\"
-'\" RCS: @(#) $Id: Utf.3,v 1.3 1999/04/30 22:45:01 stanton Exp $
+'\" RCS: @(#) $Id: Utf.3,v 1.4 1999/05/06 18:46:23 stanton Exp $
'\"
.so man.macros
.TH Utf 3 "8.1" Tcl "Tcl Library Procedures"
@@ -36,6 +36,12 @@ int
\fBTcl_UniCharNcmp\fR(\fIuniStr, uniStr, num\fR)
.sp
int
+\fBTcl_UtfNcmp\fR(\fIsrc, src, num\fR)
+.sp
+int
+\fBTcl_UtfNcasecmp\fR(\fIsrc, src, num\fR)
+.sp
+int
\fBTcl_UtfCharComplete\fR(\fIsrc, len\fR)
.sp
int
@@ -83,7 +89,7 @@ equal to 0.
.AP "Tcl_DString" *dstPtr in/out
A pointer to a previously-initialized \fBTcl_DString\fR.
.AP size_t n in
-The number of Unicode characters to compare in \fBTcl_UniCharNcmp\fR.
+The number of characters to compare.
.AP "CONST char" *start in
Pointer to the beginning of a UTF-8 string.
.AP int index in
@@ -151,6 +157,20 @@ greater than, equal to,
or less than 0 if the first string is greater than, equal to, or
less than the second string respectively.
.PP
+\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
+accepts two null-terminated UTF-8 strings and the number of characters
+to compare.  (Both strings are assumed to be at least \fInum\fR
+characters long.) \fBTcl_UtfNcmp\fR compares the two strings
+character-by-character according to the Unicode character ordering.
+It returns an integer greater than, equal to, or less than 0 if the
+first string is greater than, equal to, or less than the second string
+respectively.
+.PP
+\fBTcl_UtfNcasecmp\fR corresponds to \fBstrncasecmp\fR for UTF-8
+strings. It is similar to \fBTcl_UtfNcmp\fR except comparisons ignore
+differences in case when comparing upper, lower or title case
+characters.
+.PP
\fBTcl_UtfCharComplete\fR returns 1 if the source UTF-8 string \fIsrc\fR
of length \fIlen\fR bytes is long enough to be decoded by
\fBTcl_UtfToUniChar\fR, or 0 otherwise. This function does not guarantee
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 92d2bb2..c07db70 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.10 1999/04/21 21:50:23 rjohnson Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.11 1999/05/06 18:46:24 stanton Exp $
library tcl
@@ -1256,6 +1256,12 @@ declare 367 generic {
declare 368 generic {
int Tcl_Stat(CONST char *path, struct stat *bufPtr)
}
+declare 369 generic {
+ int Tcl_UtfNcmp(CONST char *s1, CONST char *s2, size_t n)
+}
+declare 370 generic {
+ int Tcl_UtfNcasecmp(CONST char *s1, CONST char *s2, size_t n)
+}
##############################################################################
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 5a2a70c..19428fe 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.10 1999/04/30 22:45:01 stanton Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.11 1999/05/06 18:46:24 stanton Exp $
*/
#ifndef _TCLDECLS
@@ -1123,6 +1123,12 @@ EXTERN int Tcl_Access _ANSI_ARGS_((CONST char * path, int mode));
/* 368 */
EXTERN int Tcl_Stat _ANSI_ARGS_((CONST char * path,
struct stat * bufPtr));
+/* 369 */
+EXTERN int Tcl_UtfNcmp _ANSI_ARGS_((CONST char * s1,
+ CONST char * s2, size_t n));
+/* 370 */
+EXTERN int Tcl_UtfNcasecmp _ANSI_ARGS_((CONST char * s1,
+ CONST char * s2, size_t n));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1527,6 +1533,8 @@ typedef struct TclStubs {
int (*tcl_Chdir) _ANSI_ARGS_((CONST char * dirName)); /* 366 */
int (*tcl_Access) _ANSI_ARGS_((CONST char * path, int mode)); /* 367 */
int (*tcl_Stat) _ANSI_ARGS_((CONST char * path, struct stat * bufPtr)); /* 368 */
+ int (*tcl_UtfNcmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 369 */
+ int (*tcl_UtfNcasecmp) _ANSI_ARGS_((CONST char * s1, CONST char * s2, size_t n)); /* 370 */
} TclStubs;
#ifdef __cplusplus
@@ -3007,6 +3015,14 @@ extern TclStubs *tclStubsPtr;
#define Tcl_Stat \
(tclStubsPtr->tcl_Stat) /* 368 */
#endif
+#ifndef Tcl_UtfNcmp
+#define Tcl_UtfNcmp \
+ (tclStubsPtr->tcl_UtfNcmp) /* 369 */
+#endif
+#ifndef Tcl_UtfNcasecmp
+#define Tcl_UtfNcasecmp \
+ (tclStubsPtr->tcl_UtfNcasecmp) /* 370 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 2ac795b..632f4a8 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.12 1999/04/24 01:46:52 stanton Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.13 1999/05/06 18:46:25 stanton Exp $
*/
#include "tclInt.h"
@@ -684,6 +684,8 @@ TclStubs tclStubs = {
Tcl_Chdir, /* 366 */
Tcl_Access, /* 367 */
Tcl_Stat, /* 368 */
+ Tcl_UtfNcmp, /* 369 */
+ Tcl_UtfNcasecmp, /* 370 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 0c46d26..9881f7d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.4 1999/04/30 16:22:24 hershey Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.5 1999/05/06 18:46:25 stanton Exp $
*/
#include "tclInt.h"
@@ -1048,6 +1048,93 @@ Tcl_UtfToTitle(str)
/*
*----------------------------------------------------------------------
*
+ * Tcl_UtfNcmp --
+ *
+ * Compare at most n UTF chars of string cs to string ct. Both cs
+ * and ct are assumed to be at least n UTF chars long.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UtfNcmp(cs, ct, n)
+    CONST char *cs;             /* First UTF string of the comparison. */
+    CONST char *ct;             /* Second UTF string of the comparison. */
+    size_t n;                   /* How many UTF chars to compare. */
+{
+    Tcl_UniChar csChar, ctChar;
+    /*
+     * Walk both strings one decoded character at a time.  The count n
+     * is in chars, not bytes, so each pointer advances by the number of
+     * bytes reported by Tcl_UtfToUniChar for its own character.
+     *
+     * Callers must only pass strings that are at least n chars long,
+     * which is why there is no check for a terminating \0.
+     *
+     * An alternative that should also work, assuming ct is properly
+     * formed UTF, is memcmp(cs, ct, Tcl_UtfAtIndex(cs, n) - cs).
+     */
+    for (; n > 0; n--) {
+        cs += Tcl_UtfToUniChar(cs, &csChar);
+        ct += Tcl_UtfToUniChar(ct, &ctChar);
+        if (csChar == ctChar) {
+            continue;
+        }
+        return (csChar - ctChar);
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UtfNcasecmp --
+ *
+ * Compare at most n UTF chars of string cs to string ct case
+ * insensitive. Both cs and ct are assumed to be at least n
+ * UTF chars long.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UtfNcasecmp(cs, ct, n)
+    CONST char *cs;             /* UTF string to compare to ct. */
+    CONST char *ct;             /* UTF string cs is compared to. */
+    size_t n;                   /* Number of UTF chars to compare. */
+{
+    Tcl_UniChar ch1, ch2;
+    while (n-- > 0) {
+        /*
+         * n counts chars, not bytes; both strings must hold at least n
+         * chars (no \0 check).  Compare and order by lowercase values.
+         */
+        cs += Tcl_UtfToUniChar(cs, &ch1);
+        ct += Tcl_UtfToUniChar(ct, &ch2);
+        ch1 = Tcl_UniCharToLower(ch1);
+        ch2 = Tcl_UniCharToLower(ch2);
+        if (ch1 != ch2) {
+            return (ch1 - ch2);
+        }
+    }
+    return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharToUpper --
*
* Compute the uppercase equivalent of the given Unicode character.