summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authorhobbs <hobbs>2000-05-08 21:59:58 (GMT)
committerhobbs <hobbs>2000-05-08 21:59:58 (GMT)
commit09f4c1de476f86324d54f2e8c31a66870ce1c8bc (patch)
tree025da577bdce141098365ffb242ca0ae0be52104 /generic
parent63adaf2eb6d8949c310ea3f93c699ed6dd1c8839 (diff)
downloadtcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.zip
tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.gz
tcl-09f4c1de476f86324d54f2e8c31a66870ce1c8bc.tar.bz2
* doc/Utf.3:
* generic/tclStubInit.c: * generic/tcl.decls: * generic/tclDecls.h: * generic/tclUtf.c: Added new functions Tcl_UniCharNcasecmp and Tcl_UniCharCaseMatch (unicode parallel to Tcl_StringCaseMatch) * generic/tclUtil.c: rewrote Tcl_StringCaseMatch algorithm for optimization and made Tcl_StringMatch just call Tcl_StringCaseMatch
Diffstat (limited to 'generic')
-rw-r--r--generic/tcl.decls10
-rw-r--r--generic/tclDecls.h20
-rw-r--r--generic/tclStubInit.c4
-rw-r--r--generic/tclUtf.c217
-rw-r--r--generic/tclUtil.c189
5 files changed, 292 insertions, 148 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 1f09cfa..39a6c21 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -10,7 +10,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: tcl.decls,v 1.34 2000/05/02 22:02:33 kupries Exp $
+# RCS: @(#) $Id: tcl.decls,v 1.35 2000/05/08 21:59:58 hobbs Exp $
library tcl
@@ -1379,6 +1379,14 @@ declare 400 generic {
int Tcl_IsChannelExisting (CONST char* channelName)
}
+declare 401 generic {
+ int Tcl_UniCharNcasecmp(CONST Tcl_UniChar *cs, CONST Tcl_UniChar *ct,\
+ unsigned long n)
+}
+declare 402 generic {
+ int Tcl_UniCharCaseMatch(CONST Tcl_UniChar *ustr, \
+ CONST Tcl_UniChar *pattern, int nocase)
+}
##############################################################################
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index 150d471..56c1d76 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclDecls.h,v 1.35 2000/05/02 22:02:33 kupries Exp $
+ * RCS: @(#) $Id: tclDecls.h,v 1.36 2000/05/08 21:59:59 hobbs Exp $
*/
#ifndef _TCLDECLS
@@ -1246,6 +1246,14 @@ EXTERN void Tcl_ClearChannelHandlers _ANSI_ARGS_((
/* 400 */
EXTERN int Tcl_IsChannelExisting _ANSI_ARGS_((
CONST char* channelName));
+/* 401 */
+EXTERN int Tcl_UniCharNcasecmp _ANSI_ARGS_((
+ CONST Tcl_UniChar * cs,
+ CONST Tcl_UniChar * ct, unsigned long n));
+/* 402 */
+EXTERN int Tcl_UniCharCaseMatch _ANSI_ARGS_((
+ CONST Tcl_UniChar * ustr,
+ CONST Tcl_UniChar * pattern, int nocase));
typedef struct TclStubHooks {
struct TclPlatStubs *tclPlatStubs;
@@ -1714,6 +1722,8 @@ typedef struct TclStubs {
void (*tcl_SpliceChannel) _ANSI_ARGS_((Tcl_Channel channel)); /* 398 */
void (*tcl_ClearChannelHandlers) _ANSI_ARGS_((Tcl_Channel channel)); /* 399 */
int (*tcl_IsChannelExisting) _ANSI_ARGS_((CONST char* channelName)); /* 400 */
+ int (*tcl_UniCharNcasecmp) _ANSI_ARGS_((CONST Tcl_UniChar * cs, CONST Tcl_UniChar * ct, unsigned long n)); /* 401 */
+ int (*tcl_UniCharCaseMatch) _ANSI_ARGS_((CONST Tcl_UniChar * ustr, CONST Tcl_UniChar * pattern, int nocase)); /* 402 */
} TclStubs;
#ifdef __cplusplus
@@ -3363,6 +3373,14 @@ extern TclStubs *tclStubsPtr;
#define Tcl_IsChannelExisting \
(tclStubsPtr->tcl_IsChannelExisting) /* 400 */
#endif
+#ifndef Tcl_UniCharNcasecmp
+#define Tcl_UniCharNcasecmp \
+ (tclStubsPtr->tcl_UniCharNcasecmp) /* 401 */
+#endif
+#ifndef Tcl_UniCharCaseMatch
+#define Tcl_UniCharCaseMatch \
+ (tclStubsPtr->tcl_UniCharCaseMatch) /* 402 */
+#endif
#endif /* defined(USE_TCL_STUBS) && !defined(USE_TCL_STUB_PROCS) */
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index e9d6577..c2ac1e5 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclStubInit.c,v 1.36 2000/05/02 22:02:35 kupries Exp $
+ * RCS: @(#) $Id: tclStubInit.c,v 1.37 2000/05/08 21:59:59 hobbs Exp $
*/
#include "tclInt.h"
@@ -798,6 +798,8 @@ TclStubs tclStubs = {
Tcl_SpliceChannel, /* 398 */
Tcl_ClearChannelHandlers, /* 399 */
Tcl_IsChannelExisting, /* 400 */
+ Tcl_UniCharNcasecmp, /* 401 */
+ Tcl_UniCharCaseMatch, /* 402 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 5fe3c41..b62a26c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.11 2000/01/11 22:09:00 hobbs Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.12 2000/05/08 21:59:58 hobbs Exp $
*/
#include "tclInt.h"
@@ -1301,7 +1301,43 @@ Tcl_UniCharNcmp(cs, ct, n)
{
for ( ; n != 0; n--, cs++, ct++) {
if (*cs != *ct) {
- return *cs - *ct;
+ return (*cs - *ct);
+ }
+ if (*cs == '\0') {
+ break;
+ }
+ }
+ return 0;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharNcasecmp --
+ *
+ * Compare at most n unichars of string cs to string ct case
+ * insensitive. Both cs and ct are assumed to be at least n
+ * unichars long.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharNcasecmp(cs, ct, n)
+ CONST Tcl_UniChar *cs; /* Unicode string to compare to ct. */
+ CONST Tcl_UniChar *ct; /* Unicode string cs is compared to. */
+ unsigned long n; /* Number of unichars to compare. */
+{
+ for ( ; n != 0; n--, cs++, ct++) {
+ if ((*cs != *ct) &&
+ (Tcl_UniCharToLower(*cs) != Tcl_UniCharToLower(*ct))) {
+ return (*cs - *ct);
}
if (*cs == '\0') {
break;
@@ -1584,3 +1620,180 @@ Tcl_UniCharIsWordChar(ch)
return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UniCharCaseMatch --
+ *
+ * See if a particular Unicode string matches a particular pattern.
+ * Allows case insensitivity. Thie is the Unicode equivalent of
+ * the char* Tcl_StringCaseMatch.
+ *
+ * Results:
+ * The return value is 1 if string matches pattern, and
+ * 0 otherwise. The matching operation permits the following
+ * special characters in the pattern: *?\[] (see the manual
+ * entry for details on what these mean).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_UniCharCaseMatch(string, pattern, nocase)
+ CONST Tcl_UniChar *string; /* Unicode String. */
+ CONST Tcl_UniChar *pattern; /* Pattern, which may contain special
+ * characters. */
+ int nocase; /* 0 for case sensitive, 1 for insensitive */
+{
+ Tcl_UniChar ch1, p;
+
+ while (1) {
+ p = *pattern;
+
+ /*
+ * See if we're at the end of both the pattern and the string. If
+ * so, we succeeded. If we're at the end of the pattern but not at
+ * the end of the string, we failed.
+ */
+
+ if (p == 0) {
+ return (*string == 0);
+ }
+ if ((*string == 0) && (p != '*')) {
+ return 0;
+ }
+
+ /*
+ * Check for a "*" as the next pattern character. It matches any
+ * substring. We handle this by skipping all the characters up to the
+ * next matching one in the pattern, and then calling ourselves
+ * recursively for each postfix of string, until either we match or we
+ * reach the end of the string.
+ */
+
+ if (p == '*') {
+ int pSpecial;
+ /*
+ * Skip all successive *'s in the pattern
+ */
+ while (*(++pattern) == '*') {}
+ p = *pattern;
+ if (p == 0) {
+ return 1;
+ }
+ while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character
+ */
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ if (nocase) {
+ while (*string && (p != *string)
+ && (p != Tcl_UniCharToLower(*string))) {
+ string++;
+ }
+ } else {
+ while (*string && (p != *string)) { string++; }
+ }
+ }
+ if (Tcl_UniCharCaseMatch(string, pattern, nocase)) {
+ return 1;
+ }
+ if (*string == 0) {
+ return 0;
+ }
+ string++;
+ }
+ }
+
+ /*
+ * Check for a "?" as the next pattern character. It matches
+ * any single character.
+ */
+
+ if (p == '?') {
+ pattern++;
+ string++;
+ continue;
+ }
+
+ /*
+ * Check for a "[" as the next pattern character. It is followed
+ * by a list of characters that are acceptable, or by a range
+ * (two characters separated by "-").
+ */
+
+ if (p == '[') {
+ Tcl_UniChar startChar, endChar;
+
+ pattern++;
+ ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);
+ string++;
+ while (1) {
+ if ((*pattern == ']') || (*pattern == 0)) {
+ return 0;
+ }
+ startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);
+ pattern++;
+ if (*pattern == '-') {
+ pattern++;
+ if (*pattern == 0) {
+ return 0;
+ }
+ endChar = (nocase ? Tcl_UniCharToLower(*pattern)
+ : *pattern);
+ pattern++;
+ if (((startChar <= ch1) && (ch1 <= endChar))
+ || ((endChar <= ch1) && (ch1 <= startChar))) {
+ /*
+ * Matches ranges of form [a-z] or [z-a].
+ */
+ break;
+ }
+ } else if (startChar == ch1) {
+ break;
+ }
+ }
+ while (*pattern != ']') {
+ if (*pattern == 0) {
+ pattern--;
+ break;
+ }
+ pattern++;
+ }
+ pattern++;
+ continue;
+ }
+
+ /*
+ * If the next pattern character is '\', just strip off the '\'
+ * so we do exact matching on the character that follows.
+ */
+
+ if (p == '\\') {
+ if (*(++pattern) == '\0') {
+ return 0;
+ }
+ }
+
+ /*
+ * There's no special character. Just make sure that the next
+ * bytes of each string match.
+ */
+
+ if (nocase) {
+ if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {
+ return 0;
+ }
+ } else if (*string != *pattern) {
+ return 0;
+ }
+ string++;
+ pattern++;
+ }
+}
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index 6e99f32..8a2aa94 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtil.c,v 1.17 1999/12/12 02:26:43 hobbs Exp $
+ * RCS: @(#) $Id: tclUtil.c,v 1.18 2000/05/08 21:59:59 hobbs Exp $
*/
#include "tclInt.h"
@@ -1136,131 +1136,7 @@ Tcl_StringMatch(string, pattern)
CONST char *pattern; /* Pattern, which may contain special
* characters. */
{
- int p, s;
- CONST char *pstart = pattern;
-
- while (1) {
- p = *pattern;
- s = *string;
-
- /*
- * See if we're at the end of both the pattern and the string. If
- * so, we succeeded. If we're at the end of the pattern but not at
- * the end of the string, we failed.
- */
-
- if (p == '\0') {
- if (s == '\0') {
- return 1;
- } else {
- return 0;
- }
- }
- if ((s == '\0') && (p != '*')) {
- return 0;
- }
-
- /* Check for a "*" as the next pattern character. It matches
- * any substring. We handle this by calling ourselves
- * recursively for each postfix of string, until either we
- * match or we reach the end of the string.
- */
-
- if (p == '*') {
- pattern++;
- if (*pattern == '\0') {
- return 1;
- }
- while (1) {
- if (Tcl_StringMatch(string, pattern)) {
- return 1;
- }
- if (*string == '\0') {
- return 0;
- }
- string++;
- }
- }
-
- /* Check for a "?" as the next pattern character. It matches
- * any single character.
- */
-
- if (p == '?') {
- Tcl_UniChar ch;
-
- pattern++;
- string += Tcl_UtfToUniChar(string, &ch);
- continue;
- }
-
- /* Check for a "[" as the next pattern character. It is followed
- * by a list of characters that are acceptable, or by a range
- * (two characters separated by "-").
- */
-
- if (p == '[') {
- Tcl_UniChar ch, startChar, endChar;
-
- pattern++;
- string += Tcl_UtfToUniChar(string, &ch);
-
- while (1) {
- if ((*pattern == ']') || (*pattern == '\0')) {
- return 0;
- }
- pattern += Tcl_UtfToUniChar(pattern, &startChar);
- if (*pattern == '-') {
- pattern++;
- if (*pattern == '\0') {
- return 0;
- }
- pattern += Tcl_UtfToUniChar(pattern, &endChar);
- if (((startChar <= ch) && (ch <= endChar))
- || ((endChar <= ch) && (ch <= startChar))) {
- /*
- * Matches ranges of form [a-z] or [z-a].
- */
-
- break;
- }
- } else if (startChar == ch) {
- break;
- }
- }
- while (*pattern != ']') {
- if (*pattern == '\0') {
- pattern = Tcl_UtfPrev(pattern, pstart);
- break;
- }
- pattern++;
- }
- pattern++;
- continue;
- }
-
- /* If the next pattern character is '\', just strip off the '\'
- * so we do exact matching on the character that follows.
- */
-
- if (p == '\\') {
- pattern++;
- p = *pattern;
- if (p == '\0') {
- return 0;
- }
- }
-
- /* There's no special character. Just make sure that the next
- * bytes of each string match.
- */
-
- if (s != p) {
- return 0;
- }
- pattern++;
- string++;
- }
+ return Tcl_StringCaseMatch(string, pattern, 0);
}
/*
@@ -1290,13 +1166,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
* characters. */
int nocase; /* 0 for case sensitive, 1 for insensitive */
{
- int p, s;
+ int p;
CONST char *pstart = pattern;
Tcl_UniChar ch1, ch2;
while (1) {
p = *pattern;
- s = *string;
/*
* See if we're at the end of both the pattern and the string. If
@@ -1305,35 +1180,61 @@ Tcl_StringCaseMatch(string, pattern, nocase)
*/
if (p == '\0') {
- return (s == '\0');
+ return (*string == '\0');
}
- if ((s == '\0') && (p != '*')) {
+ if ((*string == '\0') && (p != '*')) {
return 0;
}
- /* Check for a "*" as the next pattern character. It matches
+ /*
+ * Check for a "*" as the next pattern character. It matches
* any substring. We handle this by calling ourselves
* recursively for each postfix of string, until either we
* match or we reach the end of the string.
*/
if (p == '*') {
- pattern++;
- if (*pattern == '\0') {
+ /*
+ * Skip all successive *'s in the pattern
+ */
+ while (*(++pattern) == '*') {}
+ p = *pattern;
+ if (p == '\0') {
return 1;
}
while (1) {
+ /*
+ * Optimization for matching - cruise through the string
+ * quickly if the next char in the pattern isn't a special
+ * character
+ */
+ if ((p != '[') && (p != '?') && (p != '\\')) {
+ if (nocase) {
+ while (*string && (p != *string)) {
+ ch2 = Tcl_UtfToUniChar(string, &ch1);
+ if (p == Tcl_UniCharToLower(ch1)) {
+ break;
+ }
+ string += ch2;
+ }
+ } else {
+ while (*string && (p != *string)) {
+ string += Tcl_UtfToUniChar(string, &ch1);
+ }
+ }
+ }
if (Tcl_StringCaseMatch(string, pattern, nocase)) {
return 1;
}
if (*string == '\0') {
return 0;
}
- string++;
+ string += Tcl_UtfToUniChar(string, &ch1);
}
}
- /* Check for a "?" as the next pattern character. It matches
+ /*
+ * Check for a "?" as the next pattern character. It matches
* any single character.
*/
@@ -1343,11 +1244,12 @@ Tcl_StringCaseMatch(string, pattern, nocase)
continue;
}
- /* Check for a "[" as the next pattern character. It is followed
+ /*
+ * Check for a "[" as the next pattern character. It is followed
* by a list of characters that are acceptable, or by a range
* (two characters separated by "-").
*/
-
+
if (p == '[') {
Tcl_UniChar startChar, endChar;
@@ -1396,22 +1298,23 @@ Tcl_StringCaseMatch(string, pattern, nocase)
continue;
}
- /* If the next pattern character is '\', just strip off the '\'
+ /*
+ * If the next pattern character is '\', just strip off the '\'
* so we do exact matching on the character that follows.
*/
-
+
if (p == '\\') {
pattern++;
- p = *pattern;
- if (p == '\0') {
+ if (*pattern == '\0') {
return 0;
}
}
- /* There's no special character. Just make sure that the next
+ /*
+ * There's no special character. Just make sure that the next
* bytes of each string match.
*/
-
+
string += Tcl_UtfToUniChar(string, &ch1);
pattern += Tcl_UtfToUniChar(pattern, &ch2);
if (nocase) {