summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c49
1 files changed, 40 insertions, 9 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index fcdf80a..169f240 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1137,7 +1137,7 @@ Tcl_UniCharAtIndex(
i = TclUtfToUniChar(src, &ch);
src += i;
}
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
if ((ch >= 0xD800) && (i < 3)) {
/* Index points at character following high Surrogate */
return -1;
@@ -1153,7 +1153,7 @@ Tcl_UniCharAtIndex(
* Tcl_UtfAtIndex --
*
* Returns a pointer to the specified character (not byte) position in
- * the UTF-8 string. If TCL_UTF_MAX <= 3, characters > U+FFFF count as
+ * the UTF-8 string. If TCL_UTF_MAX < 4, characters > U+FFFF count as
* 2 positions, but then the pointer should never be placed between
* the two positions.
*
@@ -1178,7 +1178,7 @@ Tcl_UtfAtIndex(
len = TclUtfToUniChar(src, &ch);
src += len;
}
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
if ((ch >= 0xD800) && (len < 3)) {
/* Index points at character following high Surrogate */
src += TclUtfToUniChar(src, &ch);
@@ -1500,7 +1500,7 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1551,7 +1551,7 @@ Tcl_UtfNcasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1600,7 +1600,7 @@ TclUtfCmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1646,7 +1646,7 @@ TclUtfCasecmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
if (ch1 != ch2) {
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
/* Surrogates always report higher than non-surrogates */
if (((ch1 & 0xFC00) == 0xD800)) {
if ((ch2 & 0xFC00) != 0xD800) {
@@ -1773,6 +1773,36 @@ Tcl_UniCharToTitle(
/*
*----------------------------------------------------------------------
*
+ * Tcl_Char16Len --
+ *
+ * Find the length of a UniChar string. The str input must be null
+ * terminated.
+ *
+ * Results:
+ * Returns the length of str in UniChars (not bytes).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+Tcl_Char16Len(
+ const unsigned short *uniStr) /* Unicode string to find length of. */
+{
+ int len = 0;
+
+ while (*uniStr != '\0') {
+ len++;
+ uniStr++;
+ }
+ return len;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
* Tcl_UniCharLen --
*
* Find the length of a UniChar string. The str input must be null
@@ -1787,9 +1817,10 @@ Tcl_UniCharToTitle(
*----------------------------------------------------------------------
*/
+#undef Tcl_UniCharLen
int
Tcl_UniCharLen(
- const Tcl_UniChar *uniStr) /* Unicode string to find length of. */
+ const int *uniStr) /* Unicode string to find length of. */
{
int len = 0;
@@ -2642,7 +2673,7 @@ TclUniCharMatch(
*---------------------------------------------------------------------------
*/
-#if TCL_UTF_MAX <= 3
+#if TCL_UTF_MAX < 4
int
TclUtfToUCS4(
const char *src, /* The UTF-8 string. */