summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2017-11-29 11:49:49 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2017-11-29 11:49:49 (GMT)
commit 7ecb945209c9068c6438b0a1bed50e9ed49c2453 (patch)
tree 71de6f63de12b987cde771a43b210e313f524d02
parent 03c66864aa2ffa9871ce216b00cd661eaf1be688 (diff)
parent 79b5a0a71662460559e6c6df778d3fff7ac2f3d5 (diff)
download tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.zip
tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.tar.gz
tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.tar.bz2
Merge core-8-branch. Also, use a different value for TCL_STUB_MAGIC when TCL_UTF_MAX>4.
-rw-r--r-- doc/ToUpper.3 2
-rw-r--r-- doc/UniCharIsAlpha.3 2
-rw-r--r-- doc/Utf.3 2
-rw-r--r-- generic/tcl.h 4
-rw-r--r-- generic/tclUtf.c 32
5 files changed, 28 insertions, 14 deletions
diff --git a/doc/ToUpper.3 b/doc/ToUpper.3
index b06b793..1c7a0c2 100644
--- a/doc/ToUpper.3
+++ b/doc/ToUpper.3
@@ -33,7 +33,7 @@ int
.SH ARGUMENTS
.AS char *str in/out
.AP int ch in
-The character to be converted.
+The Unicode character to be converted.
.AP char *str in/out
Pointer to UTF-8 string to be converted in place.
.BE
diff --git a/doc/UniCharIsAlpha.3 b/doc/UniCharIsAlpha.3
index e1d23ab..16b1517 100644
--- a/doc/UniCharIsAlpha.3
+++ b/doc/UniCharIsAlpha.3
@@ -53,7 +53,7 @@ The character to be examined.
.SH DESCRIPTION
.PP
-All of the routines described examine characters and return a
+All of the routines described examine Unicode characters and return a
boolean value. A non-zero return value means that the character does
belong to the character class associated with the called routine. The
rest of this document just describes the character classes associated
diff --git a/doc/Utf.3 b/doc/Utf.3
index de9545d..78d795e 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -77,7 +77,7 @@ int
Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
\fBTCL_UTF_MAX\fR bytes are stored in the buffer.
.AP int ch in
-The character to be converted or examined.
+The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
.AP "const char" *src in
diff --git a/generic/tcl.h b/generic/tcl.h
index 36850d5..77dc51b 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2391,10 +2391,10 @@ typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp,
/*
*----------------------------------------------------------------------------
* The following constant is used to test for older versions of Tcl in the
- * stubs tables.
+ * stubs tables. If TCL_UTF_MAX>4 use a different value.
*/
-#define TCL_STUB_MAGIC ((int) 0xFCA3BACF)
+#define TCL_STUB_MAGIC ((int) 0xFCA3BACF + (TCL_UTF_MAX>4))
/*
* The following function is required to be defined in all stubs aware
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index aff10c1..23acc8a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -562,10 +562,10 @@ Tcl_UtfFindFirst(
#if TCL_UTF_MAX == 4
if (!len) {
len += TclUtfToUniChar(src, &find);
- fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
+ fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
- if (find == fullchar) {
+ if (fullchar == ch) {
return src;
}
if (*src == '\0') {
@@ -610,10 +610,10 @@ Tcl_UtfFindLast(
#if TCL_UTF_MAX == 4
if (!len) {
len += TclUtfToUniChar(src, &find);
- fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
+ fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000;
}
#endif
- if (find == fullchar) {
+ if (fullchar == ch) {
last = src;
}
if (*src == '\0') {
@@ -730,12 +730,27 @@ Tcl_UniCharAtIndex(
register int index) /* The position of the desired character. */
{
Tcl_UniChar ch = 0;
+ int fullchar = 0;
+#if TCL_UTF_MAX == 4
+ int len = 1;
+#endif
- while (index >= 0) {
- index--;
+ while (index-- >= 0) {
+#if TCL_UTF_MAX == 4
+ src += (len = TclUtfToUniChar(src, &ch));
+#else
src += TclUtfToUniChar(src, &ch);
+#endif
+ }
+ fullchar = ch;
+#if TCL_UTF_MAX == 4
+ if (!len) {
+ /* If last Tcl_UniChar was an upper surrogate, combine with lower surrogate */
+ (void)TclUtfToUniChar(src, &ch);
+ fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
- return ch;
+#endif
+ return fullchar;
}
/*
@@ -762,8 +777,7 @@ Tcl_UtfAtIndex(
{
Tcl_UniChar ch = 0;
- while (index > 0) {
- index--;
+ while (index-- > 0) {
src += TclUtfToUniChar(src, &ch);
}
return src;