diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-11-29 11:49:49 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-11-29 11:49:49 (GMT) |
commit | 7ecb945209c9068c6438b0a1bed50e9ed49c2453 (patch) | |
tree | 71de6f63de12b987cde771a43b210e313f524d02 | |
parent | 03c66864aa2ffa9871ce216b00cd661eaf1be688 (diff) | |
parent | 79b5a0a71662460559e6c6df778d3fff7ac2f3d5 (diff) | |
download | tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.zip tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.tar.gz tcl-7ecb945209c9068c6438b0a1bed50e9ed49c2453.tar.bz2 |
Merge core-8-branch. Also, use a different value for TCL_STUB_MAGIC when TCL_UTF_MAX>4.
-rw-r--r-- | doc/ToUpper.3 | 2 | ||||
-rw-r--r-- | doc/UniCharIsAlpha.3 | 2 | ||||
-rw-r--r-- | doc/Utf.3 | 2 | ||||
-rw-r--r-- | generic/tcl.h | 4 | ||||
-rw-r--r-- | generic/tclUtf.c | 32 |
5 files changed, 28 insertions, 14 deletions
diff --git a/doc/ToUpper.3 b/doc/ToUpper.3 index b06b793..1c7a0c2 100644 --- a/doc/ToUpper.3 +++ b/doc/ToUpper.3 @@ -33,7 +33,7 @@ int .SH ARGUMENTS .AS char *str in/out .AP int ch in -The character to be converted. +The Unicode character to be converted. .AP char *str in/out Pointer to UTF-8 string to be converted in place. .BE diff --git a/doc/UniCharIsAlpha.3 b/doc/UniCharIsAlpha.3 index e1d23ab..16b1517 100644 --- a/doc/UniCharIsAlpha.3 +++ b/doc/UniCharIsAlpha.3 @@ -53,7 +53,7 @@ The character to be examined. .SH DESCRIPTION .PP -All of the routines described examine characters and return a +All of the routines described examine Unicode characters and return a boolean value. A non-zero return value means that the character does belong to the character class associated with the called routine. The rest of this document just describes the character classes associated @@ -77,7 +77,7 @@ int Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most \fBTCL_UTF_MAX\fR bytes are stored in the buffer. .AP int ch in -The character to be converted or examined. +The Unicode character to be converted or examined. .AP Tcl_UniChar *chPtr out Filled with the Tcl_UniChar represented by the head of the UTF-8 string. .AP "const char" *src in diff --git a/generic/tcl.h b/generic/tcl.h index 36850d5..77dc51b 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2391,10 +2391,10 @@ typedef int (Tcl_NRPostProc) (ClientData data[], Tcl_Interp *interp, /* *---------------------------------------------------------------------------- * The following constant is used to test for older versions of Tcl in the - * stubs tables. + * stubs tables. If TCL_UTF_MAX>4 use a different value. */ -#define TCL_STUB_MAGIC ((int) 0xFCA3BACF) +#define TCL_STUB_MAGIC ((int) 0xFCA3BACF + (TCL_UTF_MAX>4)) /* * The following function is required to be defined in all stubs aware diff --git a/generic/tclUtf.c b/generic/tclUtf.c index aff10c1..23acc8a 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -562,10 +562,10 @@ Tcl_UtfFindFirst( #if TCL_UTF_MAX == 4 if (!len) { len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } #endif - if (find == fullchar) { + if (fullchar == ch) { return src; } if (*src == '\0') { @@ -610,10 +610,10 @@ Tcl_UtfFindLast( #if TCL_UTF_MAX == 4 if (!len) { len += TclUtfToUniChar(src, &find); - fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; + fullchar = (((fullchar & 0x3ff) << 10) | (find & 0x3ff)) + 0x10000; } #endif - if (find == fullchar) { + if (fullchar == ch) { last = src; } if (*src == '\0') { @@ -730,12 +730,27 @@ Tcl_UniCharAtIndex( register int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; + int fullchar = 0; +#if TCL_UTF_MAX == 4 + int len = 1; +#endif - while (index >= 0) { - index--; + while (index-- >= 0) { +#if TCL_UTF_MAX == 4 + src += (len = TclUtfToUniChar(src, &ch)); +#else src += TclUtfToUniChar(src, &ch); +#endif + } + fullchar = ch; +#if TCL_UTF_MAX == 4 + if (!len) { + /* If last Tcl_UniChar was an upper surrogate, combine with lower surrogate */ + (void)TclUtfToUniChar(src, &ch); + fullchar = (((fullchar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000; } - return ch; +#endif + return fullchar; } /* @@ -762,8 +777,7 @@ Tcl_UtfAtIndex( { Tcl_UniChar ch = 0; - while (index > 0) { - index--; + while (index-- > 0) { src += TclUtfToUniChar(src, &ch); } return src; |