summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/Utf.349
-rw-r--r--generic/regc_locale.c2
-rw-r--r--generic/tcl.decls34
-rw-r--r--generic/tcl.h10
-rw-r--r--generic/tclBinary.c4
-rw-r--r--generic/tclDecls.h88
-rw-r--r--generic/tclEncoding.c102
-rw-r--r--generic/tclScan.c2
-rw-r--r--generic/tclStubInit.c65
-rw-r--r--generic/tclUtf.c22
10 files changed, 228 insertions, 150 deletions
diff --git a/doc/Utf.3 b/doc/Utf.3
index afcff79..f638f84 100644
--- a/doc/Utf.3
+++ b/doc/Utf.3
@@ -8,7 +8,7 @@
.so man.macros
.BS
.SH NAME
-Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_UniCharLen, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_UniCharCaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
+Tcl_UniChar, Tcl_UniCharToUtf, Tcl_UtfToUniChar, Tcl_UtfToUtf16, Tcl_UniCharToUtfDString, Tcl_UtfToUniCharDString, Tcl_Utf16ToUtfDString, Tcl_UtfToUtf16DString, Tcl_UniCharLen, Tcl_Utf16Len, Tcl_UniCharNcmp, Tcl_UniCharNcasecmp, Tcl_Utf16Ncmp, Tcl_Utf16Ncasecmp, Tcl_UniCharCaseMatch, Tcl_Utf16CaseMatch, Tcl_UtfNcmp, Tcl_UtfNcasecmp, Tcl_UtfCharComplete, Tcl_NumUtfChars, Tcl_UtfFindFirst, Tcl_UtfFindLast, Tcl_UtfNext, Tcl_UtfPrev, Tcl_UniCharAtIndex, Tcl_UtfAtIndex, Tcl_UtfBackslash \- routines for manipulating UTF-8 strings
.SH SYNOPSIS
.nf
\fB#include <tcl.h>\fR
@@ -21,25 +21,46 @@ int
int
\fBTcl_UtfToUniChar\fR(\fIsrc, chPtr\fR)
.sp
+int
+\fBTcl_UtfToUtf16\fR(\fIsrc, utf16Ptr\fR)
+.sp
char *
\fBTcl_UniCharToUtfDString\fR(\fIuniStr, uniLength, dsPtr\fR)
.sp
+char *
+\fBTcl_Utf16ToUtfDString\fR(\fIutf16Str, uniLength, dsPtr\fR)
+.sp
Tcl_UniChar *
\fBTcl_UtfToUniCharDString\fR(\fIsrc, length, dsPtr\fR)
.sp
+unsigned short *
+\fBTcl_UtfToUtf16DString\fR(\fIsrc, length, dsPtr\fR)
+.sp
int
\fBTcl_UniCharLen\fR(\fIuniStr\fR)
.sp
int
+\fBTcl_Utf16Len\fR(\fIutf16Str\fR)
+.sp
+int
\fBTcl_UniCharNcmp\fR(\fIucs, uct, numChars\fR)
.sp
int
\fBTcl_UniCharNcasecmp\fR(\fIucs, uct, numChars\fR)
.sp
int
+\fBTcl_Utf16Ncmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
+\fBTcl_Utf16Ncasecmp\fR(\fIutf16s, utf16t, numChars\fR)
+.sp
+int
\fBTcl_UniCharCaseMatch\fR(\fIuniStr, uniPattern, nocase\fR)
.sp
int
+\fBTcl_Utf16CaseMatch\fR(\fIutf16Str, utf16Pattern, nocase\fR)
+.sp
+int
\fBTcl_UtfNcmp\fR(\fIcs, ct, numChars\fR)
.sp
int
@@ -80,6 +101,8 @@ Buffer in which the UTF-8 representation of the Tcl_UniChar is stored. At most
The Unicode character to be converted or examined.
.AP Tcl_UniChar *chPtr out
Filled with the Tcl_UniChar represented by the head of the UTF-8 string.
+.AP unsigned short *utf16Ptr out
+Filled with the UTF-16 code unit represented by the head of the UTF-8 string.
.AP "const char" *src in
Pointer to a UTF-8 string.
.AP "const char" *cs in
@@ -94,6 +117,14 @@ A null-terminated Unicode string.
A null-terminated Unicode string.
.AP "const Tcl_UniChar" *uniPattern in
A null-terminated Unicode string.
+.AP "const unsigned short" *utf16Str in
+A null-terminated utf-16 string.
+.AP "const unsigned short" *utf16s in
+A null-terminated utf-16 string.
+.AP "const unsigned short" *utf16t in
+A null-terminated utf-16 string.
+.AP "const unsigned short" *utf16Pattern in
+A null-terminated utf-16 string.
.AP int length in
The length of the UTF-8 string in bytes (not UTF-8 characters). If
negative, all bytes up to the first null byte are used.
@@ -121,8 +152,8 @@ case-insensitive (1).
.SH DESCRIPTION
.PP
-These routines convert between UTF-8 strings and Unicode characters. An
-Unicode character represented as an unsigned, fixed-size
+These routines convert between UTF-8 strings and Unicode/UTF-16 characters.
+A Unicode character is represented as an unsigned, fixed-size
quantity. A UTF-8 character is a Unicode character represented as
a varying-length sequence of up to \fBTCL_UTF_MAX\fR bytes. A multibyte UTF-8
sequence consists of a lead byte followed by some number of trail bytes.
@@ -133,9 +164,10 @@ represent one Unicode character in the UTF-8 representation.
\fBTcl_UniCharToUtf\fR stores the character \fIch\fR as a UTF-8 string
starting at \fIbuf\fR. The return value is the number of bytes stored
in \fIbuf\fR. If ch is a high surrogate (range U+D800 - U+DBFF), then
-the return value will be 0 and nothing will be stored. If you still
-want to produce UTF-8 output for it (even though knowing it's an illegal
-code-point on its own), just call \fBTcl_UniCharToUtf\fR again using ch = -1.
+the return value will be 1 and a single byte in the range 0xF0 - 0xF4
+will be stored. If you still want to produce UTF-8 output for it (even
+though it is an illegal code point on its own), just call
+\fBTcl_UniCharToUtf\fR again specifying ch = -1.
.PP
\fBTcl_UtfToUniChar\fR reads one UTF-8 character starting at \fIsrc\fR
and stores it as a Tcl_UniChar in \fI*chPtr\fR. The return value is the
@@ -187,6 +219,11 @@ is the Unicode case insensitive version.
a Unicode pattern, and a boolean value specifying whether the match should
be case sensitive and returns whether the string matches the pattern.
.PP
+\fBTcl_Utf16CaseMatch\fR is the utf-16 equivalent to
+\fBTcl_StringCaseMatch\fR. It accepts a null-terminated utf-16 string,
+a utf-16 pattern, and a boolean value specifying whether the match should
+be case sensitive and returns whether the string matches the pattern.
+.PP
\fBTcl_UtfNcmp\fR corresponds to \fBstrncmp\fR for UTF-8 strings. It
accepts two null-terminated UTF-8 strings and the number of characters
to compare. (Both strings are assumed to be at least \fInumChars\fR
diff --git a/generic/regc_locale.c b/generic/regc_locale.c
index 3fa9b04..afe6298 100644
--- a/generic/regc_locale.c
+++ b/generic/regc_locale.c
@@ -833,7 +833,7 @@ element(
*/
Tcl_DStringInit(&ds);
- np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+ np = Tcl_UniCharToUtfDString(startp, len, &ds);
for (cn=cnames; cn->name!=NULL; cn++) {
if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
break; /* NOTE BREAK OUT */
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 2c21b91..ca47f11 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2381,44 +2381,44 @@ declare 643 {
# TIP #???
declare 644 {
- int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
+ Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars)
}
declare 645 {
- Tcl_Obj *Tcl_NewUnicodeObj(const int *unicode, int numChars)
+ void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode,
+ int numChars)
}
declare 646 {
- int Tcl_UtfToUniChar(const char *src, int *chPtr)
+ int *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr)
}
declare 647 {
- int Tcl_UniCharLen(const int *uniStr)
+ void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode,
+ int length)
}
declare 648 {
- int Tcl_UniCharNcmp(const int *ucs, const int *uct,
- unsigned long numChars)
+ int Tcl_UtfToUniChar(const char *src, int *chPtr)
}
declare 649 {
- int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
- unsigned long numChars)
-}
-declare 650 {
char *Tcl_UniCharToUtfDString(const int *uniStr,
int uniLength, Tcl_DString *dsPtr)
}
-declare 651 {
+declare 650 {
int *Tcl_UtfToUniCharDString(const char *src,
int length, Tcl_DString *dsPtr)
}
+declare 651 {
+ int Tcl_UniCharLen(const int *uniStr)
+}
declare 652 {
- int Tcl_UniCharCaseMatch(const int *uniStr,
- const int *uniPattern, int nocase)
+ int Tcl_UniCharNcmp(const int *ucs, const int *uct,
+ unsigned long numChars)
}
declare 653 {
- void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const int *unicode,
- int length)
+ int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
+ unsigned long numChars)
}
declare 654 {
- void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const int *unicode,
- int numChars)
+ int Tcl_UniCharCaseMatch(const int *uniStr,
+ const int *uniPattern, int nocase)
}
diff --git a/generic/tcl.h b/generic/tcl.h
index 63d845d..e168c60 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -2152,13 +2152,9 @@ typedef struct Tcl_EncodingType {
#if TCL_UTF_MAX > 3
/*
- * unsigned int isn't 100% accurate as it should be a strict 4-byte value
- * (perhaps wchar_t). 64-bit systems may have troubles. The size of this
- * value must be reflected correctly in regcustom.h and
- * in tclEncoding.c.
- * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode
- * XXX: string rep that Tcl_UniChar represents. Changing the size
- * XXX: of Tcl_UniChar is /not/ supported.
+ * int isn't 100% accurate as it should be a strict 4-byte value
+ * (perhaps wchar_t). ILP64 systems may have troubles. The size of this
+ * value must be reflected correctly in regcustom.h.
*/
typedef int Tcl_UniChar;
#else
diff --git a/generic/tclBinary.c b/generic/tclBinary.c
index 8600b3f..1f78d18 100644
--- a/generic/tclBinary.c
+++ b/generic/tclBinary.c
@@ -1354,7 +1354,7 @@ BinaryFormatCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[5] = "";
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
@@ -1724,7 +1724,7 @@ BinaryScanCmd(
badField:
{
Tcl_UniChar ch = 0;
- char buf[5] = "";
+ char buf[TCL_UTF_MAX + 1] = "";
TclUtfToUniChar(errorString, &ch);
buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index f0d0b4c..7a8ad87 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1897,35 +1897,35 @@ EXTERN void Tcl_DecrRefCount(Tcl_Obj *objPtr);
/* 643 */
EXTERN int Tcl_IsShared(Tcl_Obj *objPtr);
/* 644 */
-EXTERN int * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
- int *lengthPtr);
-/* 645 */
EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const int *unicode, int numChars);
+/* 645 */
+EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
+ const int *unicode, int numChars);
/* 646 */
-EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr);
+EXTERN int * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr,
+ int *lengthPtr);
/* 647 */
-EXTERN int Tcl_UniCharLen(const int *uniStr);
+EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
+ const int *unicode, int length);
/* 648 */
-EXTERN int Tcl_UniCharNcmp(const int *ucs, const int *uct,
- unsigned long numChars);
+EXTERN int Tcl_UtfToUniChar(const char *src, int *chPtr);
/* 649 */
-EXTERN int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
- unsigned long numChars);
-/* 650 */
EXTERN char * Tcl_UniCharToUtfDString(const int *uniStr,
int uniLength, Tcl_DString *dsPtr);
-/* 651 */
+/* 650 */
EXTERN int * Tcl_UtfToUniCharDString(const char *src, int length,
Tcl_DString *dsPtr);
+/* 651 */
+EXTERN int Tcl_UniCharLen(const int *uniStr);
/* 652 */
-EXTERN int Tcl_UniCharCaseMatch(const int *uniStr,
- const int *uniPattern, int nocase);
+EXTERN int Tcl_UniCharNcmp(const int *ucs, const int *uct,
+ unsigned long numChars);
/* 653 */
-EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr,
- const int *unicode, int length);
+EXTERN int Tcl_UniCharNcasecmp(const int *ucs, const int *uct,
+ unsigned long numChars);
/* 654 */
-EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr,
- const int *unicode, int numChars);
+EXTERN int Tcl_UniCharCaseMatch(const int *uniStr,
+ const int *uniPattern, int nocase);
typedef struct {
const struct TclPlatStubs *tclPlatStubs;
@@ -2605,17 +2605,17 @@ typedef struct TclStubs {
void (*tcl_IncrRefCount) (Tcl_Obj *objPtr); /* 641 */
void (*tcl_DecrRefCount) (Tcl_Obj *objPtr); /* 642 */
int (*tcl_IsShared) (Tcl_Obj *objPtr); /* 643 */
- int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 644 */
- Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 645 */
- int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 646 */
- int (*tcl_UniCharLen) (const int *uniStr); /* 647 */
- int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 648 */
- int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 649 */
- char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 650 */
- int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 651 */
- int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 652 */
- void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 653 */
- void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 654 */
+ Tcl_Obj * (*tcl_NewUnicodeObj) (const int *unicode, int numChars); /* 644 */
+ void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const int *unicode, int numChars); /* 645 */
+ int * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 646 */
+ void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const int *unicode, int length); /* 647 */
+ int (*tcl_UtfToUniChar) (const char *src, int *chPtr); /* 648 */
+ char * (*tcl_UniCharToUtfDString) (const int *uniStr, int uniLength, Tcl_DString *dsPtr); /* 649 */
+ int * (*tcl_UtfToUniCharDString) (const char *src, int length, Tcl_DString *dsPtr); /* 650 */
+ int (*tcl_UniCharLen) (const int *uniStr); /* 651 */
+ int (*tcl_UniCharNcmp) (const int *ucs, const int *uct, unsigned long numChars); /* 652 */
+ int (*tcl_UniCharNcasecmp) (const int *ucs, const int *uct, unsigned long numChars); /* 653 */
+ int (*tcl_UniCharCaseMatch) (const int *uniStr, const int *uniPattern, int nocase); /* 654 */
} TclStubs;
extern const TclStubs *tclStubsPtr;
@@ -3934,28 +3934,28 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_DecrRefCount) /* 642 */
#define Tcl_IsShared \
(tclStubsPtr->tcl_IsShared) /* 643 */
-#define Tcl_GetUnicodeFromObj \
- (tclStubsPtr->tcl_GetUnicodeFromObj) /* 644 */
#define Tcl_NewUnicodeObj \
- (tclStubsPtr->tcl_NewUnicodeObj) /* 645 */
+ (tclStubsPtr->tcl_NewUnicodeObj) /* 644 */
+#define Tcl_SetUnicodeObj \
+ (tclStubsPtr->tcl_SetUnicodeObj) /* 645 */
+#define Tcl_GetUnicodeFromObj \
+ (tclStubsPtr->tcl_GetUnicodeFromObj) /* 646 */
+#define Tcl_AppendUnicodeToObj \
+ (tclStubsPtr->tcl_AppendUnicodeToObj) /* 647 */
#define Tcl_UtfToUniChar \
- (tclStubsPtr->tcl_UtfToUniChar) /* 646 */
+ (tclStubsPtr->tcl_UtfToUniChar) /* 648 */
+#define Tcl_UniCharToUtfDString \
+ (tclStubsPtr->tcl_UniCharToUtfDString) /* 649 */
+#define Tcl_UtfToUniCharDString \
+ (tclStubsPtr->tcl_UtfToUniCharDString) /* 650 */
#define Tcl_UniCharLen \
- (tclStubsPtr->tcl_UniCharLen) /* 647 */
+ (tclStubsPtr->tcl_UniCharLen) /* 651 */
#define Tcl_UniCharNcmp \
- (tclStubsPtr->tcl_UniCharNcmp) /* 648 */
+ (tclStubsPtr->tcl_UniCharNcmp) /* 652 */
#define Tcl_UniCharNcasecmp \
- (tclStubsPtr->tcl_UniCharNcasecmp) /* 649 */
-#define Tcl_UniCharToUtfDString \
- (tclStubsPtr->tcl_UniCharToUtfDString) /* 650 */
-#define Tcl_UtfToUniCharDString \
- (tclStubsPtr->tcl_UtfToUniCharDString) /* 651 */
+ (tclStubsPtr->tcl_UniCharNcasecmp) /* 653 */
#define Tcl_UniCharCaseMatch \
- (tclStubsPtr->tcl_UniCharCaseMatch) /* 652 */
-#define Tcl_AppendUnicodeToObj \
- (tclStubsPtr->tcl_AppendUnicodeToObj) /* 653 */
-#define Tcl_SetUnicodeObj \
- (tclStubsPtr->tcl_SetUnicodeObj) /* 654 */
+ (tclStubsPtr->tcl_UniCharCaseMatch) /* 654 */
#endif /* defined(USE_TCL_STUBS) */
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 3c73c68..34fd551 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -569,11 +569,16 @@ TclInitEncodingSubsystem(void)
TableEncodingData *dataPtr;
unsigned size;
unsigned short i;
+ union {
+ char c;
+ short s;
+ } isLe;
if (encodingsInitialized) {
return;
}
+ isLe.s = 1;
Tcl_MutexLock(&encodingMutex);
Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
Tcl_MutexUnlock(&encodingMutex);
@@ -600,20 +605,32 @@ TclInitEncodingSubsystem(void)
type.clientData = NULL;
Tcl_CreateEncoding(&type);
- type.encodingName = "ucs-2";
type.toUtfProc = Utf16ToUtfProc;
type.fromUtfProc = UtfToUcs2Proc;
type.freeProc = NULL;
type.nullSize = 2;
- type.clientData = NULL;
+ type.encodingName = "ucs-2le";
+ type.clientData = INT2PTR(1);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "ucs-2be";
+ type.clientData = INT2PTR(0);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "ucs-2";
+ type.clientData = INT2PTR(isLe.c);
Tcl_CreateEncoding(&type);
- type.encodingName = "utf-16";
type.toUtfProc = Utf16ToUtfProc;
type.fromUtfProc = UtfToUtf16Proc;
type.freeProc = NULL;
type.nullSize = 2;
- type.clientData = NULL;
+ type.encodingName = "utf-16le";
+ type.clientData = INT2PTR(1);;
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "utf-16be";
+ type.clientData = INT2PTR(0);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "utf-16";
+ type.clientData = INT2PTR(isLe.c);;
Tcl_CreateEncoding(&type);
#ifndef TCL_NO_DEPRECATED
@@ -2434,7 +2451,7 @@ UtfToUtfProc(
static int
Utf16ToUtfProc(
- ClientData clientData, /* Not used. */
+ ClientData clientData, /* != NULL means LE, == NULL means BE */
const char *src, /* Source string in Unicode. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2486,12 +2503,15 @@ Utf16ToUtfProc(
break;
}
+ if (clientData) {
+ ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
+ } else {
+ ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
+ }
/*
* Special case for 1-byte utf chars for speed. Make sure we work with
* unsigned short-size data.
*/
-
- ch = *(unsigned short *)src;
if (ch && ch < 0x80) {
*dst++ = (ch & 0xFF);
} else {
@@ -2524,8 +2544,7 @@ Utf16ToUtfProc(
static int
UtfToUtf16Proc(
- ClientData clientData, /* TableEncodingData that specifies
- * encoding. */
+ ClientData clientData, /* != NULL means LE, == NULL means BE */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2589,37 +2608,37 @@ UtfToUtf16Proc(
* casting dst to a Tcl_UniChar. [Bug 1122671]
*/
-#ifdef WORDS_BIGENDIAN
+ if (clientData) {
#if TCL_UTF_MAX > 3
- if (*chPtr <= 0xFFFF) {
- *dst++ = (*chPtr >> 8);
- *dst++ = (*chPtr & 0xFF);
- } else {
- *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
- *dst++ = (*chPtr & 0xFF);
- *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
- *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
- }
-#else
- *dst++ = (*chPtr >> 8);
- *dst++ = (*chPtr & 0xFF);
-#endif
+ if (*chPtr <= 0xFFFF) {
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (*chPtr >> 8);
+ } else {
+ *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
+ *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
+ }
#else
-#if TCL_UTF_MAX > 3
- if (*chPtr <= 0xFFFF) {
*dst++ = (*chPtr & 0xFF);
*dst++ = (*chPtr >> 8);
+#endif
} else {
- *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
- *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
- *dst++ = (*chPtr & 0xFF);
- *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
- }
+#if TCL_UTF_MAX > 3
+ if (*chPtr <= 0xFFFF) {
+ *dst++ = (*chPtr >> 8);
+ *dst++ = (*chPtr & 0xFF);
+ } else {
+ *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
+ *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
+ }
#else
- *dst++ = (*chPtr & 0xFF);
- *dst++ = (*chPtr >> 8);
-#endif
+ *dst++ = (*chPtr >> 8);
+ *dst++ = (*chPtr & 0xFF);
#endif
+ }
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
@@ -2645,8 +2664,7 @@ UtfToUtf16Proc(
static int
UtfToUcs2Proc(
- ClientData clientData, /* TableEncodingData that specifies
- * encoding. */
+ ClientData clientData, /* != NULL means LE, == NULL means BE */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2721,13 +2739,13 @@ UtfToUcs2Proc(
* casting dst to a Tcl_UniChar. [Bug 1122671]
*/
-#ifdef WORDS_BIGENDIAN
- *dst++ = (ch >> 8);
- *dst++ = (ch & 0xFF);
-#else
- *dst++ = (ch & 0xFF);
- *dst++ = (ch >> 8);
-#endif
+ if (clientData) {
+ *dst++ = (ch & 0xFF);
+ *dst++ = (ch >> 8);
+ } else {
+ *dst++ = (ch >> 8);
+ *dst++ = (ch & 0xFF);
+ }
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
diff --git a/generic/tclScan.c b/generic/tclScan.c
index 068450c..b03664f 100644
--- a/generic/tclScan.c
+++ b/generic/tclScan.c
@@ -261,7 +261,7 @@ ValidateFormat(
Tcl_UniChar ch = 0;
int objIndex, xpgSize, nspace = numVars;
int *nassign = TclStackAlloc(interp, nspace * sizeof(int));
- char buf[5] = "";
+ char buf[TCL_UTF_MAX + 1] = "";
Tcl_Obj *errorMsg; /* Place to build an error message. Note that
* these are messy operations because we do
* not want to use the formatting engine;
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index ca26e8d..34619c2 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -80,15 +80,21 @@ static void uniCodePanic() {
}
#if TCL_UTF_MAX == 3
-#ifdef TCL_NO_DEPRECATED
-# define Tcl_GetUnicode 0
-#endif
# define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, int *)) uniCodePanic
# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, int)) uniCodePanic
-# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *,const int *, int)) uniCodePanic
-# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const int *, int)) uniCodePanic
+# define Tcl_SetUnicodeObj (void (*)(Tcl_Obj *,const int *, int)) uniCodePanic
+# define Tcl_AppendUnicodeToObj (void (*)(Tcl_Obj *, const int *, int)) uniCodePanic
+# define Tcl_UtfToUniChar (int (*)(const char *, int *)) uniCodePanic
+# define Tcl_UniCharToUtfDString (char *(*)(const int *, int, Tcl_DString *)) uniCodePanic
+# define Tcl_UtfToUniCharDString (int *(*)(const char *, int, Tcl_DString *)) uniCodePanic
+# define Tcl_UniCharCaseMatch (int (*)(const int *, const int *, int)) uniCodePanic
+# define Tcl_UniCharLen (int (*)(const int *)) uniCodePanic
+# define Tcl_UniCharNcmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic
+# define Tcl_UniCharNcasecmp (int (*)(const int *, const int *, unsigned long)) uniCodePanic
#else
-# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic
+#if !defined(TCL_NO_DEPRECATED) && TCL_MAJOR_VERSION < 9
+# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *)) uniCodePanic
+# endif
# define Tcl_GetUtf16FromObj (unsigned short *(*)(Tcl_Obj *, int *)) uniCodePanic
# define Tcl_NewUtf16Obj (Tcl_Obj *(*)(const unsigned short *, int)) uniCodePanic
# define Tcl_SetUtf16Obj (void(*)(Tcl_Obj *, const unsigned short *, int)) uniCodePanic
@@ -137,6 +143,8 @@ static int TclSockMinimumBuffersOld(int sock, int size)
# define Tcl_NewLongObj 0
# define Tcl_DbNewLongObj 0
# define Tcl_BackgroundError 0
+# define Tcl_GetUnicode 0
+
#else
#define TclBNInitBignumFromLong initBignumFromLong
static void TclBNInitBignumFromLong(mp_int *a, long b)
@@ -341,10 +349,6 @@ static int exprIntObj(Tcl_Interp *interp, Tcl_Obj*expr, int *ptr){
return result;
}
#define Tcl_ExprLongObj (int(*)(Tcl_Interp*,Tcl_Obj*,long*))exprIntObj
-static int uniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
- return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
-}
-#define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcmp
static int utfNcmp(const char *s1, const char *s2, unsigned int n){
return Tcl_UtfNcmp(s1, s2, (unsigned long)n);
}
@@ -353,10 +357,25 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){
return Tcl_UtfNcasecmp(s1, s2, (unsigned long)n);
}
#define Tcl_UtfNcasecmp (int(*)(const char*,const char*,unsigned long))utfNcasecmp
-static int uniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned int n){
+#if TCL_UTF_MAX > 3
+static int uniCharNcmp(const int *ucs, const int *uct, unsigned int n){
+ return Tcl_UniCharNcmp(ucs, uct, (unsigned long)n);
+}
+#define Tcl_UniCharNcmp (int(*)(const int*,const int*,unsigned long))uniCharNcmp
+static int uniCharNcasecmp(const int *ucs, const int *uct, unsigned int n){
return Tcl_UniCharNcasecmp(ucs, uct, (unsigned long)n);
}
-#define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar*,const Tcl_UniChar*,unsigned long))uniCharNcasecmp
+#define Tcl_UniCharNcasecmp (int(*)(const int*,const int*,unsigned long))uniCharNcasecmp
+#else
+static int utf16Ncmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){
+ return Tcl_Utf16Ncmp(ucs, uct, (unsigned long)n);
+}
+#define Tcl_Utf16Ncmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncmp
+static int utf16Ncasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned int n){
+ return Tcl_Utf16Ncasecmp(ucs, uct, (unsigned long)n);
+}
+#define Tcl_Utf16Ncasecmp (int(*)(const unsigned short*,const unsigned short*,unsigned long))utf16Ncasecmp
+#endif
#endif /* TCL_WIDE_INT_IS_LONG */
@@ -1659,17 +1678,17 @@ const TclStubs tclStubs = {
Tcl_IncrRefCount, /* 641 */
Tcl_DecrRefCount, /* 642 */
Tcl_IsShared, /* 643 */
- Tcl_GetUnicodeFromObj, /* 644 */
- Tcl_NewUnicodeObj, /* 645 */
- Tcl_UtfToUniChar, /* 646 */
- Tcl_UniCharLen, /* 647 */
- Tcl_UniCharNcmp, /* 648 */
- Tcl_UniCharNcasecmp, /* 649 */
- Tcl_UniCharToUtfDString, /* 650 */
- Tcl_UtfToUniCharDString, /* 651 */
- Tcl_UniCharCaseMatch, /* 652 */
- Tcl_AppendUnicodeToObj, /* 653 */
- Tcl_SetUnicodeObj, /* 654 */
+ Tcl_NewUnicodeObj, /* 644 */
+ Tcl_SetUnicodeObj, /* 645 */
+ Tcl_GetUnicodeFromObj, /* 646 */
+ Tcl_AppendUnicodeToObj, /* 647 */
+ Tcl_UtfToUniChar, /* 648 */
+ Tcl_UniCharToUtfDString, /* 649 */
+ Tcl_UtfToUniCharDString, /* 650 */
+ Tcl_UniCharLen, /* 651 */
+ Tcl_UniCharNcmp, /* 652 */
+ Tcl_UniCharNcasecmp, /* 653 */
+ Tcl_UniCharCaseMatch, /* 654 */
};
/* !END!: Do not edit above this line. */
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index c5a2ca5..7866afd 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -223,7 +223,7 @@ three:
*---------------------------------------------------------------------------
*/
-#undef Tcl_UniCharToUtfDString
+#if TCL_UTF_MAX > 3
char *
Tcl_UniCharToUtfDString(
const int *uniStr, /* Unicode string to convert to UTF-8. */
@@ -253,6 +253,7 @@ Tcl_UniCharToUtfDString(
return string;
}
+#endif /* TCL_UTF_MAX > 3 */
char *
Tcl_Utf16ToUtfDString(
@@ -337,7 +338,7 @@ static const unsigned short cp1252[32] = {
0x2DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x9D, 0x017E, 0x0178
};
-#undef Tcl_UtfToUniChar
+#if TCL_UTF_MAX > 3
int
Tcl_UtfToUniChar(
const char *src, /* The UTF-8 string. */
@@ -421,6 +422,7 @@ Tcl_UtfToUniChar(
*chPtr = byte;
return 1;
}
+#endif /* TCL_UTF_MAX > 3 */
int
Tcl_UtfToUtf16(
@@ -540,7 +542,7 @@ Tcl_UtfToUtf16(
*---------------------------------------------------------------------------
*/
-#undef Tcl_UtfToUniCharDString
+#if TCL_UTF_MAX > 3
int *
Tcl_UtfToUniCharDString(
const char *src, /* UTF-8 string to convert to Unicode. */
@@ -593,6 +595,7 @@ Tcl_UtfToUniCharDString(
return wString;
}
+#endif /* TCL_UTF_MAX > 3 */
unsigned short *
Tcl_UtfToUtf16DString(
@@ -1636,7 +1639,7 @@ Tcl_UniCharToTitle(
*----------------------------------------------------------------------
*/
-#undef Tcl_UniCharLen
+#if TCL_UTF_MAX > 3
int
Tcl_UniCharLen(
const int *uniStr) /* Unicode string to find length of. */
@@ -1649,6 +1652,7 @@ Tcl_UniCharLen(
}
return len;
}
+#endif /* TCL_UTF_MAX > 3 */
int
Tcl_Utf16Len(
@@ -1680,7 +1684,7 @@ Tcl_Utf16Len(
*----------------------------------------------------------------------
*/
-#undef Tcl_UniCharNcmp
+#if TCL_UTF_MAX > 3
int
Tcl_UniCharNcmp(
const int *ucs, /* Unicode string to compare to uct. */
@@ -1707,6 +1711,7 @@ Tcl_UniCharNcmp(
return 0;
#endif /* WORDS_BIGENDIAN */
}
+#endif /* TCL_UTF_MAX > 3 */
int
Tcl_Utf16Ncmp(
@@ -1753,7 +1758,7 @@ Tcl_Utf16Ncmp(
*----------------------------------------------------------------------
*/
-#undef Tcl_UniCharNcasecmp
+#if TCL_UTF_MAX > 3
int
Tcl_UniCharNcasecmp(
const int *ucs, /* Unicode string to compare to uct. */
@@ -1772,6 +1777,8 @@ Tcl_UniCharNcasecmp(
}
return 0;
}
+#endif /* TCL_UTF_MAX > 3 */
+
int
Tcl_Utf16Ncasecmp(
const unsigned short *ucs, /* Utf16 string to compare to uct. */
@@ -2123,7 +2130,7 @@ Tcl_UniCharIsWordChar(
*----------------------------------------------------------------------
*/
-#undef Tcl_UniCharCaseMatch
+#if TCL_UTF_MAX > 3
int
Tcl_UniCharCaseMatch(
const int *uniStr, /* Unicode String. */
@@ -2290,6 +2297,7 @@ Tcl_UniCharCaseMatch(
uniPattern++;
}
}
+#endif /* TCL_UTF_MAX > 3 */
int
Tcl_Utf16CaseMatch(