summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2016-08-30 13:07:10 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2016-08-30 13:07:10 (GMT)
commit5b7e79f851f3ec2a106bc7520161e934b75ea3b7 (patch)
tree4c13a17c50a6283243312fe0ed36e17d16d4dfc9
parent0cce018c67cb5200e8d82476ae2540d398a5d9d3 (diff)
parent3b2cb79ab18e9506dfb66e2786a7ee04dd30a780 (diff)
downloadtcl-5b7e79f851f3ec2a106bc7520161e934b75ea3b7.zip
tcl-5b7e79f851f3ec2a106bc7520161e934b75ea3b7.tar.gz
tcl-5b7e79f851f3ec2a106bc7520161e934b75ea3b7.tar.bz2
Don't ever allow UTF-8 sequences of more than 4 bytes to be generated or parsed, even when TCL_UTF_MAX>4: According to the current Unicode standard, a byte sequence of more than 4 bytes can never form a single UTF-8 character.
And a few minor micro-optimizations related to UTF-8 handling.
-rw-r--r--generic/tclExecute.c2
-rw-r--r--generic/tclUtf.c68
-rw-r--r--generic/tclVar.c4
-rwxr-xr-xwin/tclWinFile.c8
-rw-r--r--win/tclWinPipe.c2
-rw-r--r--win/tclWinPort.h2
6 files changed, 33 insertions, 53 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index a2a465a..704e494 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -3173,7 +3173,7 @@ TEBCresume(
Tcl_Obj *copyPtr = Tcl_NewListObj(objc - opnd + 1, NULL);
Tcl_ListObjAppendElement(NULL, copyPtr, objPtr);
- Tcl_ListObjReplace(NULL, copyPtr, LIST_MAX, 0,
+ Tcl_ListObjReplace(NULL, copyPtr, LIST_MAX, 0,
objc - opnd, objv + opnd);
Tcl_DecrRefCount(objPtr);
objPtr = copyPtr;
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 6c4cb7f..b33bf6a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -73,16 +73,7 @@ static const unsigned char totalBytes[256] = {
#else
1,1,1,1,1,1,1,1,
#endif
-#if TCL_UTF_MAX > 4
- 5,5,5,5,
-#else
- 1,1,1,1,
-#endif
-#if TCL_UTF_MAX > 5
- 6,6,6,6
-#else
- 1,1,1,1
-#endif
+ 1,1,1,1,1,1,1,1
};
/*
@@ -105,14 +96,14 @@ int
TclUtfCount(
int ch) /* The Tcl_UniChar whose size is returned. */
{
- if ((ch > 0) && (ch < UNICODE_SELF)) {
+ if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
return 1;
}
if (ch <= 0x7FF) {
return 2;
}
#if TCL_UTF_MAX > 3
- if ((ch > 0xFFFF) && (ch <= 0x10FFFF)) {
+ if (((unsigned)(ch - 0x10000) <= 0xfffff)) {
return 4;
}
#endif
@@ -146,7 +137,7 @@ Tcl_UniCharToUtf(
* large enough to hold the UTF-8 character
* (at most TCL_UTF_MAX bytes). */
{
- if ((ch > 0) && (ch < UNICODE_SELF)) {
+ if ((unsigned)(ch - 1) < (UNICODE_SELF - 1)) {
buf[0] = (char) ch;
return 1;
}
@@ -174,11 +165,7 @@ Tcl_UniCharToUtf(
}
}
#endif
- three:
- buf[2] = (char) ((ch | 0x80) & 0xBF);
- buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
- buf[0] = (char) ((ch >> 12) | 0xE0);
- return 3;
+ goto three;
}
#if TCL_UTF_MAX > 3
@@ -193,7 +180,11 @@ Tcl_UniCharToUtf(
}
ch = 0xFFFD;
- goto three;
+three:
+ buf[2] = (char) ((ch | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 12) | 0xE0);
+ return 3;
}
/*
@@ -308,9 +299,6 @@ Tcl_UtfToUniChar(
* A two-byte-character lead-byte not followed by trail-byte
* represents itself.
*/
-
- *chPtr = (Tcl_UniChar) byte;
- return 1;
} else if (byte < 0xF0) {
if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80)) {
/*
@@ -326,31 +314,23 @@ Tcl_UtfToUniChar(
* A three-byte-character lead-byte not followed by two trail-bytes
* represents itself.
*/
-
- *chPtr = (Tcl_UniChar) byte;
- return 1;
}
#if TCL_UTF_MAX > 3
- {
- int ch, total, trail;
-
- total = totalBytes[byte];
- trail = total - 1;
- if (trail > 0) {
- ch = byte & (0x3F >> trail);
- do {
- src++;
- if ((*src & 0xC0) != 0x80) {
- *chPtr = byte;
- return 1;
- }
- ch <<= 6;
- ch |= (*src & 0x3F);
- trail--;
- } while (trail > 0);
- *chPtr = ch;
- return total;
+ else if (byte < 0xF8) {
+ if (((src[1] & 0xC0) == 0x80) && ((src[2] & 0xC0) == 0x80) && ((src[3] & 0xC0) == 0x80)) {
+ /*
+ * Four-byte-character lead byte followed by three trail bytes.
+ */
+
+ *chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12)
+ | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
+ return 4;
}
+
+ /*
+ * A four-byte-character lead-byte not followed by three trail-bytes
+ * represents itself.
+ */
}
#endif
diff --git a/generic/tclVar.c b/generic/tclVar.c
index 55eb436..20bc208 100644
--- a/generic/tclVar.c
+++ b/generic/tclVar.c
@@ -2983,7 +2983,7 @@ ArrayAnyMoreCmd(
if (varPtr == NULL) {
return TCL_ERROR;
}
-
+
/*
* Get the search.
*/
@@ -3056,7 +3056,7 @@ ArrayNextElementCmd(
varPtr = VerifyArray(interp, varNameObj);
if (varPtr == NULL) {
return TCL_ERROR;
- }
+ }
/*
* Get the search.
diff --git a/win/tclWinFile.c b/win/tclWinFile.c
index 3a856a1..9458933 100755
--- a/win/tclWinFile.c
+++ b/win/tclWinFile.c
@@ -3166,8 +3166,8 @@ TclWinFileOwned(
case we are in all likelihood not the owner */
return 0;
}
-
- /*
+
+ /*
* Getting the current process SID is a multi-step process.
* We make the assumption that if a call fails, this process is
* so underprivileged it could not possibly own anything. Normally
@@ -3191,10 +3191,10 @@ TclWinFileOwned(
LocalFree(secd); /* Also frees ownerSid */
if (buf)
ckfree(buf);
-
+
return (owned != 0); /* Convert non-0 to 1 */
}
-
+
/*
* Local Variables:
* mode: c
diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c
index 382addd..4666deb 100644
--- a/win/tclWinPipe.c
+++ b/win/tclWinPipe.c
@@ -1337,7 +1337,7 @@ ApplicationType(
Tcl_DStringFree(&ds);
ext = strrchr(fullName, '.');
- if ((ext != NULL) &&
+ if ((ext != NULL) &&
(strcasecmp(ext, ".cmd") == 0 || strcasecmp(ext, ".bat") == 0)) {
applType = APPL_DOS;
break;
diff --git a/win/tclWinPort.h b/win/tclWinPort.h
index b486466..159a708 100644
--- a/win/tclWinPort.h
+++ b/win/tclWinPort.h
@@ -360,7 +360,7 @@ typedef DWORD_PTR * PDWORD_PTR;
# define S_IFLNK 0120000 /* Symbolic Link */
#endif
-/*
+/*
* Windows compilers do not define S_IFBLK. However, Tcl uses it in
* GetTypeFromMode to identify blockSpecial devices based on the
* value in the statsbuf st_mode field. We have no other way to pass this