summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: hobbs <hobbs> 2001-06-28 01:10:15 (GMT)
committer: hobbs <hobbs> 2001-06-28 01:10:15 (GMT)
commit: 2b6587fc6cf0bc144393a46fcf12967e8cdcbde6 (patch)
tree: ad78a6f3f9022c60ffbdeac7973dab98b18e8b6d
parent: 1243c67dae865c0eb285d4faabaeb897e9d5ea68 (diff)
download: tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.zip
          tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.tar.gz
          tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.tar.bz2
* tests/subst.test:
* generic/tclUtf.c (Tcl_UtfBackslash): Corrected backslash handling of multibyte utf-8 chars. [Bug #217987]
-rw-r--r-- generic/tclUtf.c 23
-rw-r--r-- tests/subst.test 7
2 files changed, 23 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2a6c217..8b39d5f 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclUtf.c,v 1.15 2001/04/06 10:50:00 dkf Exp $
+ * RCS: @(#) $Id: tclUtf.c,v 1.16 2001/06/28 01:10:15 hobbs Exp $
*/
#include "tclInt.h"
@@ -111,7 +111,7 @@ static int UtfCount _ANSI_ARGS_((int ch));
*---------------------------------------------------------------------------
*/
-static int
+INLINE static int
UtfCount(ch)
int ch; /* The Tcl_UniChar whose size is returned. */
{
@@ -781,7 +781,8 @@ Tcl_UtfBackslash(src, readPtr, dst)
* backslash sequence. */
{
register CONST char *p = src+1;
- int result, count, n;
+ Tcl_UniChar result;
+ int count, n;
char buf[TCL_UTF_MAX];
if (dst == NULL) {
@@ -883,15 +884,25 @@ Tcl_UtfBackslash(src, readPtr, dst)
result = (unsigned char)((result << 3) + (*p - '0'));
break;
}
- result = *p;
- count = 2;
+ if (UCHAR(*p) < UNICODE_SELF) {
+ result = *p;
+ count = 2;
+ } else {
+ /*
+ * We have to convert here because the user has put a
+ * backslash in front of a multi-byte utf-8 character.
+ * While this means nothing special, we shouldn't break up
+ * a correct utf-8 character. [Bug #217987] test subst-3.2
+ */
+ count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */
+ }
break;
}
if (readPtr != NULL) {
*readPtr = count;
}
- return Tcl_UniCharToUtf(result, dst);
+ return Tcl_UniCharToUtf((int) result, dst);
}
/*
diff --git a/tests/subst.test b/tests/subst.test
index 1ebceb7..21b0d7e 100644
--- a/tests/subst.test
+++ b/tests/subst.test
@@ -11,7 +11,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: subst.test,v 1.7 2000/11/01 22:13:39 hobbs Exp $
+# RCS: @(#) $Id: subst.test,v 1.8 2001/06/28 01:10:15 hobbs Exp $
if {[lsearch [namespace children] ::tcltest] == -1} {
package require tcltest
@@ -38,6 +38,11 @@ test subst-2.3 {simple strings} {
test subst-3.1 {backslash substitutions} {
subst {\x\$x\[foo bar]\\}
} "x\$x\[foo bar]\\"
+test subst-3.2 {backslash substitutions with utf chars} {
+ # 'j' is just a char that doesn't mean anything, and \344 is 'ä'
+ # that also doesn't mean anything, but is multi-byte in UTF-8.
+ list [subst \j] [subst \\j] [subst \\344] [subst \\\344]
+} "j j \344 \344"
test subst-4.1 {variable substitutions} {
set a 44