diff options
author | hobbs <hobbs> | 2001-06-28 01:10:15 (GMT) |
---|---|---|
committer | hobbs <hobbs> | 2001-06-28 01:10:15 (GMT) |
commit | 2b6587fc6cf0bc144393a46fcf12967e8cdcbde6 (patch) | |
tree | ad78a6f3f9022c60ffbdeac7973dab98b18e8b6d | |
parent | 1243c67dae865c0eb285d4faabaeb897e9d5ea68 (diff) | |
download | tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.zip tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.tar.gz tcl-2b6587fc6cf0bc144393a46fcf12967e8cdcbde6.tar.bz2 |
* tests/subst.test:
* generic/tclUtf.c (Tcl_UtfBackslash): Corrected backslash
handling of multi-byte UTF-8 characters: a backslash placed in front
of a multi-byte character no longer breaks that character up.
[Bug #217987]
-rw-r--r-- | generic/tclUtf.c | 23 | ||||
-rw-r--r-- | tests/subst.test | 7 |
2 files changed, 23 insertions, 7 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a6c217..8b39d5f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.15 2001/04/06 10:50:00 dkf Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.16 2001/06/28 01:10:15 hobbs Exp $ */ #include "tclInt.h" @@ -111,7 +111,7 @@ static int UtfCount _ANSI_ARGS_((int ch)); *--------------------------------------------------------------------------- */ -static int +INLINE static int UtfCount(ch) int ch; /* The Tcl_UniChar whose size is returned. */ { @@ -781,7 +781,8 @@ Tcl_UtfBackslash(src, readPtr, dst) * backslash sequence. */ { register CONST char *p = src+1; - int result, count, n; + Tcl_UniChar result; + int count, n; char buf[TCL_UTF_MAX]; if (dst == NULL) { @@ -883,15 +884,25 @@ Tcl_UtfBackslash(src, readPtr, dst) result = (unsigned char)((result << 3) + (*p - '0')); break; } - result = *p; - count = 2; + if (UCHAR(*p) < UNICODE_SELF) { + result = *p; + count = 2; + } else { + /* + * We have to convert here because the user has put a + * backslash in front of a multi-byte utf-8 character. + * While this means nothing special, we shouldn't break up + * a correct utf-8 character. [Bug #217987] test subst-3.2 + */ + count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ + } break; } if (readPtr != NULL) { *readPtr = count; } - return Tcl_UniCharToUtf(result, dst); + return Tcl_UniCharToUtf((int) result, dst); } /* diff --git a/tests/subst.test b/tests/subst.test index 1ebceb7..21b0d7e 100644 --- a/tests/subst.test +++ b/tests/subst.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. 
# -# RCS: @(#) $Id: subst.test,v 1.7 2000/11/01 22:13:39 hobbs Exp $ +# RCS: @(#) $Id: subst.test,v 1.8 2001/06/28 01:10:15 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -38,6 +38,11 @@ test subst-2.3 {simple strings} { test subst-3.1 {backslash substitutions} { subst {\x\$x\[foo bar]\\} } "x\$x\[foo bar]\\" +test subst-3.2 {backslash substitutions with utf chars} { + # 'j' is just a char that doesn't mean anything, and \344 is 'ä' + # that also doesn't mean anything, but is multi-byte in UTF-8. + list [subst \j] [subst \\j] [subst \\344] [subst \\\344] +} "j j \344 \344" test subst-4.1 {variable substitutions} { set a 44 |