From 2b6587fc6cf0bc144393a46fcf12967e8cdcbde6 Mon Sep 17 00:00:00 2001 From: hobbs Date: Thu, 28 Jun 2001 01:10:15 +0000 Subject: * tests/subst.test: * generic/tclUtf.c (Tcl_UtfBackslash): Corrected backslash handling of multibyte utf-8 chars. [Bug #217987] --- generic/tclUtf.c | 23 +++++++++++++++++------ tests/subst.test | 7 ++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a6c217..8b39d5f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.15 2001/04/06 10:50:00 dkf Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.16 2001/06/28 01:10:15 hobbs Exp $ */ #include "tclInt.h" @@ -111,7 +111,7 @@ static int UtfCount _ANSI_ARGS_((int ch)); *--------------------------------------------------------------------------- */ -static int +INLINE static int UtfCount(ch) int ch; /* The Tcl_UniChar whose size is returned. */ { @@ -781,7 +781,8 @@ Tcl_UtfBackslash(src, readPtr, dst) * backslash sequence. */ { register CONST char *p = src+1; - int result, count, n; + Tcl_UniChar result; + int count, n; char buf[TCL_UTF_MAX]; if (dst == NULL) { @@ -883,15 +884,25 @@ Tcl_UtfBackslash(src, readPtr, dst) result = (unsigned char)((result << 3) + (*p - '0')); break; } - result = *p; - count = 2; + if (UCHAR(*p) < UNICODE_SELF) { + result = *p; + count = 2; + } else { + /* + * We have to convert here because the user has put a + * backslash in front of a multi-byte utf-8 character. + * While this means nothing special, we shouldn't break up + * a correct utf-8 character. [Bug #217987] test subst-3.2 + */ + count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ + } break; } if (readPtr != NULL) { *readPtr = count; } - return Tcl_UniCharToUtf(result, dst); + return Tcl_UniCharToUtf((int) result, dst); } /* diff --git a/tests/subst.test b/tests/subst.test index 1ebceb7..21b0d7e 100644 --- a/tests/subst.test +++ b/tests/subst.test @@ -11,7 +11,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: subst.test,v 1.7 2000/11/01 22:13:39 hobbs Exp $ +# RCS: @(#) $Id: subst.test,v 1.8 2001/06/28 01:10:15 hobbs Exp $ if {[lsearch [namespace children] ::tcltest] == -1} { package require tcltest @@ -38,6 +38,11 @@ test subst-2.3 {simple strings} { test subst-3.1 {backslash substitutions} { subst {\x\$x\[foo bar]\\} } "x\$x\[foo bar]\\" +test subst-3.2 {backslash substitutions with utf chars} { + # 'j' is just a char that doesn't mean anything, and \344 is 'ä' + # that also doesn't mean anything, but is multi-byte in UTF-8. + list [subst \j] [subst \\j] [subst \\344] [subst \\\344] +} "j j \344 \344" test subst-4.1 {variable substitutions} { set a 44 -- cgit v0.12