diff options
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r-- | generic/tclUtf.c | 132 |
1 files changed, 11 insertions, 121 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 897d743..0fccf95 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclUtf.c,v 1.27 2002/07/19 12:31:10 dkf Exp $ + * RCS: @(#) $Id: tclUtf.c,v 1.28 2002/08/05 03:24:41 dgp Exp $ */ #include "tclInt.h" @@ -778,129 +778,19 @@ Tcl_UtfBackslash(src, readPtr, dst) char *dst; /* Filled with the bytes represented by the * backslash sequence. */ { - register CONST char *p = src+1; - Tcl_UniChar result; - int count, n; - char buf[TCL_UTF_MAX]; - - if (dst == NULL) { - dst = buf; +#define LINE_LENGTH 128 + int numRead; + int result; + + result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst); + if (numRead == LINE_LENGTH) { + /* We ate a whole line. Pay the price of a strlen() */ + result = TclParseBackslash(src, (int)strlen(src), &numRead, dst); } - - count = 2; - switch (*p) { - /* - * Note: in the conversions below, use absolute values (e.g., - * 0xa) rather than symbolic values (e.g. \n) that get converted - * by the compiler. It's possible that compilers on some - * platforms will do the symbolic conversions differently, which - * could result in non-portable Tcl scripts. - */ - - case 'a': - result = 0x7; - break; - case 'b': - result = 0x8; - break; - case 'f': - result = 0xc; - break; - case 'n': - result = 0xa; - break; - case 'r': - result = 0xd; - break; - case 't': - result = 0x9; - break; - case 'v': - result = 0xb; - break; - case 'x': - if (isxdigit(UCHAR(p[1]))) { /* INTL: digit */ - char *end; - - result = (unsigned char) strtoul(p+1, &end, 16); - count = end - src; - } else { - count = 2; - result = 'x'; - } - break; - case 'u': - result = 0; - for (count = 0; count < 4; count++) { - p++; - if (!isxdigit(UCHAR(*p))) { /* INTL: digit */ - break; - } - n = *p - '0'; - if (n > 9) { - n = n + '0' + 10 - 'A'; - } - if (n > 16) { - n = n + 'A' - 'a'; - } - result = (result << 4) + n; - } - if (count == 0) { - result = 'u'; - } - count += 2; - break; - - case '\n': - do { - p++; - } while ((*p == ' ') || (*p == '\t')); - result = ' '; - count = p - src; - break; - case 0: - result = '\\'; - count = 1; - break; - default: - /* - * Check for an octal number \oo?o? - */ - if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ - result = (unsigned char)(*p - '0'); - p++; - if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */ - break; - } - count = 3; - result = (unsigned char)((result << 3) + (*p - '0')); - p++; - if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */ - break; - } - count = 4; - result = (unsigned char)((result << 3) + (*p - '0')); - break; - } - if (UCHAR(*p) < UNICODE_SELF) { - result = *p; - count = 2; - } else { - /* - * We have to convert here because the user has put a - * backslash in front of a multi-byte utf-8 character. - * While this means nothing special, we shouldn't break up - * a correct utf-8 character. [Bug #217987] test subst-3.2 - */ - count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ - } - break; - } - if (readPtr != NULL) { - *readPtr = count; + *readPtr = numRead; } - return Tcl_UniCharToUtf((int) result, dst); + return result; } /* |