diff options
Diffstat (limited to 'generic/regc_lex.c')
-rw-r--r-- | generic/regc_lex.c | 65 |
1 files changed, 25 insertions, 40 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c index 0cc62a2..d96d22f 100644 --- a/generic/regc_lex.c +++ b/generic/regc_lex.c @@ -832,27 +832,23 @@ lexescape( RETV(PLAIN, CHR('\t')); break; case CHR('u'): - c = (uchr) lexdigits(v, 16, 4, 4); + c = (uchr) lexdigits(v, 16, 1, 4); if (ISERR()) { FAILW(REG_EESCAPE); } RETV(PLAIN, c); break; case CHR('U'): - i = lexdigits(v, 16, 8, 8); + i = lexdigits(v, 16, 1, 8); if (ISERR()) { FAILW(REG_EESCAPE); } -#if CHRBITS > 16 - if ((unsigned)i > 0x10FFFF) { - i = 0xFFFD; - } -#else - if ((unsigned)i & ~0xFFFF) { + if (i > 0xFFFF) { + /* TODO: output a Surrogate pair + */ i = 0xFFFD; } -#endif - RETV(PLAIN, (uchr)i); + RETV(PLAIN, (uchr) i); break; case CHR('v'): RETV(PLAIN, CHR('\v')); @@ -867,7 +863,7 @@ lexescape( break; case CHR('x'): NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ + c = (uchr) lexdigits(v, 16, 1, 2); if (ISERR()) { FAILW(REG_EESCAPE); } @@ -889,7 +885,7 @@ lexescape( case CHR('9'): save = v->now; v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ + c = (uchr) lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ if (ISERR()) { FAILW(REG_EESCAPE); } @@ -909,17 +905,20 @@ lexescape( v->now = save; - /* - * And fall through into octal number. - */ + /* FALLTHRU */ case CHR('0'): NOTE(REG_UUNPORT); v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); + c = (uchr) lexdigits(v, 8, 1, 3); if (ISERR()) { FAILW(REG_EESCAPE); } + if (c > 0xFF) { + /* out of range, so we handled one digit too much */ + v->now--; + c >>= 3; + } RETV(PLAIN, c); break; default: @@ -932,23 +931,27 @@ lexescape( /* - lexdigits - slurp up digits and return chr value - ^ static chr lexdigits(struct vars *, int, int, int); + ^ static int lexdigits(struct vars *, int, int, int); */ -static chr /* chr value; errors signalled via ERR */ +static int /* chr value; errors signalled via ERR */ lexdigits( struct vars *v, int base, int minlen, int maxlen) { - uchr n; /* unsigned to avoid overflow misbehavior */ + int n; int len; chr c; int d; - CONST uchr ub = (uchr) base; + const uchr ub = (uchr) base; n = 0; for (len = 0; len < maxlen && !ATEOS(); len++) { + if (n > 0x10FFF) { + /* Stop when continuing would otherwise overflow */ + break; + } c = *v->now++; switch (c) { case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): @@ -981,7 +984,7 @@ lexdigits( ERR(REG_EESCAPE); } - return (chr)n; + return n; } /* @@ -1103,7 +1106,7 @@ brenext( /* - skip - skip white space and comments in expanded form - ^ static VOID skip(struct vars *); + ^ static void skip(struct vars *); */ static void skip( @@ -1147,24 +1150,6 @@ newline(void) } /* - - ch - return the chr sequence for regc_locale.c's fake collating element ch - * This helps confine use of CHR to this source file. Beware that the caller - * knows how long the sequence is. - ^ #ifdef REG_DEBUG - ^ static const chr *ch(NOPARMS); - ^ #endif - */ -#ifdef REG_DEBUG -static const chr * -ch(void) -{ - static const chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; - - return chstr; -} -#endif - -/* - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. |