summaryrefslogtreecommitdiffstats
path: root/generic/regc_lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/regc_lex.c')
-rw-r--r--generic/regc_lex.c65
1 files changed, 25 insertions, 40 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 0cc62a2..d96d22f 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -832,27 +832,23 @@ lexescape(
RETV(PLAIN, CHR('\t'));
break;
case CHR('u'):
- c = (uchr) lexdigits(v, 16, 4, 4);
+ c = (uchr) lexdigits(v, 16, 1, 4);
if (ISERR()) {
FAILW(REG_EESCAPE);
}
RETV(PLAIN, c);
break;
case CHR('U'):
- i = lexdigits(v, 16, 8, 8);
+ i = lexdigits(v, 16, 1, 8);
if (ISERR()) {
FAILW(REG_EESCAPE);
}
-#if CHRBITS > 16
- if ((unsigned)i > 0x10FFFF) {
- i = 0xFFFD;
- }
-#else
- if ((unsigned)i & ~0xFFFF) {
+ if (i > 0xFFFF) {
+ /* TODO: output a Surrogate pair
+ */
i = 0xFFFD;
}
-#endif
- RETV(PLAIN, (uchr)i);
+ RETV(PLAIN, (uchr) i);
break;
case CHR('v'):
RETV(PLAIN, CHR('\v'));
@@ -867,7 +863,7 @@ lexescape(
break;
case CHR('x'):
NOTE(REG_UUNPORT);
- c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
+ c = (uchr) lexdigits(v, 16, 1, 2);
if (ISERR()) {
FAILW(REG_EESCAPE);
}
@@ -889,7 +885,7 @@ lexescape(
case CHR('9'):
save = v->now;
v->now--; /* put first digit back */
- c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
+ c = (uchr) lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
if (ISERR()) {
FAILW(REG_EESCAPE);
}
@@ -909,17 +905,20 @@ lexescape(
v->now = save;
- /*
- * And fall through into octal number.
- */
+ /* FALLTHRU */
case CHR('0'):
NOTE(REG_UUNPORT);
v->now--; /* put first digit back */
- c = lexdigits(v, 8, 1, 3);
+ c = (uchr) lexdigits(v, 8, 1, 3);
if (ISERR()) {
FAILW(REG_EESCAPE);
}
+ if (c > 0xFF) {
+ /* out of range, so we handled one digit too much */
+ v->now--;
+ c >>= 3;
+ }
RETV(PLAIN, c);
break;
default:
@@ -932,23 +931,27 @@ lexescape(
/*
- lexdigits - slurp up digits and return chr value
- ^ static chr lexdigits(struct vars *, int, int, int);
+ ^ static int lexdigits(struct vars *, int, int, int);
*/
-static chr /* chr value; errors signalled via ERR */
+static int /* chr value; errors signalled via ERR */
lexdigits(
struct vars *v,
int base,
int minlen,
int maxlen)
{
- uchr n; /* unsigned to avoid overflow misbehavior */
+ int n;
int len;
chr c;
int d;
- CONST uchr ub = (uchr) base;
+ const uchr ub = (uchr) base;
n = 0;
for (len = 0; len < maxlen && !ATEOS(); len++) {
+ if (n > 0x10FFF) {
+ /* Stop when continuing would otherwise overflow */
+ break;
+ }
c = *v->now++;
switch (c) {
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
@@ -981,7 +984,7 @@ lexdigits(
ERR(REG_EESCAPE);
}
- return (chr)n;
+ return n;
}
/*
@@ -1103,7 +1106,7 @@ brenext(
/*
- skip - skip white space and comments in expanded form
- ^ static VOID skip(struct vars *);
+ ^ static void skip(struct vars *);
*/
static void
skip(
@@ -1147,24 +1150,6 @@ newline(void)
}
/*
- - ch - return the chr sequence for regc_locale.c's fake collating element ch
- * This helps confine use of CHR to this source file. Beware that the caller
- * knows how long the sequence is.
- ^ #ifdef REG_DEBUG
- ^ static const chr *ch(NOPARMS);
- ^ #endif
- */
-#ifdef REG_DEBUG
-static const chr *
-ch(void)
-{
- static const chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
-
- return chstr;
-}
-#endif
-
-/*
- chrnamed - return the chr known by a given (chr string) name
* The code is a bit clumsy, but this routine gets only such specialized
* use that it hardly matters.