1 files changed, 25 insertions, 40 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 0cc62a2..d96d22f 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -832,27 +832,23 @@ lexescape(
 	RETV(PLAIN, CHR('\t'));
 	break;
     case CHR('u'):
-	c = (uchr) lexdigits(v, 16, 4, 4);
+	c = (uchr) lexdigits(v, 16, 1, 4);
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
 	RETV(PLAIN, c);
 	break;
     case CHR('U'):
-	i = lexdigits(v, 16, 8, 8);
+	i = lexdigits(v, 16, 1, 8);
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
-#if CHRBITS > 16
-	if ((unsigned)i > 0x10FFFF) {
-	    i = 0xFFFD;
-	}
-#else
-	if ((unsigned)i & ~0xFFFF) {
+	if (i > 0xFFFF) {
+	    /* TODO: output a Surrogate pair
+	     */
 	    i = 0xFFFD;
 	}
-#endif
-	RETV(PLAIN, (uchr)i);
+	RETV(PLAIN, (uchr) i);
 	break;
     case CHR('v'):
 	RETV(PLAIN, CHR('\v'));
@@ -867,7 +863,7 @@ lexescape(
 	break;
     case CHR('x'):
 	NOTE(REG_UUNPORT);
-	c = lexdigits(v, 16, 1, 255);	/* REs >255 long outside spec */
+	c = (uchr) lexdigits(v, 16, 1, 2);
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
@@ -889,7 +885,7 @@ lexescape(
     case CHR('9'):
 	save = v->now;
 	v->now--;		/* put first digit back */
-	c = lexdigits(v, 10, 1, 255);	/* REs >255 long outside spec */
+	c = (uchr) lexdigits(v, 10, 1, 255);	/* REs >255 long outside spec */
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
@@ -909,17 +905,20 @@ lexescape(
 
 	v->now = save;
 
-	/*
-	 * And fall through into octal number.
-	 */
+	/* FALLTHRU */
 
     case CHR('0'):
 	NOTE(REG_UUNPORT);
 	v->now--;		/* put first digit back */
-	c = lexdigits(v, 8, 1, 3);
+	c = (uchr) lexdigits(v, 8, 1, 3);
 	if (ISERR()) {
 	    FAILW(REG_EESCAPE);
 	}
+	if (c > 0xFF) {
+	    /* out of range, so we handled one digit too many */
+	    v->now--;
+	    c >>= 3;
+	}
 	RETV(PLAIN, c);
 	break;
     default:
@@ -932,23 +931,27 @@ lexescape(
 
 /*
  - lexdigits - slurp up digits and return chr value
- ^ static chr lexdigits(struct vars *, int, int, int);
+ ^ static int lexdigits(struct vars *, int, int, int);
  */
-static chr			/* chr value; errors signalled via ERR */
+static int			/* chr value; errors signalled via ERR */
 lexdigits(
     struct vars *v,
     int base,
     int minlen,
     int maxlen)
 {
-    uchr n;			/* unsigned to avoid overflow misbehavior */
+    int n;
     int len;
     chr c;
     int d;
-    CONST uchr ub = (uchr) base;
+    const uchr ub = (uchr) base;
 
     n = 0;
     for (len = 0; len < maxlen && !ATEOS(); len++) {
+	if (n > 0x10FFF) {
+	    /* Stop when continuing would otherwise overflow */
+	    break;
+	}
 	c = *v->now++;
 	switch (c) {
 	case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
@@ -981,7 +984,7 @@ lexdigits(
 	ERR(REG_EESCAPE);
     }
 
-    return (chr)n;
+    return n;
 }
 
 /*
@@ -1103,7 +1106,7 @@ brenext(
 
 /*
  - skip - skip white space and comments in expanded form
- ^ static VOID skip(struct vars *);
+ ^ static void skip(struct vars *);
  */
 static void
 skip(
@@ -1147,24 +1150,6 @@ newline(void)
 }
 
 /*
- - ch - return the chr sequence for regc_locale.c's fake collating element ch
- * This helps confine use of CHR to this source file.  Beware that the caller
- * knows how long the sequence is.
- ^ #ifdef REG_DEBUG
- ^ static const chr *ch(NOPARMS);
- ^ #endif
- */
-#ifdef REG_DEBUG
-static const chr *
-ch(void)
-{
-    static const chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
-
-    return chstr;
-}
-#endif
-
-/*
  - chrnamed - return the chr known by a given (chr string) name
  * The code is a bit clumsy, but this routine gets only such specialized
  * use that it hardly matters.