summaryrefslogtreecommitdiffstats
path: root/generic/regc_lex.c
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2005-11-09 16:36:14 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2005-11-09 16:36:14 (GMT)
commit133fb4a09f75b408471dd65c981eb8842ae92c76 (patch)
tree31171954bc88cbb4b8756f647f2f878267ce4207 /generic/regc_lex.c
parent93b10873c6edf504fe3aa5358bf31b80a6bd97b1 (diff)
downloadtcl-133fb4a09f75b408471dd65c981eb8842ae92c76.zip
tcl-133fb4a09f75b408471dd65c981eb8842ae92c76.tar.gz
tcl-133fb4a09f75b408471dd65c981eb8842ae92c76.tar.bz2
ANSIfy the RE compiler.
Diffstat (limited to 'generic/regc_lex.c')
-rw-r--r--generic/regc_lex.c1804
1 files changed, 964 insertions, 840 deletions
diff --git a/generic/regc_lex.c b/generic/regc_lex.c
index 1acc3f4..cc02e9d 100644
--- a/generic/regc_lex.c
+++ b/generic/regc_lex.c
@@ -3,20 +3,20 @@
* This file is #included by regcomp.c.
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
- *
+ *
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author
- * thanks all of them.
- *
+ * thanks all of them.
+ *
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications.
- *
- * I'd appreciate being given credit for this package in the documentation
- * of software which uses it, but that is not a requirement.
- *
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
@@ -27,7 +27,6 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
/* scanning macros (know about v) */
@@ -35,9 +34,10 @@
#define HAVE(n) (v->stop - v->now >= (n))
#define NEXT1(c) (!ATEOS() && *v->now == CHR(c))
#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
-#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \
- *(v->now+1) == CHR(b) && \
- *(v->now+2) == CHR(c))
+#define NEXT3(a,b,c) \
+ (HAVE(3) && *v->now == CHR(a) && \
+ *(v->now+1) == CHR(b) && \
+ *(v->now+2) == CHR(c))
#define SET(c) (v->nexttype = (c))
#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n))
#define RET(c) return (SET(c), 1)
@@ -60,804 +60,907 @@
/* construct pointer past end of chr array */
#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
-
+
/*
- lexstart - set up lexical stuff, scan leading options
^ static VOID lexstart(struct vars *);
*/
-static VOID
-lexstart(v)
-struct vars *v;
+static void
+lexstart(
+ struct vars *v)
{
- prefixes(v); /* may turn on new type bits etc. */
- NOERR();
+ prefixes(v); /* may turn on new type bits etc. */
+ NOERR();
- if (v->cflags&REG_QUOTE) {
- assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
- INTOCON(L_Q);
- } else if (v->cflags&REG_EXTENDED) {
- assert(!(v->cflags&REG_QUOTE));
- INTOCON(L_ERE);
- } else {
- assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
- INTOCON(L_BRE);
- }
+ if (v->cflags&REG_QUOTE) {
+ assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+ INTOCON(L_Q);
+ } else if (v->cflags&REG_EXTENDED) {
+ assert(!(v->cflags&REG_QUOTE));
+ INTOCON(L_ERE);
+ } else {
+ assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+ INTOCON(L_BRE);
+ }
- v->nexttype = EMPTY; /* remember we were at the start */
- next(v); /* set up the first token */
+ v->nexttype = EMPTY; /* remember we were at the start */
+ next(v); /* set up the first token */
}
-
+
/*
- prefixes - implement various special prefixes
^ static VOID prefixes(struct vars *);
*/
-static VOID
-prefixes(v)
-struct vars *v;
+static void
+prefixes(
+ struct vars *v)
{
- /* literal string doesn't get any of this stuff */
- if (v->cflags&REG_QUOTE)
- return;
+ /*
+ * Literal string doesn't get any of this stuff.
+ */
- /* initial "***" gets special things */
- if (HAVE(4) && NEXT3('*', '*', '*'))
- switch (*(v->now + 3)) {
- case CHR('?'): /* "***?" error, msg shows version */
- ERR(REG_BADPAT);
- return; /* proceed no further */
- break;
- case CHR('='): /* "***=" shifts to literal string */
- NOTE(REG_UNONPOSIX);
- v->cflags |= REG_QUOTE;
- v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
- v->now += 4;
- return; /* and there can be no more prefixes */
- break;
- case CHR(':'): /* "***:" shifts to AREs */
- NOTE(REG_UNONPOSIX);
- v->cflags |= REG_ADVANCED;
- v->now += 4;
- break;
- default: /* otherwise *** is just an error */
- ERR(REG_BADRPT);
- return;
- break;
- }
+ if (v->cflags&REG_QUOTE) {
+ return;
+ }
- /* BREs and EREs don't get embedded options */
- if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
- return;
+ /*
+ * Initial "***" gets special things.
+ */
- /* embedded options (AREs only) */
- if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
- NOTE(REG_UNONPOSIX);
- v->now += 2;
- for (; !ATEOS() && iscalpha(*v->now); v->now++)
- switch (*v->now) {
- case CHR('b'): /* BREs (but why???) */
- v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
- break;
- case CHR('c'): /* case sensitive */
- v->cflags &= ~REG_ICASE;
- break;
- case CHR('e'): /* plain EREs */
- v->cflags |= REG_EXTENDED;
- v->cflags &= ~(REG_ADVF|REG_QUOTE);
- break;
- case CHR('i'): /* case insensitive */
- v->cflags |= REG_ICASE;
- break;
- case CHR('m'): /* Perloid synonym for n */
- case CHR('n'): /* \n affects ^ $ . [^ */
- v->cflags |= REG_NEWLINE;
- break;
- case CHR('p'): /* ~Perl, \n affects . [^ */
- v->cflags |= REG_NLSTOP;
- v->cflags &= ~REG_NLANCH;
- break;
- case CHR('q'): /* literal string */
- v->cflags |= REG_QUOTE;
- v->cflags &= ~REG_ADVANCED;
- break;
- case CHR('s'): /* single line, \n ordinary */
- v->cflags &= ~REG_NEWLINE;
- break;
- case CHR('t'): /* tight syntax */
- v->cflags &= ~REG_EXPANDED;
- break;
- case CHR('w'): /* weird, \n affects ^ $ only */
- v->cflags &= ~REG_NLSTOP;
- v->cflags |= REG_NLANCH;
- break;
- case CHR('x'): /* expanded syntax */
- v->cflags |= REG_EXPANDED;
- break;
- default:
- ERR(REG_BADOPT);
- return;
- }
- if (!NEXT1(')')) {
- ERR(REG_BADOPT);
- return;
- }
- v->now++;
- if (v->cflags&REG_QUOTE)
- v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
+ if (HAVE(4) && NEXT3('*', '*', '*')) {
+ switch (*(v->now + 3)) {
+ case CHR('?'): /* "***?" error, msg shows version */
+ ERR(REG_BADPAT);
+ return; /* proceed no further */
+ break;
+ case CHR('='): /* "***=" shifts to literal string */
+ NOTE(REG_UNONPOSIX);
+ v->cflags |= REG_QUOTE;
+ v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
+ v->now += 4;
+ return; /* and there can be no more prefixes */
+ break;
+ case CHR(':'): /* "***:" shifts to AREs */
+ NOTE(REG_UNONPOSIX);
+ v->cflags |= REG_ADVANCED;
+ v->now += 4;
+ break;
+ default: /* otherwise *** is just an error */
+ ERR(REG_BADRPT);
+ return;
+ break;
}
-}
+ }
+ /*
+ * BREs and EREs don't get embedded options.
+ */
+
+ if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) {
+ return;
+ }
+
+ /*
+ * Embedded options (AREs only).
+ */
+
+ if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
+ NOTE(REG_UNONPOSIX);
+ v->now += 2;
+ for (; !ATEOS() && iscalpha(*v->now); v->now++) {
+ switch (*v->now) {
+ case CHR('b'): /* BREs (but why???) */
+ v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
+ break;
+ case CHR('c'): /* case sensitive */
+ v->cflags &= ~REG_ICASE;
+ break;
+ case CHR('e'): /* plain EREs */
+ v->cflags |= REG_EXTENDED;
+ v->cflags &= ~(REG_ADVF|REG_QUOTE);
+ break;
+ case CHR('i'): /* case insensitive */
+ v->cflags |= REG_ICASE;
+ break;
+ case CHR('m'): /* Perloid synonym for n */
+ case CHR('n'): /* \n affects ^ $ . [^ */
+ v->cflags |= REG_NEWLINE;
+ break;
+ case CHR('p'): /* ~Perl, \n affects . [^ */
+ v->cflags |= REG_NLSTOP;
+ v->cflags &= ~REG_NLANCH;
+ break;
+ case CHR('q'): /* literal string */
+ v->cflags |= REG_QUOTE;
+ v->cflags &= ~REG_ADVANCED;
+ break;
+ case CHR('s'): /* single line, \n ordinary */
+ v->cflags &= ~REG_NEWLINE;
+ break;
+ case CHR('t'): /* tight syntax */
+ v->cflags &= ~REG_EXPANDED;
+ break;
+ case CHR('w'): /* weird, \n affects ^ $ only */
+ v->cflags &= ~REG_NLSTOP;
+ v->cflags |= REG_NLANCH;
+ break;
+ case CHR('x'): /* expanded syntax */
+ v->cflags |= REG_EXPANDED;
+ break;
+ default:
+ ERR(REG_BADOPT);
+ return;
+ }
+ }
+ if (!NEXT1(')')) {
+ ERR(REG_BADOPT);
+ return;
+ }
+ v->now++;
+ if (v->cflags&REG_QUOTE) {
+ v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
+ }
+ }
+}
+
/*
- lexnest - "call a subroutine", interpolating string at the lexical level
* Note, this is not a very general facility. There are a number of
* implicit assumptions about what sorts of strings can be subroutines.
^ static VOID lexnest(struct vars *, chr *, chr *);
*/
-static VOID
-lexnest(v, beginp, endp)
-struct vars *v;
-chr *beginp; /* start of interpolation */
-chr *endp; /* one past end of interpolation */
+static void
+lexnest(
+ struct vars *v,
+ chr *beginp, /* start of interpolation */
+ chr *endp) /* one past end of interpolation */
{
- assert(v->savenow == NULL); /* only one level of nesting */
- v->savenow = v->now;
- v->savestop = v->stop;
- v->now = beginp;
- v->stop = endp;
+ assert(v->savenow == NULL); /* only one level of nesting */
+ v->savenow = v->now;
+ v->savestop = v->stop;
+ v->now = beginp;
+ v->stop = endp;
}
-
+
/*
* string constants to interpolate as expansions of things like \d
*/
+
static chr backd[] = { /* \d */
- CHR('['), CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']')
+ CHR('['), CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']'), CHR(']')
};
static chr backD[] = { /* \D */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']'), CHR(']')
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']'), CHR(']')
};
static chr brbackd[] = { /* \d within brackets */
- CHR('['), CHR(':'),
- CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
- CHR(':'), CHR(']')
+ CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']')
};
static chr backs[] = { /* \s */
- CHR('['), CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']')
+ CHR('['), CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']'), CHR(']')
};
static chr backS[] = { /* \S */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']'), CHR(']')
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']'), CHR(']')
};
static chr brbacks[] = { /* \s within brackets */
- CHR('['), CHR(':'),
- CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
- CHR(':'), CHR(']')
+ CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']')
};
static chr backw[] = { /* \w */
- CHR('['), CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
+ CHR('['), CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr backW[] = { /* \W */
- CHR('['), CHR('^'), CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_'), CHR(']')
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
};
static chr brbackw[] = { /* \w within brackets */
- CHR('['), CHR(':'),
- CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
- CHR(':'), CHR(']'), CHR('_')
+ CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_')
};
-
+
/*
- lexword - interpolate a bracket expression for word characters
* Possibly ought to inquire whether there is a "word" character class.
^ static VOID lexword(struct vars *);
*/
-static VOID
-lexword(v)
-struct vars *v;
+static void
+lexword(
+ struct vars *v)
{
- lexnest(v, backw, ENDOF(backw));
+ lexnest(v, backw, ENDOF(backw));
}
-
+
/*
- next - get next token
^ static int next(struct vars *);
*/
static int /* 1 normal, 0 failure */
-next(v)
-struct vars *v;
+next(
+ struct vars *v)
{
- chr c;
+ chr c;
- /* errors yield an infinite sequence of failures */
- if (ISERR())
- return 0; /* the error has set nexttype to EOS */
+ /*
+ * Errors yield an infinite sequence of failures.
+ */
- /* remember flavor of last token */
- v->lasttype = v->nexttype;
+ if (ISERR()) {
+ return 0; /* the error has set nexttype to EOS */
+ }
- /* REG_BOSONLY */
- if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
- /* at start of a REG_BOSONLY RE */
- RETV(SBEGIN, 0); /* same as \A */
- }
+ /*
+ * Remember flavor of last token.
+ */
- /* if we're nested and we've hit end, return to outer level */
- if (v->savenow != NULL && ATEOS()) {
- v->now = v->savenow;
- v->stop = v->savestop;
- v->savenow = v->savestop = NULL;
- }
+ v->lasttype = v->nexttype;
- /* skip white space etc. if appropriate (not in literal or []) */
- if (v->cflags&REG_EXPANDED)
- switch (v->lexcon) {
- case L_ERE:
- case L_BRE:
- case L_EBND:
- case L_BBND:
- skip(v);
- break;
- }
+ /*
+ * REG_BOSONLY
+ */
- /* handle EOS, depending on context */
- if (ATEOS()) {
- switch (v->lexcon) {
- case L_ERE:
- case L_BRE:
- case L_Q:
- RET(EOS);
- break;
- case L_EBND:
- case L_BBND:
- FAILW(REG_EBRACE);
- break;
- case L_BRACK:
- case L_CEL:
- case L_ECL:
- case L_CCL:
- FAILW(REG_EBRACK);
- break;
- }
- assert(NOTREACHED);
+ if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
+ /* at start of a REG_BOSONLY RE */
+ RETV(SBEGIN, 0); /* same as \A */
+ }
+
+ /*
+ * If we're nested and we've hit end, return to outer level.
+ */
+
+ if (v->savenow != NULL && ATEOS()) {
+ v->now = v->savenow;
+ v->stop = v->savestop;
+ v->savenow = v->savestop = NULL;
+ }
+
+ /*
+ * Skip white space etc. if appropriate (not in literal or [])
+ */
+
+ if (v->cflags&REG_EXPANDED) {
+ switch (v->lexcon) {
+ case L_ERE:
+ case L_BRE:
+ case L_EBND:
+ case L_BBND:
+ skip(v);
+ break;
}
+ }
- /* okay, time to actually get a character */
- c = *v->now++;
+ /*
+ * Handle EOS, depending on context.
+ */
- /* deal with the easy contexts, punt EREs to code below */
+ if (ATEOS()) {
switch (v->lexcon) {
- case L_BRE: /* punt BREs to separate function */
- return brenext(v, c);
- break;
- case L_ERE: /* see below */
- break;
- case L_Q: /* literal strings are easy */
- RETV(PLAIN, c);
- break;
- case L_BBND: /* bounds are fairly simple */
+ case L_ERE:
+ case L_BRE:
+ case L_Q:
+ RET(EOS);
+ break;
case L_EBND:
- switch (c) {
- case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
- case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
- case CHR('8'): case CHR('9'):
- RETV(DIGIT, (chr)DIGITVAL(c));
- break;
- case CHR(','):
- RET(',');
- break;
- case CHR('}'): /* ERE bound ends with } */
- if (INCON(L_EBND)) {
- INTOCON(L_ERE);
- if ((v->cflags&REG_ADVF) && NEXT1('?')) {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('}', 0);
- }
- RETV('}', 1);
- } else
- FAILW(REG_BADBR);
- break;
- case CHR('\\'): /* BRE bound ends with \} */
- if (INCON(L_BBND) && NEXT1('}')) {
- v->now++;
- INTOCON(L_BRE);
- RET('}');
- } else
- FAILW(REG_BADBR);
- break;
- default:
- FAILW(REG_BADBR);
- break;
- }
- assert(NOTREACHED);
- break;
- case L_BRACK: /* brackets are not too hard */
- switch (c) {
- case CHR(']'):
- if (LASTTYPE('['))
- RETV(PLAIN, c);
- else {
- INTOCON((v->cflags&REG_EXTENDED) ?
- L_ERE : L_BRE);
- RET(']');
- }
- break;
- case CHR('\\'):
- NOTE(REG_UBBS);
- if (!(v->cflags&REG_ADVF))
- RETV(PLAIN, c);
- NOTE(REG_UNONPOSIX);
- if (ATEOS())
- FAILW(REG_EESCAPE);
- (DISCARD)lexescape(v);
- switch (v->nexttype) { /* not all escapes okay here */
- case PLAIN:
- return 1;
- break;
- case CCLASS:
- switch (v->nextvalue) {
- case 'd':
- lexnest(v, brbackd, ENDOF(brbackd));
- break;
- case 's':
- lexnest(v, brbacks, ENDOF(brbacks));
- break;
- case 'w':
- lexnest(v, brbackw, ENDOF(brbackw));
- break;
- default:
- FAILW(REG_EESCAPE);
- break;
- }
- /* lexnest done, back up and try again */
- v->nexttype = v->lasttype;
- return next(v);
- break;
- }
- /* not one of the acceptable escapes */
- FAILW(REG_EESCAPE);
- break;
- case CHR('-'):
- if (LASTTYPE('[') || NEXT1(']'))
- RETV(PLAIN, c);
- else
- RETV(RANGE, c);
- break;
- case CHR('['):
- if (ATEOS())
- FAILW(REG_EBRACK);
- switch (*v->now++) {
- case CHR('.'):
- INTOCON(L_CEL);
- /* might or might not be locale-specific */
- RET(COLLEL);
- break;
- case CHR('='):
- INTOCON(L_ECL);
- NOTE(REG_ULOCALE);
- RET(ECLASS);
- break;
- case CHR(':'):
- INTOCON(L_CCL);
- NOTE(REG_ULOCALE);
- RET(CCLASS);
- break;
- default: /* oops */
- v->now--;
- RETV(PLAIN, c);
- break;
- }
- assert(NOTREACHED);
- break;
- default:
- RETV(PLAIN, c);
- break;
- }
- assert(NOTREACHED);
- break;
- case L_CEL: /* collating elements are easy */
- if (c == CHR('.') && NEXT1(']')) {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, '.');
- } else
- RETV(PLAIN, c);
- break;
- case L_ECL: /* ditto equivalence classes */
- if (c == CHR('=') && NEXT1(']')) {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, '=');
- } else
- RETV(PLAIN, c);
- break;
- case L_CCL: /* ditto character classes */
- if (c == CHR(':') && NEXT1(']')) {
- v->now++;
- INTOCON(L_BRACK);
- RETV(END, ':');
- } else
- RETV(PLAIN, c);
- break;
- default:
- assert(NOTREACHED);
- break;
+ case L_BBND:
+ FAILW(REG_EBRACE);
+ break;
+ case L_BRACK:
+ case L_CEL:
+ case L_ECL:
+ case L_CCL:
+ FAILW(REG_EBRACK);
+ break;
}
+ assert(NOTREACHED);
+ }
+
+ /*
+ * Okay, time to actually get a character.
+ */
+
+ c = *v->now++;
- /* that got rid of everything except EREs and AREs */
- assert(INCON(L_ERE));
+ /*
+ * Deal with the easy contexts, punt EREs to code below.
+ */
- /* deal with EREs and AREs, except for backslashes */
+ switch (v->lexcon) {
+ case L_BRE: /* punt BREs to separate function */
+ return brenext(v, c);
+ break;
+ case L_ERE: /* see below */
+ break;
+ case L_Q: /* literal strings are easy */
+ RETV(PLAIN, c);
+ break;
+ case L_BBND: /* bounds are fairly simple */
+ case L_EBND:
switch (c) {
- case CHR('|'):
- RET('|');
- break;
- case CHR('*'):
+ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+ case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+ case CHR('8'): case CHR('9'):
+ RETV(DIGIT, (chr)DIGITVAL(c));
+ break;
+ case CHR(','):
+ RET(',');
+ break;
+ case CHR('}'): /* ERE bound ends with } */
+ if (INCON(L_EBND)) {
+ INTOCON(L_ERE);
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('*', 0);
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('}', 0);
}
- RETV('*', 1);
+ RETV('}', 1);
+ } else {
+ FAILW(REG_BADBR);
+ }
+ break;
+ case CHR('\\'): /* BRE bound ends with \} */
+ if (INCON(L_BBND) && NEXT1('}')) {
+ v->now++;
+ INTOCON(L_BRE);
+ RET('}');
+ } else {
+ FAILW(REG_BADBR);
+ }
+ break;
+ default:
+ FAILW(REG_BADBR);
+ break;
+ }
+ assert(NOTREACHED);
+ break;
+ case L_BRACK: /* brackets are not too hard */
+ switch (c) {
+ case CHR(']'):
+ if (LASTTYPE('[')) {
+ RETV(PLAIN, c);
+ } else {
+ INTOCON((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE);
+ RET(']');
+ }
+ break;
+ case CHR('\\'):
+ NOTE(REG_UBBS);
+ if (!(v->cflags&REG_ADVF)) {
+ RETV(PLAIN, c);
+ }
+ NOTE(REG_UNONPOSIX);
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
+ (DISCARD)lexescape(v);
+ switch (v->nexttype) { /* not all escapes okay here */
+ case PLAIN:
+ return 1;
break;
- case CHR('+'):
- if ((v->cflags&REG_ADVF) && NEXT1('?')) {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('+', 0);
+ case CCLASS:
+ switch (v->nextvalue) {
+ case 'd':
+ lexnest(v, brbackd, ENDOF(brbackd));
+ break;
+ case 's':
+ lexnest(v, brbacks, ENDOF(brbacks));
+ break;
+ case 'w':
+ lexnest(v, brbackw, ENDOF(brbackw));
+ break;
+ default:
+ FAILW(REG_EESCAPE);
+ break;
}
- RETV('+', 1);
+
+ /*
+ * lexnest() done, back up and try again.
+ */
+
+ v->nexttype = v->lasttype;
+ return next(v);
break;
- case CHR('?'):
- if ((v->cflags&REG_ADVF) && NEXT1('?')) {
- v->now++;
- NOTE(REG_UNONPOSIX);
- RETV('?', 0);
- }
- RETV('?', 1);
+ }
+
+ /*
+ * Not one of the acceptable escapes.
+ */
+
+ FAILW(REG_EESCAPE);
+ break;
+ case CHR('-'):
+ if (LASTTYPE('[') || NEXT1(']')) {
+ RETV(PLAIN, c);
+ } else {
+ RETV(RANGE, c);
+ }
+ break;
+ case CHR('['):
+ if (ATEOS()) {
+ FAILW(REG_EBRACK);
+ }
+ switch (*v->now++) {
+ case CHR('.'):
+ INTOCON(L_CEL);
+
+ /*
+ * Might or might not be locale-specific.
+ */
+
+ RET(COLLEL);
break;
- case CHR('{'): /* bounds start or plain character */
- if (v->cflags&REG_EXPANDED)
- skip(v);
- if (ATEOS() || !iscdigit(*v->now)) {
- NOTE(REG_UBRACES);
- NOTE(REG_UUNSPEC);
- RETV(PLAIN, c);
- } else {
- NOTE(REG_UBOUNDS);
- INTOCON(L_EBND);
- RET('{');
- }
- assert(NOTREACHED);
+ case CHR('='):
+ INTOCON(L_ECL);
+ NOTE(REG_ULOCALE);
+ RET(ECLASS);
break;
- case CHR('('): /* parenthesis, or advanced extension */
- if ((v->cflags&REG_ADVF) && NEXT1('?')) {
- NOTE(REG_UNONPOSIX);
- v->now++;
- switch (*v->now++) {
- case CHR(':'): /* non-capturing paren */
- RETV('(', 0);
- break;
- case CHR('#'): /* comment */
- while (!ATEOS() && *v->now != CHR(')'))
- v->now++;
- if (!ATEOS())
- v->now++;
- assert(v->nexttype == v->lasttype);
- return next(v);
- break;
- case CHR('='): /* positive lookahead */
- NOTE(REG_ULOOKAHEAD);
- RETV(LACON, 1);
- break;
- case CHR('!'): /* negative lookahead */
- NOTE(REG_ULOOKAHEAD);
- RETV(LACON, 0);
- break;
- default:
- FAILW(REG_BADRPT);
- break;
- }
- assert(NOTREACHED);
- }
- if (v->cflags&REG_NOSUB)
- RETV('(', 0); /* all parens non-capturing */
- else
- RETV('(', 1);
+ case CHR(':'):
+ INTOCON(L_CCL);
+ NOTE(REG_ULOCALE);
+ RET(CCLASS);
break;
- case CHR(')'):
- if (LASTTYPE('(')) {
- NOTE(REG_UUNSPEC);
- }
- RETV(')', c);
+ default: /* oops */
+ v->now--;
+ RETV(PLAIN, c);
break;
- case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
- if (HAVE(6) && *(v->now+0) == CHR('[') &&
- *(v->now+1) == CHR(':') &&
- (*(v->now+2) == CHR('<') ||
- *(v->now+2) == CHR('>')) &&
- *(v->now+3) == CHR(':') &&
- *(v->now+4) == CHR(']') &&
- *(v->now+5) == CHR(']')) {
- c = *(v->now+2);
- v->now += 6;
- NOTE(REG_UNONPOSIX);
- RET((c == CHR('<')) ? '<' : '>');
+ }
+ assert(NOTREACHED);
+ break;
+ default:
+ RETV(PLAIN, c);
+ break;
+ }
+ assert(NOTREACHED);
+ break;
+ case L_CEL: /* collating elements are easy */
+ if (c == CHR('.') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, '.');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ case L_ECL: /* ditto equivalence classes */
+ if (c == CHR('=') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, '=');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ case L_CCL: /* ditto character classes */
+ if (c == CHR(':') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, ':');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ default:
+ assert(NOTREACHED);
+ break;
+ }
+
+ /*
+ * That got rid of everything except EREs and AREs.
+ */
+
+ assert(INCON(L_ERE));
+
+ /*
+ * Deal with EREs and AREs, except for backslashes.
+ */
+
+ switch (c) {
+ case CHR('|'):
+ RET('|');
+ break;
+ case CHR('*'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('*', 0);
+ }
+ RETV('*', 1);
+ break;
+ case CHR('+'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('+', 0);
+ }
+ RETV('+', 1);
+ break;
+ case CHR('?'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('?', 0);
+ }
+ RETV('?', 1);
+ break;
+ case CHR('{'): /* bounds start or plain character */
+ if (v->cflags&REG_EXPANDED) {
+ skip(v);
+ }
+ if (ATEOS() || !iscdigit(*v->now)) {
+ NOTE(REG_UBRACES);
+ NOTE(REG_UUNSPEC);
+ RETV(PLAIN, c);
+ } else {
+ NOTE(REG_UBOUNDS);
+ INTOCON(L_EBND);
+ RET('{');
+ }
+ assert(NOTREACHED);
+ break;
+ case CHR('('): /* parenthesis, or advanced extension */
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ NOTE(REG_UNONPOSIX);
+ v->now++;
+ switch (*v->now++) {
+ case CHR(':'): /* non-capturing paren */
+ RETV('(', 0);
+ break;
+ case CHR('#'): /* comment */
+ while (!ATEOS() && *v->now != CHR(')')) {
+ v->now++;
}
- INTOCON(L_BRACK);
- if (NEXT1('^')) {
- v->now++;
- RETV('[', 0);
+ if (!ATEOS()) {
+ v->now++;
}
- RETV('[', 1);
- break;
- case CHR('.'):
- RET('.');
+ assert(v->nexttype == v->lasttype);
+ return next(v);
break;
- case CHR('^'):
- RET('^');
+ case CHR('='): /* positive lookahead */
+ NOTE(REG_ULOOKAHEAD);
+ RETV(LACON, 1);
break;
- case CHR('$'):
- RET('$');
+ case CHR('!'): /* negative lookahead */
+ NOTE(REG_ULOOKAHEAD);
+ RETV(LACON, 0);
break;
- case CHR('\\'): /* mostly punt backslashes to code below */
- if (ATEOS())
- FAILW(REG_EESCAPE);
- break;
- default: /* ordinary character */
- RETV(PLAIN, c);
+ default:
+ FAILW(REG_BADRPT);
break;
+ }
+ assert(NOTREACHED);
+ }
+ if (v->cflags&REG_NOSUB) {
+ RETV('(', 0); /* all parens non-capturing */
+ } else {
+ RETV('(', 1);
+ }
+ break;
+ case CHR(')'):
+ if (LASTTYPE('(')) {
+ NOTE(REG_UUNSPEC);
+ }
+ RETV(')', c);
+ break;
+ case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
+ if (HAVE(6) && *(v->now+0) == CHR('[') &&
+ *(v->now+1) == CHR(':') &&
+ (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+ *(v->now+3) == CHR(':') &&
+ *(v->now+4) == CHR(']') &&
+ *(v->now+5) == CHR(']')) {
+ c = *(v->now+2);
+ v->now += 6;
+ NOTE(REG_UNONPOSIX);
+ RET((c == CHR('<')) ? '<' : '>');
+ }
+ INTOCON(L_BRACK);
+ if (NEXT1('^')) {
+ v->now++;
+ RETV('[', 0);
+ }
+ RETV('[', 1);
+ break;
+ case CHR('.'):
+ RET('.');
+ break;
+ case CHR('^'):
+ RET('^');
+ break;
+ case CHR('$'):
+ RET('$');
+ break;
+ case CHR('\\'): /* mostly punt backslashes to code below */
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
}
+ break;
+ default: /* ordinary character */
+ RETV(PLAIN, c);
+ break;
+ }
- /* ERE/ARE backslash handling; backslash already eaten */
- assert(!ATEOS());
- if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
- if (iscalnum(*v->now)) {
- NOTE(REG_UBSALNUM);
- NOTE(REG_UUNSPEC);
- }
- RETV(PLAIN, *v->now++);
+ /*
+ * ERE/ARE backslash handling; backslash already eaten.
+ */
+
+ assert(!ATEOS());
+ if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */
+ if (iscalnum(*v->now)) {
+ NOTE(REG_UBSALNUM);
+ NOTE(REG_UUNSPEC);
}
- (DISCARD)lexescape(v);
- if (ISERR())
- FAILW(REG_EESCAPE);
- if (v->nexttype == CCLASS) { /* fudge at lexical level */
- switch (v->nextvalue) {
- case 'd': lexnest(v, backd, ENDOF(backd)); break;
- case 'D': lexnest(v, backD, ENDOF(backD)); break;
- case 's': lexnest(v, backs, ENDOF(backs)); break;
- case 'S': lexnest(v, backS, ENDOF(backS)); break;
- case 'w': lexnest(v, backw, ENDOF(backw)); break;
- case 'W': lexnest(v, backW, ENDOF(backW)); break;
- default:
- assert(NOTREACHED);
- FAILW(REG_ASSERT);
- break;
- }
- /* lexnest done, back up and try again */
- v->nexttype = v->lasttype;
- return next(v);
+ RETV(PLAIN, *v->now++);
+ }
+ (DISCARD)lexescape(v);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ if (v->nexttype == CCLASS) {/* fudge at lexical level */
+ switch (v->nextvalue) {
+ case 'd': lexnest(v, backd, ENDOF(backd)); break;
+ case 'D': lexnest(v, backD, ENDOF(backD)); break;
+ case 's': lexnest(v, backs, ENDOF(backs)); break;
+ case 'S': lexnest(v, backS, ENDOF(backS)); break;
+ case 'w': lexnest(v, backw, ENDOF(backw)); break;
+ case 'W': lexnest(v, backW, ENDOF(backW)); break;
+ default:
+ assert(NOTREACHED);
+ FAILW(REG_ASSERT);
+ break;
}
- /* otherwise, lexescape has already done the work */
- return !ISERR();
-}
+ /* lexnest done, back up and try again */
+ v->nexttype = v->lasttype;
+ return next(v);
+ }
+
+ /*
+ * Otherwise, lexescape has already done the work.
+ */
+ return !ISERR();
+}
+
/*
- lexescape - parse an ARE backslash escape (backslash already eaten)
* Note slightly nonstandard use of the CCLASS type code.
^ static int lexescape(struct vars *);
*/
static int /* not actually used, but convenient for RETV */
-lexescape(v)
-struct vars *v;
+lexescape(
+ struct vars *v)
{
- chr c;
- static chr alert[] = {
- CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
- };
- static chr esc[] = {
- CHR('E'), CHR('S'), CHR('C')
- };
- chr *save;
-
- assert(v->cflags&REG_ADVF);
-
- assert(!ATEOS());
- c = *v->now++;
- if (!iscalnum(c))
- RETV(PLAIN, c);
+ chr c;
+ static chr alert[] = {
+ CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
+ };
+ static chr esc[] = {
+ CHR('E'), CHR('S'), CHR('C')
+ };
+ chr *save;
- NOTE(REG_UNONPOSIX);
- switch (c) {
- case CHR('a'):
- RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
- break;
- case CHR('A'):
- RETV(SBEGIN, 0);
- break;
- case CHR('b'):
- RETV(PLAIN, CHR('\b'));
- break;
- case CHR('B'):
- RETV(PLAIN, CHR('\\'));
- break;
- case CHR('c'):
- NOTE(REG_UUNPORT);
- if (ATEOS())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, (chr)(*v->now++ & 037));
- break;
- case CHR('d'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'd');
- break;
- case CHR('D'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'D');
- break;
- case CHR('e'):
- NOTE(REG_UUNPORT);
- RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
- break;
- case CHR('f'):
- RETV(PLAIN, CHR('\f'));
- break;
- case CHR('m'):
- RET('<');
- break;
- case CHR('M'):
- RET('>');
- break;
- case CHR('n'):
- RETV(PLAIN, CHR('\n'));
- break;
- case CHR('r'):
- RETV(PLAIN, CHR('\r'));
- break;
- case CHR('s'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 's');
- break;
- case CHR('S'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'S');
- break;
- case CHR('t'):
- RETV(PLAIN, CHR('\t'));
- break;
- case CHR('u'):
- c = lexdigits(v, 16, 4, 4);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('U'):
- c = lexdigits(v, 16, 8, 8);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('v'):
- RETV(PLAIN, CHR('\v'));
- break;
- case CHR('w'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'w');
- break;
- case CHR('W'):
- NOTE(REG_ULOCALE);
- RETV(CCLASS, 'W');
- break;
- case CHR('x'):
- NOTE(REG_UUNPORT);
- c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- case CHR('y'):
- NOTE(REG_ULOCALE);
- RETV(WBDRY, 0);
- break;
- case CHR('Y'):
- NOTE(REG_ULOCALE);
- RETV(NWBDRY, 0);
- break;
- case CHR('Z'):
- RETV(SEND, 0);
- break;
- case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
- case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
- case CHR('9'):
- save = v->now;
- v->now--; /* put first digit back */
- c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
- if (ISERR())
- FAILW(REG_EESCAPE);
- /* ugly heuristic (first test is "exactly 1 digit?") */
- if (v->now - save == 0 || (int)c <= v->nsubexp) {
- NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr)c);
- }
- /* oops, doesn't look like it's a backref after all... */
- v->now = save;
- /* and fall through into octal number */
- case CHR('0'):
- NOTE(REG_UUNPORT);
- v->now--; /* put first digit back */
- c = lexdigits(v, 8, 1, 3);
- if (ISERR())
- FAILW(REG_EESCAPE);
- RETV(PLAIN, c);
- break;
- default:
- assert(iscalpha(c));
- FAILW(REG_EESCAPE); /* unknown alphabetic escape */
- break;
+ assert(v->cflags&REG_ADVF);
+
+ assert(!ATEOS());
+ c = *v->now++;
+ if (!iscalnum(c)) {
+ RETV(PLAIN, c);
+ }
+
+ NOTE(REG_UNONPOSIX);
+ switch (c) {
+ case CHR('a'):
+ RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+ break;
+ case CHR('A'):
+ RETV(SBEGIN, 0);
+ break;
+ case CHR('b'):
+ RETV(PLAIN, CHR('\b'));
+ break;
+ case CHR('B'):
+ RETV(PLAIN, CHR('\\'));
+ break;
+ case CHR('c'):
+ NOTE(REG_UUNPORT);
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
}
- assert(NOTREACHED);
-}
+ RETV(PLAIN, (chr)(*v->now++ & 037));
+ break;
+ case CHR('d'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'd');
+ break;
+ case CHR('D'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'D');
+ break;
+ case CHR('e'):
+ NOTE(REG_UUNPORT);
+ RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+ break;
+ case CHR('f'):
+ RETV(PLAIN, CHR('\f'));
+ break;
+ case CHR('m'):
+ RET('<');
+ break;
+ case CHR('M'):
+ RET('>');
+ break;
+ case CHR('n'):
+ RETV(PLAIN, CHR('\n'));
+ break;
+ case CHR('r'):
+ RETV(PLAIN, CHR('\r'));
+ break;
+ case CHR('s'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 's');
+ break;
+ case CHR('S'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'S');
+ break;
+ case CHR('t'):
+ RETV(PLAIN, CHR('\t'));
+ break;
+ case CHR('u'):
+ c = lexdigits(v, 16, 4, 4);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('U'):
+ c = lexdigits(v, 16, 8, 8);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('v'):
+ RETV(PLAIN, CHR('\v'));
+ break;
+ case CHR('w'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'w');
+ break;
+ case CHR('W'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'W');
+ break;
+ case CHR('x'):
+ NOTE(REG_UUNPORT);
+ c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('y'):
+ NOTE(REG_ULOCALE);
+ RETV(WBDRY, 0);
+ break;
+ case CHR('Y'):
+ NOTE(REG_ULOCALE);
+ RETV(NWBDRY, 0);
+ break;
+ case CHR('Z'):
+ RETV(SEND, 0);
+ break;
+ case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+ case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+ case CHR('9'):
+ save = v->now;
+ v->now--; /* put first digit back */
+ c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+
+ /*
+ * Ugly heuristic (first test is "exactly 1 digit?")
+ */
+
+ if (v->now - save == 0 || (int)c <= v->nsubexp) {
+ NOTE(REG_UBACKREF);
+ RETV(BACKREF, (chr)c);
+ }
+
+ /*
+ * Oops, doesn't look like it's a backref after all...
+ */
+ v->now = save;
+
+ /*
+ * And fall through into octal number.
+ */
+
+ case CHR('0'):
+ NOTE(REG_UUNPORT);
+ v->now--; /* put first digit back */
+ c = lexdigits(v, 8, 1, 3);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ default:
+ assert(iscalpha(c));
+ FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+ break;
+ }
+ assert(NOTREACHED);
+}
+
/*
- lexdigits - slurp up digits and return chr value
^ static chr lexdigits(struct vars *, int, int, int);
*/
static chr /* chr value; errors signalled via ERR */
-lexdigits(v, base, minlen, maxlen)
-struct vars *v;
-int base;
-int minlen;
-int maxlen;
+lexdigits(
+ struct vars *v,
+ int base,
+ int minlen,
+ int maxlen)
{
- uchr n; /* unsigned to avoid overflow misbehavior */
- int len;
- chr c;
- int d;
- CONST uchr ub = (uchr) base;
-
- n = 0;
- for (len = 0; len < maxlen && !ATEOS(); len++) {
- c = *v->now++;
- switch (c) {
- case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
- case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
- case CHR('8'): case CHR('9'):
- d = DIGITVAL(c);
- break;
- case CHR('a'): case CHR('A'): d = 10; break;
- case CHR('b'): case CHR('B'): d = 11; break;
- case CHR('c'): case CHR('C'): d = 12; break;
- case CHR('d'): case CHR('D'): d = 13; break;
- case CHR('e'): case CHR('E'): d = 14; break;
- case CHR('f'): case CHR('F'): d = 15; break;
- default:
- v->now--; /* oops, not a digit at all */
- d = -1;
- break;
- }
+ uchr n; /* unsigned to avoid overflow misbehavior */
+ int len;
+ chr c;
+ int d;
+ CONST uchr ub = (uchr) base;
- if (d >= base) { /* not a plausible digit */
- v->now--;
- d = -1;
- }
- if (d < 0)
- break; /* NOTE BREAK OUT */
- n = n*ub + (uchr)d;
+ n = 0;
+ for (len = 0; len < maxlen && !ATEOS(); len++) {
+ c = *v->now++;
+ switch (c) {
+ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+ case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+ case CHR('8'): case CHR('9'):
+ d = DIGITVAL(c);
+ break;
+ case CHR('a'): case CHR('A'): d = 10; break;
+ case CHR('b'): case CHR('B'): d = 11; break;
+ case CHR('c'): case CHR('C'): d = 12; break;
+ case CHR('d'): case CHR('D'): d = 13; break;
+ case CHR('e'): case CHR('E'): d = 14; break;
+ case CHR('f'): case CHR('F'): d = 15; break;
+ default:
+ v->now--; /* oops, not a digit at all */
+ d = -1;
+ break;
}
- if (len < minlen)
- ERR(REG_EESCAPE);
- return (chr)n;
-}
+ if (d >= base) { /* not a plausible digit */
+ v->now--;
+ d = -1;
+ }
+ if (d < 0) {
+ break; /* NOTE BREAK OUT */
+ }
+ n = n*ub + (uchr)d;
+ }
+ if (len < minlen) {
+ ERR(REG_EESCAPE);
+ }
+ return (chr)n;
+}
+
/*
- brenext - get next BRE token
* This is much like EREs except for all the stupid backslashes and the
@@ -865,150 +968,161 @@ int maxlen;
^ static int brenext(struct vars *, pchr);
*/
static int /* 1 normal, 0 failure */
-brenext(v, pc)
-struct vars *v;
-pchr pc;
+brenext(
+ struct vars *v,
+ pchr pc)
{
- chr c = (chr)pc;
+ chr c = (chr)pc;
- switch (c) {
- case CHR('*'):
- if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
- RETV(PLAIN, c);
- RET('*');
- break;
- case CHR('['):
- if (HAVE(6) && *(v->now+0) == CHR('[') &&
- *(v->now+1) == CHR(':') &&
- (*(v->now+2) == CHR('<') ||
- *(v->now+2) == CHR('>')) &&
- *(v->now+3) == CHR(':') &&
- *(v->now+4) == CHR(']') &&
- *(v->now+5) == CHR(']')) {
- c = *(v->now+2);
- v->now += 6;
- NOTE(REG_UNONPOSIX);
- RET((c == CHR('<')) ? '<' : '>');
- }
- INTOCON(L_BRACK);
- if (NEXT1('^')) {
- v->now++;
- RETV('[', 0);
- }
- RETV('[', 1);
- break;
- case CHR('.'):
- RET('.');
- break;
- case CHR('^'):
- if (LASTTYPE(EMPTY))
- RET('^');
- if (LASTTYPE('(')) {
- NOTE(REG_UUNSPEC);
- RET('^');
- }
- RETV(PLAIN, c);
- break;
- case CHR('$'):
- if (v->cflags&REG_EXPANDED)
- skip(v);
- if (ATEOS())
- RET('$');
- if (NEXT2('\\', ')')) {
- NOTE(REG_UUNSPEC);
- RET('$');
- }
- RETV(PLAIN, c);
- break;
- case CHR('\\'):
- break; /* see below */
- default:
- RETV(PLAIN, c);
- break;
+ switch (c) {
+ case CHR('*'):
+ if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) {
+ RETV(PLAIN, c);
+ }
+ RET('*');
+ break;
+ case CHR('['):
+ if (HAVE(6) && *(v->now+0) == CHR('[') &&
+ *(v->now+1) == CHR(':') &&
+ (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+ *(v->now+3) == CHR(':') &&
+ *(v->now+4) == CHR(']') &&
+ *(v->now+5) == CHR(']')) {
+ c = *(v->now+2);
+ v->now += 6;
+ NOTE(REG_UNONPOSIX);
+ RET((c == CHR('<')) ? '<' : '>');
+ }
+ INTOCON(L_BRACK);
+ if (NEXT1('^')) {
+ v->now++;
+ RETV('[', 0);
+ }
+ RETV('[', 1);
+ break;
+ case CHR('.'):
+ RET('.');
+ break;
+ case CHR('^'):
+ if (LASTTYPE(EMPTY)) {
+ RET('^');
+ }
+ if (LASTTYPE('(')) {
+ NOTE(REG_UUNSPEC);
+ RET('^');
}
+ RETV(PLAIN, c);
+ break;
+ case CHR('$'):
+ if (v->cflags&REG_EXPANDED) {
+ skip(v);
+ }
+ if (ATEOS()) {
+ RET('$');
+ }
+ if (NEXT2('\\', ')')) {
+ NOTE(REG_UUNSPEC);
+ RET('$');
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('\\'):
+ break; /* see below */
+ default:
+ RETV(PLAIN, c);
+ break;
+ }
- assert(c == CHR('\\'));
+ assert(c == CHR('\\'));
- if (ATEOS())
- FAILW(REG_EESCAPE);
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
- c = *v->now++;
- switch (c) {
- case CHR('{'):
- INTOCON(L_BBND);
- NOTE(REG_UBOUNDS);
- RET('{');
- break;
- case CHR('('):
- RETV('(', 1);
- break;
- case CHR(')'):
- RETV(')', c);
- break;
- case CHR('<'):
- NOTE(REG_UNONPOSIX);
- RET('<');
- break;
- case CHR('>'):
- NOTE(REG_UNONPOSIX);
- RET('>');
- break;
- case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
- case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
- case CHR('9'):
- NOTE(REG_UBACKREF);
- RETV(BACKREF, (chr)DIGITVAL(c));
- break;
- default:
- if (iscalnum(c)) {
- NOTE(REG_UBSALNUM);
- NOTE(REG_UUNSPEC);
- }
- RETV(PLAIN, c);
- break;
+ c = *v->now++;
+ switch (c) {
+ case CHR('{'):
+ INTOCON(L_BBND);
+ NOTE(REG_UBOUNDS);
+ RET('{');
+ break;
+ case CHR('('):
+ RETV('(', 1);
+ break;
+ case CHR(')'):
+ RETV(')', c);
+ break;
+ case CHR('<'):
+ NOTE(REG_UNONPOSIX);
+ RET('<');
+ break;
+ case CHR('>'):
+ NOTE(REG_UNONPOSIX);
+ RET('>');
+ break;
+ case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+ case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+ case CHR('9'):
+ NOTE(REG_UBACKREF);
+ RETV(BACKREF, (chr)DIGITVAL(c));
+ break;
+ default:
+ if (iscalnum(c)) {
+ NOTE(REG_UBSALNUM);
+ NOTE(REG_UUNSPEC);
}
+ RETV(PLAIN, c);
+ break;
+ }
- assert(NOTREACHED);
+ assert(NOTREACHED);
}
-
+
/*
- skip - skip white space and comments in expanded form
^ static VOID skip(struct vars *);
*/
-static VOID
-skip(v)
-struct vars *v;
+static void
+skip(
+ struct vars *v)
{
- chr *start = v->now;
-
- assert(v->cflags&REG_EXPANDED);
-
- for (;;) {
- while (!ATEOS() && iscspace(*v->now))
- v->now++;
- if (ATEOS() || *v->now != CHR('#'))
- break; /* NOTE BREAK OUT */
- assert(NEXT1('#'));
- while (!ATEOS() && *v->now != CHR('\n'))
- v->now++;
- /* leave the newline to be picked up by the iscspace loop */
+ chr *start = v->now;
+
+ assert(v->cflags&REG_EXPANDED);
+
+ for (;;) {
+ while (!ATEOS() && iscspace(*v->now)) {
+ v->now++;
+ }
+ if (ATEOS() || *v->now != CHR('#')) {
+ break; /* NOTE BREAK OUT */
+ }
+ assert(NEXT1('#'));
+ while (!ATEOS() && *v->now != CHR('\n')) {
+ v->now++;
}
- if (v->now != start)
- NOTE(REG_UNONPOSIX);
-}
+ /*
+ * Leave the newline to be picked up by the iscspace loop.
+ */
+ }
+ if (v->now != start) {
+ NOTE(REG_UNONPOSIX);
+ }
+}
+
/*
- newline - return the chr for a newline
* This helps confine use of CHR to this source file.
^ static chr newline(NOPARMS);
*/
static chr
-newline()
+newline(void)
{
- return CHR('\n');
+ return CHR('\n');
}
-
+
/*
- ch - return the chr sequence for regc_locale.c's fake collating element ch
* This helps confine use of CHR to this source file. Beware that the caller
@@ -1019,14 +1133,14 @@ newline()
*/
#ifdef REG_DEBUG
static chr *
-ch()
+ch(void)
{
- static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+ static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
- return chstr;
+ return chstr;
}
#endif
-
+
/*
- chrnamed - return the chr known by a given (chr string) name
* The code is a bit clumsy, but this routine gets only such specialized
@@ -1034,28 +1148,38 @@ ch()
^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
*/
static chr
-chrnamed(v, startp, endp, lastresort)
-struct vars *v;
-chr *startp; /* start of name */
-chr *endp; /* just past end of name */
-pchr lastresort; /* what to return if name lookup fails */
+chrnamed(
+ struct vars *v,
+ chr *startp, /* start of name */
+ chr *endp, /* just past end of name */
+ pchr lastresort) /* what to return if name lookup fails */
{
- celt c;
- int errsave;
- int e;
- struct cvec *cv;
-
- errsave = v->err;
- v->err = 0;
- c = element(v, startp, endp);
- e = v->err;
- v->err = errsave;
-
- if (e != 0)
- return (chr)lastresort;
-
- cv = range(v, c, c, 0);
- if (cv->nchrs == 0)
- return (chr)lastresort;
- return cv->chrs[0];
+ celt c;
+ int errsave;
+ int e;
+ struct cvec *cv;
+
+ errsave = v->err;
+ v->err = 0;
+ c = element(v, startp, endp);
+ e = v->err;
+ v->err = errsave;
+
+ if (e != 0) {
+ return (chr)lastresort;
+ }
+
+ cv = range(v, c, c, 0);
+ if (cv->nchrs == 0) {
+ return (chr)lastresort;
+ }
+ return cv->chrs[0];
}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */