diff options
author | Guido van Rossum <guido@python.org> | 1997-07-15 01:47:08 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-07-15 01:47:08 (GMT) |
commit | faf490898d20b2e5dfbc5cf39b6a8e612837647b (patch) | |
tree | 87ed5f897505e6fc13bc55eb23831ada9a900864 /Modules/regexpr.c | |
parent | 74495409861b357d9925937d6576229c74e2550d (diff) | |
download | cpython-faf490898d20b2e5dfbc5cf39b6a8e612837647b.zip cpython-faf490898d20b2e5dfbc5cf39b6a8e612837647b.tar.gz cpython-faf490898d20b2e5dfbc5cf39b6a8e612837647b.tar.bz2 |
Reformatted.
Fixed some problem with uninitialized syntax tables.
Jeffrey.
Diffstat (limited to 'Modules/regexpr.c')
-rw-r--r-- | Modules/regexpr.c | 3105 |
1 files changed, 1548 insertions, 1557 deletions
diff --git a/Modules/regexpr.c b/Modules/regexpr.c index 0b6ec8b..6b6ccbef 100644 --- a/Modules/regexpr.c +++ b/Modules/regexpr.c @@ -213,10 +213,10 @@ while(state.stack.first.next != NULL) \ stack.index -= count; \ while (stack.index < 0) \ { \ - if (stack.current->prev == NULL) \ - on_error; \ - stack.current = stack.current->prev; \ - stack.index += STACK_PAGE_SIZE; \ + if (stack.current->prev == NULL) \ + on_error; \ + stack.current = stack.current->prev; \ + stack.index += STACK_PAGE_SIZE; \ } /* Store a pointer to the previous item on the stack. Used to pop an @@ -225,13 +225,15 @@ while (stack.index < 0) \ #define STACK_PREV(stack, top, on_error) \ if (stack.index == 0) \ { \ - if (stack.current->prev == NULL) \ - on_error; \ - stack.current = stack.current->prev; \ - stack.index = STACK_PAGE_SIZE - 1; \ + if (stack.current->prev == NULL) \ + on_error; \ + stack.current = stack.current->prev; \ + stack.index = STACK_PAGE_SIZE - 1; \ } \ else \ - stack.index--; \ +{ \ + stack.index--; \ +} \ top = &(stack.current->items[stack.index]) /* Store a pointer to the next item on the stack. Used to push an item @@ -240,16 +242,16 @@ top = &(stack.current->items[stack.index]) #define STACK_NEXT(stack, top, on_error) \ if (stack.index == STACK_PAGE_SIZE) \ { \ - if (stack.current->next == NULL) \ - { \ - stack.current->next = (item_page_t *)malloc(sizeof(item_page_t)); \ - if (stack.current->next == NULL) \ - on_error; \ - stack.current->next->prev = stack.current; \ - stack.current->next->next = NULL; \ - } \ - stack.current = stack.current->next; \ - stack.index = 0; \ + if (stack.current->next == NULL) \ + { \ + stack.current->next = (item_page_t *)malloc(sizeof(item_page_t)); \ + if (stack.current->next == NULL) \ + on_error; \ + stack.current->next->prev = stack.current; \ + stack.current->next->next = NULL; \ + } \ + stack.current = stack.current->next; \ + stack.index = 0; \ } \ top = &(stack.current->items[stack.index++]) @@ -259,18 +261,18 @@ top = &(stack.current->items[stack.index++]) #define STACK_BACK(stack, top, count, on_error) \ { \ - int index; \ - item_page_t *current; \ - current = stack.current; \ - index = stack.index - (count); \ - while (index < 0) \ - { \ - if (current->prev == NULL) \ - on_error; \ - current = current->prev; \ - index += STACK_PAGE_SIZE; \ - } \ - top = &(current->items[index]); \ + int index; \ + item_page_t *current; \ + current = stack.current; \ + index = stack.index - (count); \ + while (index < 0) \ + { \ + if (current->prev == NULL) \ + on_error; \ + current = current->prev; \ + index += STACK_PAGE_SIZE; \ + } \ + top = &(current->items[index]); \ } /* Store a pointer to the top item on the stack. Execute the @@ -279,12 +281,14 @@ top = &(stack.current->items[stack.index++]) #define STACK_TOP(stack, top, on_error) \ if (stack.index == 0) \ { \ - if (stack.current->prev == NULL) \ - on_error; \ - top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \ + if (stack.current->prev == NULL) \ + on_error; \ + top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \ } \ else \ - top = &(stack.current->items[stack.index - 1]) +{ \ + top = &(stack.current->items[stack.index - 1]); \ +} /* Test to see if the stack is empty */ @@ -305,14 +309,14 @@ else \ #define SET_REG_START(state, reg, text, on_error) \ if(state.changed[reg] < state.level) \ { \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->reg.num = reg; \ - item->reg.start = state.start[reg]; \ - item->reg.end = state.end[reg]; \ - item->reg.level = state.changed[reg]; \ - state.changed[reg] = state.level; \ - state.count++; \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->reg.num = reg; \ + item->reg.start = state.start[reg]; \ + item->reg.end = state.end[reg]; \ + item->reg.level = state.changed[reg]; \ + state.changed[reg] = state.level; \ + state.count++; \ } \ state.start[reg] = text @@ -322,137 +326,137 @@ state.start[reg] = text #define SET_REG_END(state, reg, text, on_error) \ if(state.changed[reg] < state.level) \ { \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->reg.num = reg; \ - item->reg.start = state.start[reg]; \ - item->reg.end = state.end[reg]; \ - item->reg.level = state.changed[reg]; \ - state.changed[reg] = state.level; \ - state.count++; \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->reg.num = reg; \ + item->reg.start = state.start[reg]; \ + item->reg.end = state.end[reg]; \ + item->reg.level = state.changed[reg]; \ + state.changed[reg] = state.level; \ + state.count++; \ } \ state.end[reg] = text #define PUSH_FAILURE(state, xcode, xtext, on_error) \ { \ - item_t *item; \ - STACK_NEXT(state.stack, item, on_error); \ - item->fail.code = xcode; \ - item->fail.text = xtext; \ - item->fail.count = state.count; \ - item->fail.level = state.level; \ - item->fail.phantom = 0; \ - state.count = 0; \ - state.level++; \ - state.point++; \ + item_t *item; \ + STACK_NEXT(state.stack, item, on_error); \ + item->fail.code = xcode; \ + item->fail.text = xtext; \ + item->fail.count = state.count; \ + item->fail.level = state.level; \ + item->fail.phantom = 0; \ + state.count = 0; \ + state.level++; \ + state.point++; \ } /* Update the last failure point with a new position in the text. */ #define UPDATE_FAILURE(state, xtext, on_error) \ { \ - item_t *item; \ - STACK_BACK(state.stack, item, state.count + 1, on_error); \ - if (!item->fail.phantom) \ - { \ - item_t *item2; \ - STACK_NEXT(state.stack, item2, on_error); \ - item2->fail.code = item->fail.code; \ - item2->fail.text = xtext; \ - item2->fail.count = state.count; \ - item2->fail.level = state.level; \ - item2->fail.phantom = 1; \ - state.count = 0; \ - state.level++; \ - state.point++; \ - } \ - else \ - { \ - STACK_DISCARD(state.stack, state.count, on_error); \ - STACK_TOP(state.stack, item, on_error); \ - item->fail.text = xtext; \ - state.count = 0; \ - state.level++; \ - } \ + item_t *item; \ + STACK_BACK(state.stack, item, state.count + 1, on_error); \ + if (!item->fail.phantom) \ + { \ + item_t *item2; \ + STACK_NEXT(state.stack, item2, on_error); \ + item2->fail.code = item->fail.code; \ + item2->fail.text = xtext; \ + item2->fail.count = state.count; \ + item2->fail.level = state.level; \ + item2->fail.phantom = 1; \ + state.count = 0; \ + state.level++; \ + state.point++; \ + } \ + else \ + { \ + STACK_DISCARD(state.stack, state.count, on_error); \ + STACK_TOP(state.stack, item, on_error); \ + item->fail.text = xtext; \ + state.count = 0; \ + state.level++; \ + } \ } #define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \ { \ - item_t *item; \ - do \ - { \ - while(state.count > 0) \ - { \ - STACK_PREV(state.stack, item, on_error); \ - state.start[item->reg.num] = item->reg.start; \ - state.end[item->reg.num] = item->reg.end; \ - state.changed[item->reg.num] = item->reg.level; \ - state.count--; \ - } \ - STACK_PREV(state.stack, item, on_empty); \ - xcode = item->fail.code; \ - xtext = item->fail.text; \ - state.count = item->fail.count; \ - state.level = item->fail.level; \ - state.point--; \ - } \ - while (item->fail.text == NULL); \ + item_t *item; \ + do \ + { \ + while(state.count > 0) \ + { \ + STACK_PREV(state.stack, item, on_error); \ + state.start[item->reg.num] = item->reg.start; \ + state.end[item->reg.num] = item->reg.end; \ + state.changed[item->reg.num] = item->reg.level; \ + state.count--; \ + } \ + STACK_PREV(state.stack, item, on_empty); \ + xcode = item->fail.code; \ + xtext = item->fail.text; \ + state.count = item->fail.count; \ + state.level = item->fail.level; \ + state.point--; \ + } \ + while (item->fail.text == NULL); \ } enum regexp_compiled_ops /* opcodes for compiled regexp */ { - Cend, /* end of pattern reached */ - Cbol, /* beginning of line */ - Ceol, /* end of line */ - Cset, /* character set. Followed by 32 bytes of set. */ - Cexact, /* followed by a byte to match */ - Canychar, /* matches any character except newline */ - Cstart_memory, /* set register start addr (followed by reg number) */ - Cend_memory, /* set register end addr (followed by reg number) */ - Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/ - Cjump, /* followed by two bytes (lsb,msb) of displacement. */ - Cstar_jump, /* will change to jump/update_failure_jump at runtime */ - Cfailure_jump, /* jump to addr on failure */ - Cupdate_failure_jump, /* update topmost failure point and jump */ - Cdummy_failure_jump, /* push a dummy failure point and jump */ - Cbegbuf, /* match at beginning of buffer */ - Cendbuf, /* match at end of buffer */ - Cwordbeg, /* match at beginning of word */ - Cwordend, /* match at end of word */ - Cwordbound, /* match if at word boundary */ - Cnotwordbound, /* match if not at word boundary */ - Csyntaxspec, /* matches syntax code (1 byte follows) */ - Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/ - Crepeat1 + Cend, /* end of pattern reached */ + Cbol, /* beginning of line */ + Ceol, /* end of line */ + Cset, /* character set. Followed by 32 bytes of set. */ + Cexact, /* followed by a byte to match */ + Canychar, /* matches any character except newline */ + Cstart_memory, /* set register start addr (followed by reg number) */ + Cend_memory, /* set register end addr (followed by reg number) */ + Cmatch_memory, /* match a duplicate of reg contents (regnum follows)*/ + Cjump, /* followed by two bytes (lsb,msb) of displacement. */ + Cstar_jump, /* will change to jump/update_failure_jump at runtime */ + Cfailure_jump, /* jump to addr on failure */ + Cupdate_failure_jump, /* update topmost failure point and jump */ + Cdummy_failure_jump, /* push a dummy failure point and jump */ + Cbegbuf, /* match at beginning of buffer */ + Cendbuf, /* match at end of buffer */ + Cwordbeg, /* match at beginning of word */ + Cwordend, /* match at end of word */ + Cwordbound, /* match if at word boundary */ + Cnotwordbound, /* match if not at word boundary */ + Csyntaxspec, /* matches syntax code (1 byte follows) */ + Cnotsyntaxspec, /* matches if syntax code does not match (1 byte foll)*/ + Crepeat1 }; enum regexp_syntax_op /* syntax codes for plain and quoted characters */ { - Rend, /* special code for end of regexp */ - Rnormal, /* normal character */ - Ranychar, /* any character except newline */ - Rquote, /* the quote character */ - Rbol, /* match beginning of line */ - Reol, /* match end of line */ - Roptional, /* match preceding expression optionally */ - Rstar, /* match preceding expr zero or more times */ - Rplus, /* match preceding expr one or more times */ - Ror, /* match either of alternatives */ - Ropenpar, /* opening parenthesis */ - Rclosepar, /* closing parenthesis */ - Rmemory, /* match memory register */ - Rextended_memory, /* \vnn to match registers 10-99 */ - Ropenset, /* open set. Internal syntax hard-coded below. */ - /* the following are gnu extensions to "normal" regexp syntax */ - Rbegbuf, /* beginning of buffer */ - Rendbuf, /* end of buffer */ - Rwordchar, /* word character */ - Rnotwordchar, /* not word character */ - Rwordbeg, /* beginning of word */ - Rwordend, /* end of word */ - Rwordbound, /* word bound */ - Rnotwordbound, /* not word bound */ - Rnum_ops + Rend, /* special code for end of regexp */ + Rnormal, /* normal character */ + Ranychar, /* any character except newline */ + Rquote, /* the quote character */ + Rbol, /* match beginning of line */ + Reol, /* match end of line */ + Roptional, /* match preceding expression optionally */ + Rstar, /* match preceding expr zero or more times */ + Rplus, /* match preceding expr one or more times */ + Ror, /* match either of alternatives */ + Ropenpar, /* opening parenthesis */ + Rclosepar, /* closing parenthesis */ + Rmemory, /* match memory register */ + Rextended_memory, /* \vnn to match registers 10-99 */ + Ropenset, /* open set. Internal syntax hard-coded below. */ + /* the following are gnu extensions to "normal" regexp syntax */ + Rbegbuf, /* beginning of buffer */ + Rendbuf, /* end of buffer */ + Rwordchar, /* word character */ + Rnotwordchar, /* not word character */ + Rwordbeg, /* beginning of word */ + Rwordend, /* end of word */ + Rwordbound, /* word bound */ + Rnotwordbound, /* not word bound */ + Rnum_ops }; static int re_compile_initialized = 0; @@ -474,114 +478,114 @@ static char re_syntax_table[256]; static void re_compile_initialize(void) { - int a; + int a; - static int syntax_table_inited = 0; - - if (!syntax_table_inited) - { - syntax_table_inited = 1; - memset(re_syntax_table, 0, 256); - for (a = 'a'; a <= 'z'; a++) - re_syntax_table[a] = Sword; - for (a = 'A'; a <= 'Z'; a++) - re_syntax_table[a] = Sword; - for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword; - } - re_compile_initialized = 1; - for (a = 0; a < 256; a++) - { - regexp_plain_ops[a] = Rnormal; - regexp_quoted_ops[a] = Rnormal; - } - for (a = '0'; a <= '9'; a++) - regexp_quoted_ops[a] = Rmemory; - regexp_plain_ops['\134'] = Rquote; - if (regexp_syntax & RE_NO_BK_PARENS) - { - regexp_plain_ops['('] = Ropenpar; - regexp_plain_ops[')'] = Rclosepar; - } - else - { - regexp_quoted_ops['('] = Ropenpar; - regexp_quoted_ops[')'] = Rclosepar; - } - if (regexp_syntax & RE_NO_BK_VBAR) - regexp_plain_ops['\174'] = Ror; - else - regexp_quoted_ops['\174'] = Ror; - regexp_plain_ops['*'] = Rstar; - if (regexp_syntax & RE_BK_PLUS_QM) - { - regexp_quoted_ops['+'] = Rplus; - regexp_quoted_ops['?'] = Roptional; - } - else - { - regexp_plain_ops['+'] = Rplus; - regexp_plain_ops['?'] = Roptional; - } - if (regexp_syntax & RE_NEWLINE_OR) - regexp_plain_ops['\n'] = Ror; - regexp_plain_ops['\133'] = Ropenset; - regexp_plain_ops['\136'] = Rbol; - regexp_plain_ops['$'] = Reol; - regexp_plain_ops['.'] = Ranychar; - if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) - { - regexp_quoted_ops['w'] = Rwordchar; - regexp_quoted_ops['W'] = Rnotwordchar; - regexp_quoted_ops['<'] = Rwordbeg; - regexp_quoted_ops['>'] = Rwordend; - regexp_quoted_ops['b'] = Rwordbound; - regexp_quoted_ops['B'] = Rnotwordbound; - regexp_quoted_ops['`'] = Rbegbuf; - regexp_quoted_ops['\''] = Rendbuf; - } - if (regexp_syntax & RE_ANSI_HEX) - regexp_quoted_ops['v'] = Rextended_memory; - for (a = 0; a < Rnum_ops; a++) - regexp_precedences[a] = 4; - if (regexp_syntax & RE_TIGHT_VBAR) - { - regexp_precedences[Ror] = 3; - regexp_precedences[Rbol] = 2; - regexp_precedences[Reol] = 2; - } - else - { - regexp_precedences[Ror] = 2; - regexp_precedences[Rbol] = 3; - regexp_precedences[Reol] = 3; - } - regexp_precedences[Rclosepar] = 1; - regexp_precedences[Rend] = 0; - regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; - regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; + static int syntax_table_inited = 0; + + if (!syntax_table_inited) + { + syntax_table_inited = 1; + memset(re_syntax_table, 0, 256); + for (a = 'a'; a <= 'z'; a++) + re_syntax_table[a] = Sword; + for (a = 'A'; a <= 'Z'; a++) + re_syntax_table[a] = Sword; + for (a = '0'; a <= '9'; a++) + re_syntax_table[a] = Sword; + } + re_compile_initialized = 1; + for (a = 0; a < 256; a++) + { + regexp_plain_ops[a] = Rnormal; + regexp_quoted_ops[a] = Rnormal; + } + for (a = '0'; a <= '9'; a++) + regexp_quoted_ops[a] = Rmemory; + regexp_plain_ops['\134'] = Rquote; + if (regexp_syntax & RE_NO_BK_PARENS) + { + regexp_plain_ops['('] = Ropenpar; + regexp_plain_ops[')'] = Rclosepar; + } + else + { + regexp_quoted_ops['('] = Ropenpar; + regexp_quoted_ops[')'] = Rclosepar; + } + if (regexp_syntax & RE_NO_BK_VBAR) + regexp_plain_ops['\174'] = Ror; + else + regexp_quoted_ops['\174'] = Ror; + regexp_plain_ops['*'] = Rstar; + if (regexp_syntax & RE_BK_PLUS_QM) + { + regexp_quoted_ops['+'] = Rplus; + regexp_quoted_ops['?'] = Roptional; + } + else + { + regexp_plain_ops['+'] = Rplus; + regexp_plain_ops['?'] = Roptional; + } + if (regexp_syntax & RE_NEWLINE_OR) + regexp_plain_ops['\n'] = Ror; + regexp_plain_ops['\133'] = Ropenset; + regexp_plain_ops['\136'] = Rbol; + regexp_plain_ops['$'] = Reol; + regexp_plain_ops['.'] = Ranychar; + if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS)) + { + regexp_quoted_ops['w'] = Rwordchar; + regexp_quoted_ops['W'] = Rnotwordchar; + regexp_quoted_ops['<'] = Rwordbeg; + regexp_quoted_ops['>'] = Rwordend; + regexp_quoted_ops['b'] = Rwordbound; + regexp_quoted_ops['B'] = Rnotwordbound; + regexp_quoted_ops['`'] = Rbegbuf; + regexp_quoted_ops['\''] = Rendbuf; + } + if (regexp_syntax & RE_ANSI_HEX) + regexp_quoted_ops['v'] = Rextended_memory; + for (a = 0; a < Rnum_ops; a++) + regexp_precedences[a] = 4; + if (regexp_syntax & RE_TIGHT_VBAR) + { + regexp_precedences[Ror] = 3; + regexp_precedences[Rbol] = 2; + regexp_precedences[Reol] = 2; + } + else + { + regexp_precedences[Ror] = 2; + regexp_precedences[Rbol] = 3; + regexp_precedences[Reol] = 3; + } + regexp_precedences[Rclosepar] = 1; + regexp_precedences[Rend] = 0; + regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0; + regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0; } int re_set_syntax(int syntax) { - int ret; - - ret = regexp_syntax; - regexp_syntax = syntax; - re_syntax = syntax; /* Exported copy */ - re_compile_initialize(); - return ret; + int ret; + + ret = regexp_syntax; + regexp_syntax = syntax; + re_syntax = syntax; /* Exported copy */ + re_compile_initialize(); + return ret; } static int hex_char_to_decimal(int ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'a' && ch <= 'f') - return ch - 'a' + 10; - if (ch >= 'A' && ch <= 'F') - return ch - 'A' + 10; - return 16; + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'a' && ch <= 'f') + return ch - 'a' + 10; + if (ch >= 'A' && ch <= 'F') + return ch - 'A' + 10; + return 16; } static void re_compile_fastmap_aux(char *code, @@ -590,129 +594,129 @@ static void re_compile_fastmap_aux(char *code, char *can_be_null, char *fastmap) { - int a; - int b; - int syntaxcode; - - if (visited[pos]) - return; /* we have already been here */ - visited[pos] = 1; - for (;;) - switch (code[pos++]) - { - case Cend: - { - *can_be_null = 1; - return; - } - case Cbol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - for (a = 0; a < 256; a++) - fastmap[a] = 1; - break; - } - case Csyntaxspec: - { - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (SYNTAX(a) == syntaxcode) - fastmap[a] = 1; - return; - } - case Cnotsyntaxspec: - { - syntaxcode = code[pos++]; - for (a = 0; a < 256; a++) - if (SYNTAX(a) != syntaxcode) - fastmap[a] = 1; - return; - } - case Ceol: - { - fastmap['\n'] = 1; - if (*can_be_null == 0) - *can_be_null = 2; /* can match null, but only at end of buffer*/ - return; - } - case Cset: - { - for (a = 0; a < 256/8; a++) - if (code[pos + a] != 0) - for (b = 0; b < 8; b++) - if (code[pos + a] & (1 << b)) - fastmap[(a << 3) + b] = 1; - pos += 256/8; - return; - } - case Cexact: - { - fastmap[(unsigned char)code[pos]] = 1; - return; - } - case Canychar: - { - for (a = 0; a < 256; a++) - if (a != '\n') - fastmap[a] = 1; - return; - } - case Cstart_memory: - case Cend_memory: - { - pos++; - break; - } - case Cmatch_memory: - { - for (a = 0; a < 256; a++) - fastmap[a] = 1; - *can_be_null = 1; - return; - } - case Cjump: - case Cdummy_failure_jump: - case Cupdate_failure_jump: - case Cstar_jump: - { - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - pos += (int)SHORT(a); - if (visited[pos]) - { - /* argh... the regexp contains empty loops. This is not - good, as this may cause a failure stack overflow when - matching. Oh well. */ - /* this path leads nowhere; pursue other paths. */ - return; - } - visited[pos] = 1; - break; - } - case Cfailure_jump: - { - a = (unsigned char)code[pos++]; - a |= (unsigned char)code[pos++] << 8; - a = pos + (int)SHORT(a); - re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); - break; - } - case Crepeat1: - { - pos += 2; - break; - } - default: - { - abort(); /* probably some opcode is missing from this switch */ - /*NOTREACHED*/ - } - } + int a; + int b; + int syntaxcode; + + if (visited[pos]) + return; /* we have already been here */ + visited[pos] = 1; + for (;;) + switch (code[pos++]) + { + case Cend: + { + *can_be_null = 1; + return; + } + case Cbol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + for (a = 0; a < 256; a++) + fastmap[a] = 1; + break; + } + case Csyntaxspec: + { + syntaxcode = code[pos++]; + for (a = 0; a < 256; a++) + if (SYNTAX(a) == syntaxcode) + fastmap[a] = 1; + return; + } + case Cnotsyntaxspec: + { + syntaxcode = code[pos++]; + for (a = 0; a < 256; a++) + if (SYNTAX(a) != syntaxcode) + fastmap[a] = 1; + return; + } + case Ceol: + { + fastmap['\n'] = 1; + if (*can_be_null == 0) + *can_be_null = 2; /* can match null, but only at end of buffer*/ + return; + } + case Cset: + { + for (a = 0; a < 256/8; a++) + if (code[pos + a] != 0) + for (b = 0; b < 8; b++) + if (code[pos + a] & (1 << b)) + fastmap[(a << 3) + b] = 1; + pos += 256/8; + return; + } + case Cexact: + { + fastmap[(unsigned char)code[pos]] = 1; + return; + } + case Canychar: + { + for (a = 0; a < 256; a++) + if (a != '\n') + fastmap[a] = 1; + return; + } + case Cstart_memory: + case Cend_memory: + { + pos++; + break; + } + case Cmatch_memory: + { + for (a = 0; a < 256; a++) + fastmap[a] = 1; + *can_be_null = 1; + return; + } + case Cjump: + case Cdummy_failure_jump: + case Cupdate_failure_jump: + case Cstar_jump: + { + a = (unsigned char)code[pos++]; + a |= (unsigned char)code[pos++] << 8; + pos += (int)SHORT(a); + if (visited[pos]) + { + /* argh... the regexp contains empty loops. This is not + good, as this may cause a failure stack overflow when + matching. Oh well. */ + /* this path leads nowhere; pursue other paths. */ + return; + } + visited[pos] = 1; + break; + } + case Cfailure_jump: + { + a = (unsigned char)code[pos++]; + a |= (unsigned char)code[pos++] << 8; + a = pos + (int)SHORT(a); + re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap); + break; + } + case Crepeat1: + { + pos += 2; + break; + } + default: + { + abort(); /* probably some opcode is missing from this switch */ + /*NOTREACHED*/ + } + } } static int re_do_compile_fastmap(char *buffer, @@ -721,44 +725,44 @@ static int re_do_compile_fastmap(char *buffer, char *can_be_null, char *fastmap) { - char small_visited[512], *visited; + char small_visited[512], *visited; - if (used <= sizeof(small_visited)) - visited = small_visited; - else - { - visited = malloc(used); - if (!visited) - return 0; - } - *can_be_null = 0; - memset(fastmap, 0, 256); - memset(visited, 0, used); - re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); - if (visited != small_visited) - free(visited); - return 1; + if (used <= sizeof(small_visited)) + visited = small_visited; + else + { + visited = malloc(used); + if (!visited) + return 0; + } + *can_be_null = 0; + memset(fastmap, 0, 256); + memset(visited, 0, used); + re_compile_fastmap_aux(buffer, pos, visited, can_be_null, fastmap); + if (visited != small_visited) + free(visited); + return 1; } void re_compile_fastmap(regexp_t bufp) { - if (!bufp->fastmap || bufp->fastmap_accurate) - return; - assert(bufp->used > 0); - if (!re_do_compile_fastmap(bufp->buffer, - bufp->used, - 0, - &bufp->can_be_null, - bufp->fastmap)) - return; - if (bufp->buffer[0] == Cbol) - bufp->anchor = 1; /* begline */ - else - if (bufp->buffer[0] == Cbegbuf) - bufp->anchor = 2; /* begbuf */ - else - bufp->anchor = 0; /* none */ - bufp->fastmap_accurate = 1; + if (!bufp->fastmap || bufp->fastmap_accurate) + return; + assert(bufp->used > 0); + if (!re_do_compile_fastmap(bufp->buffer, + bufp->used, + 0, + &bufp->can_be_null, + bufp->fastmap)) + return; + if (bufp->buffer[0] == Cbol) + bufp->anchor = 1; /* begline */ + else + if (bufp->buffer[0] == Cbegbuf) + bufp->anchor = 2; /* begbuf */ + else + bufp->anchor = 0; /* none */ + bufp->fastmap_accurate = 1; } /* @@ -786,242 +790,241 @@ void re_compile_fastmap(regexp_t bufp) static int re_optimize_star_jump(regexp_t bufp, char *code) { - char map[256]; - char can_be_null; - char *p1; - char *p2; - char ch; - int a; - int b; - int num_instructions = 0; - - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - - p1 = code + a + 3; /* skip the failure_jump */ - assert(p1[-3] == Cfailure_jump); - p2 = code; - /* p1 points inside loop, p2 points to after loop */ - if (!re_do_compile_fastmap(bufp->buffer, bufp->used, - p2 - bufp->buffer, &can_be_null, map)) - goto make_normal_jump; - - /* If we might introduce a new update point inside the - * loop, we can't optimize because then update_jump would - * update a wrong failure point. Thus we have to be - * quite careful here. - */ - - /* loop until we find something that consumes a character */ + char map[256]; + char can_be_null; + char *p1; + char *p2; + char ch; + int a; + int b; + int num_instructions = 0; + + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + + p1 = code + a + 3; /* skip the failure_jump */ + assert(p1[-3] == Cfailure_jump); + p2 = code; + /* p1 points inside loop, p2 points to after loop */ + if (!re_do_compile_fastmap(bufp->buffer, bufp->used, + p2 - bufp->buffer, &can_be_null, map)) + goto make_normal_jump; + + /* If we might introduce a new update point inside the + * loop, we can't optimize because then update_jump would + * update a wrong failure point. Thus we have to be + * quite careful here. + */ + + /* loop until we find something that consumes a character */ loop_p1: - num_instructions++; - switch (*p1++) - { - case Cbol: - case Ceol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - goto loop_p1; - } - case Cstart_memory: - case Cend_memory: - { - p1++; - goto loop_p1; - } - case Cexact: - { - ch = (unsigned char)*p1++; - if (map[(int)ch]) - goto make_normal_jump; - break; - } - case Canychar: - { - for (b = 0; b < 256; b++) - if (b != '\n' && map[b]) - goto make_normal_jump; - break; - } - case Cset: - { - for (b = 0; b < 256; b++) - if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) - goto make_normal_jump; - p1 += 256/8; - break; - } - default: - { - goto make_normal_jump; - } - } - /* now we know that we can't backtrack. */ - while (p1 != p2 - 3) - { - num_instructions++; - switch (*p1++) - { - case Cend: - { - return 0; - } - case Cbol: - case Ceol: - case Canychar: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - break; - } - case Cset: - { - p1 += 256/8; - break; - } - case Cexact: - case Cstart_memory: - case Cend_memory: - case Cmatch_memory: - case Csyntaxspec: - case Cnotsyntaxspec: - { - p1++; - break; - } - case Cjump: - case Cstar_jump: - case Cfailure_jump: - case Cupdate_failure_jump: - case Cdummy_failure_jump: - { - goto make_normal_jump; - } - default: - { - return 0; - break; - } - } - } - + num_instructions++; + switch (*p1++) + { + case Cbol: + case Ceol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + goto loop_p1; + } + case Cstart_memory: + case Cend_memory: + { + p1++; + goto loop_p1; + } + case Cexact: + { + ch = (unsigned char)*p1++; + if (map[(int)ch]) + goto make_normal_jump; + break; + } + case Canychar: + { + for (b = 0; b < 256; b++) + if (b != '\n' && map[b]) + goto make_normal_jump; + break; + } + case Cset: + { + for (b = 0; b < 256; b++) + if ((p1[b >> 3] & (1 << (b & 7))) && map[b]) + goto make_normal_jump; + p1 += 256/8; + break; + } + default: + { + goto make_normal_jump; + } + } + /* now we know that we can't backtrack. */ + while (p1 != p2 - 3) + { + num_instructions++; + switch (*p1++) + { + case Cend: + { + return 0; + } + case Cbol: + case Ceol: + case Canychar: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + break; + } + case Cset: + { + p1 += 256/8; + break; + } + case Cexact: + case Cstart_memory: + case Cend_memory: + case Cmatch_memory: + case Csyntaxspec: + case Cnotsyntaxspec: + { + p1++; + break; + } + case Cjump: + case Cstar_jump: + case Cfailure_jump: + case Cupdate_failure_jump: + case Cdummy_failure_jump: + { + goto make_normal_jump; + } + default: + { + return 0; + break; + } + } + } + make_update_jump: - code -= 3; - a += 3; /* jump to after the Cfailure_jump */ - code[0] = Cupdate_failure_jump; - code[1] = a & 0xff; - code[2] = a >> 8; - if (num_instructions > 1) - return 1; - assert(num_instructions == 1); - /* if the only instruction matches a single character, we can do - * better - */ - p1 = code + 3 + a; /* start of sole instruction */ - if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar || - *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec) - code[0] = Crepeat1; - return 1; - + code -= 3; + a += 3; /* jump to after the Cfailure_jump */ + code[0] = Cupdate_failure_jump; + code[1] = a & 0xff; + code[2] = a >> 8; + if (num_instructions > 1) + return 1; + assert(num_instructions == 1); + /* if the only instruction matches a single character, we can do + * better */ + p1 = code + 3 + a; /* start of sole instruction */ + if (*p1 == Cset || *p1 == Cexact || *p1 == Canychar || + *p1 == Csyntaxspec || *p1 == Cnotsyntaxspec) + code[0] = Crepeat1; + return 1; + make_normal_jump: - code -= 3; - *code = Cjump; - return 1; + code -= 3; + *code = Cjump; + return 1; } static int re_optimize(regexp_t bufp) { - char *code; - - code = bufp->buffer; - - while(1) - { - switch (*code++) - { - case Cend: - { - return 1; - } - case Canychar: - case Cbol: - case Ceol: - case Cbegbuf: - case Cendbuf: - case Cwordbeg: - case Cwordend: - case Cwordbound: - case Cnotwordbound: - { - break; - } - case Cset: - { - code += 256/8; - break; - } - case Cexact: - case Cstart_memory: - case Cend_memory: - case Cmatch_memory: - case Csyntaxspec: - case Cnotsyntaxspec: - { - code++; - break; - } - case Cstar_jump: - { - if (!re_optimize_star_jump(bufp, code)) - { - return 0; - } - /* fall through */ - } - case Cupdate_failure_jump: - case Cjump: - case Cdummy_failure_jump: - case Cfailure_jump: - case Crepeat1: - { - code += 2; - break; - } - default: - { - return 0; - } - } - } + char *code; + + code = bufp->buffer; + + while(1) + { + switch (*code++) + { + case Cend: + { + return 1; + } + case Canychar: + case Cbol: + case Ceol: + case Cbegbuf: + case Cendbuf: + case Cwordbeg: + case Cwordend: + case Cwordbound: + case Cnotwordbound: + { + break; + } + case Cset: + { + code += 256/8; + break; + } + case Cexact: + case Cstart_memory: + case Cend_memory: + case Cmatch_memory: + case Csyntaxspec: + case Cnotsyntaxspec: + { + code++; + break; + } + case Cstar_jump: + { + if (!re_optimize_star_jump(bufp, code)) + { + return 0; + } + /* fall through */ + } + case Cupdate_failure_jump: + case Cjump: + case Cdummy_failure_jump: + case Cfailure_jump: + case Crepeat1: + { + code += 2; + break; + } + default: + { + return 0; + } + } + } } #define NEXTCHAR(var) \ { \ - if (pos >= size) \ - goto ends_prematurely; \ - (var) = regex[pos]; \ - pos++; \ + if (pos >= size) \ + goto ends_prematurely; \ + (var) = regex[pos]; \ + pos++; \ } #define ALLOC(amount) \ { \ - if (pattern_offset+(amount) > alloc) \ - { \ - alloc += 256 + (amount); \ - pattern = realloc(pattern, alloc); \ - if (!pattern) \ - goto out_of_memory; \ - } \ + if (pattern_offset+(amount) > alloc) \ + { \ + alloc += 256 + (amount); \ + pattern = realloc(pattern, alloc); \ + if (!pattern) \ + goto out_of_memory; \ + } \ } #define STORE(ch) pattern[pattern_offset++] = (ch) @@ -1031,521 +1034,523 @@ static int re_optimize(regexp_t bufp) #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset #define PUSH_LEVEL_STARTS \ - if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ - starts_base += NUM_LEVELS; \ - else \ - goto too_complex +if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \ + starts_base += NUM_LEVELS; \ +else \ + goto too_complex \ #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS #define PUT_ADDR(offset,addr) \ { \ - int disp = (addr) - (offset) - 2; \ - pattern[(offset)] = disp & 0xff; \ - pattern[(offset)+1] = (disp>>8) & 0xff; \ + int disp = (addr) - (offset) - 2; \ + pattern[(offset)] = disp & 0xff; \ + pattern[(offset)+1] = (disp>>8) & 0xff; \ } #define INSERT_JUMP(pos,type,addr) \ { \ - int a, p = (pos), t = (type), ad = (addr); \ - for (a = pattern_offset - 1; a >= p; a--) \ - pattern[a + 3] = pattern[a]; \ - pattern[p] = t; \ - PUT_ADDR(p+1,ad); \ - pattern_offset += 3; \ + int a, p = (pos), t = (type), ad = (addr); \ + for (a = pattern_offset - 1; a >= p; a--) \ + pattern[a + 3] = pattern[a]; \ + pattern[p] = t; \ + PUT_ADDR(p+1,ad); \ + pattern_offset += 3; \ } + #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7)) #define SET_FIELDS \ { \ - bufp->allocated = alloc; \ - bufp->buffer = pattern; \ - bufp->used = pattern_offset; \ + bufp->allocated = alloc; \ + bufp->buffer = pattern; \ + bufp->used = pattern_offset; \ } #define GETHEX(var) \ { \ - char gethex_ch, gethex_value; \ - NEXTCHAR(gethex_ch); \ - gethex_value = hex_char_to_decimal(gethex_ch); \ - if (gethex_value == 16) \ - goto hex_error; \ - NEXTCHAR(gethex_ch); \ - gethex_ch = hex_char_to_decimal(gethex_ch); \ - if (gethex_ch == 16) \ - goto hex_error; \ - (var) = gethex_value * 16 + gethex_ch; \ + char gethex_ch, gethex_value; \ + NEXTCHAR(gethex_ch); \ + gethex_value = hex_char_to_decimal(gethex_ch); \ + if (gethex_value == 16) \ + goto hex_error; \ + NEXTCHAR(gethex_ch); \ + gethex_ch = hex_char_to_decimal(gethex_ch); \ + if (gethex_ch == 16) \ + goto hex_error; \ + (var) = gethex_value * 16 + gethex_ch; \ } -#define ANSI_TRANSLATE(ch) \ +#define ANSI_TRANSLATE(ch) \ { \ - switch (ch) \ - { \ - case 'a': \ - case 'A': \ - { \ - ch = 7; /* audible bell */ \ - break; \ - } \ - case 'b': \ - case 'B': \ - { \ - ch = 8; /* backspace */ \ - break; \ - } \ - case 'f': \ - case 'F': \ - { \ - ch = 12; /* form feed */ \ - break; \ - } \ - case 'n': \ - case 'N': \ - { \ - ch = 10; /* line feed */ \ - break; \ - } \ - case 'r': \ - case 'R': \ - { \ - ch = 13; /* carriage return */ \ - break; \ - } \ - case 't': \ - case 'T': \ - { \ - ch = 9; /* tab */ \ - break; \ - } \ - case 'v': \ - case 'V': \ - { \ - ch = 11; /* vertical tab */ \ - break; \ - } \ - case 'x': /* hex code */ \ - case 'X': \ - { \ - GETHEX(ch); \ - break; \ - } \ - default: \ - { \ - /* other characters passed through */ \ - if (translate) \ - ch = translate[(unsigned char)ch]; \ - break; \ - } \ - } \ + switch (ch) \ + { \ + case 'a': \ + case 'A': \ + { \ + ch = 7; /* audible bell */ \ + break; \ + } \ + case 'b': \ + case 'B': \ + { \ + ch = 8; /* backspace */ \ + break; \ + } \ + case 'f': \ + case 'F': \ + { \ + ch = 12; /* form feed */ \ + break; \ + } \ + case 'n': \ + case 'N': \ + { \ + ch = 10; /* line feed */ \ + break; \ + } \ + case 'r': \ + case 'R': \ + { \ + ch = 13; /* carriage return */ \ + break; \ + } \ + case 't': \ + case 'T': \ + { \ + ch = 9; /* tab */ \ + break; \ + } \ + case 'v': \ + case 'V': \ + { \ + ch = 11; /* vertical tab */ \ + break; \ + } \ + case 'x': /* hex code */ \ + case 'X': \ + { \ + GETHEX(ch); \ + break; \ + } \ + default: \ + { \ + /* other characters passed through */ \ + if (translate) \ + ch = translate[(unsigned char)ch]; \ + break; \ + } \ + } \ } char *re_compile_pattern(char *regex, int size, regexp_t bufp) { - int a; - int pos; - int op; - int current_level; - int level; - int opcode; - int pattern_offset = 0, alloc; - int starts[NUM_LEVELS * MAX_NESTING]; - int starts_base; - int future_jumps[MAX_NESTING]; - int num_jumps; - unsigned char ch = '\0'; - char *pattern; - char *translate; - int next_register; - int paren_depth; - int num_open_registers; - int open_registers[RE_NREGS]; - int beginning_context; - - if (!re_compile_initialized) - re_compile_initialize(); - bufp->used = 0; - bufp->fastmap_accurate = 0; - bufp->uses_registers = 1; - bufp->num_registers = 1; - translate = bufp->translate; - pattern = bufp->buffer; - alloc = bufp->allocated; - if (alloc == 0 || pattern == NULL) - { - alloc = 256; - pattern = malloc(alloc); - if (!pattern) - goto out_of_memory; - } - pattern_offset = 0; - starts_base = 0; - num_jumps = 0; - current_level = 0; - SET_LEVEL_START; - num_open_registers = 0; - next_register = 1; - paren_depth = 0; - beginning_context = 1; - op = -1; - /* we use Rend dummy to ensure that pending jumps are updated (due to - low priority of Rend) before exiting the loop. */ - pos = 0; - while (op != Rend) - { - if (pos >= size) - op = Rend; - else - { - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - op = regexp_plain_ops[(unsigned char)ch]; - if (op == Rquote) - { - NEXTCHAR(ch); - op = regexp_quoted_ops[(unsigned char)ch]; - if (op == Rnormal && regexp_ansi_sequences) - ANSI_TRANSLATE(ch); - } - } - level = regexp_precedences[op]; - /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n", - ch, op, level, current_level, CURRENT_LEVEL_START); */ - if (level > current_level) - { - for (current_level++; current_level < level; current_level++) - SET_LEVEL_START; - SET_LEVEL_START; - } - else - if (level < current_level) - { - current_level = level; - for (;num_jumps > 0 && - future_jumps[num_jumps-1] >= CURRENT_LEVEL_START; - num_jumps--) - PUT_ADDR(future_jumps[num_jumps-1], pattern_offset); - } - switch (op) - { - case Rend: - { - break; - } - case Rnormal: - { - normal_char: - opcode = Cexact; - store_opcode_and_arg: /* opcode & ch must be set */ - SET_LEVEL_START; - ALLOC(2); - STORE(opcode); - STORE(ch); - break; - } - case Ranychar: - { - opcode = Canychar; - store_opcode: - SET_LEVEL_START; - ALLOC(1); - STORE(opcode); - break; - } - case Rquote: - { - abort(); - /*NOTREACHED*/ - } - case Rbol: - { - if (!beginning_context) - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - opcode = Cbol; - goto store_opcode; - } - case Reol: - { - if (!((pos >= size) || - ((regexp_syntax & RE_NO_BK_VBAR) ? - (regex[pos] == '\174') : - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == '\174')) || - ((regexp_syntax & RE_NO_BK_PARENS)? - (regex[pos] == ')'): - (pos+1 < size && regex[pos] == '\134' && - regex[pos+1] == ')')))) - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - opcode = Ceol; - goto store_opcode; - /* NOTREACHED */ - break; - } - case Roptional: - { - if (beginning_context) - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for ? */ - ALLOC(3); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 3); - break; - } - case Rstar: - case Rplus: - { - if (beginning_context) - if (regexp_context_indep_ops) - goto op_error; - else - goto normal_char; - if (CURRENT_LEVEL_START == pattern_offset) - break; /* ignore empty patterns for + and * */ - ALLOC(9); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); - if (op == Rplus) /* jump over initial failure_jump */ - INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, - CURRENT_LEVEL_START + 6); - break; - } - case Ror: - { - ALLOC(6); - INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, - pattern_offset + 6); - if (num_jumps >= MAX_NESTING) - goto too_complex; - STORE(Cjump); - future_jumps[num_jumps++] = pattern_offset; - STORE(0); - STORE(0); - SET_LEVEL_START; - break; - } - case Ropenpar: - { - SET_LEVEL_START; - if (next_register < RE_NREGS) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cstart_memory); - STORE(next_register); - open_registers[num_open_registers++] = next_register; - bufp->num_registers++; - next_register++; - } - paren_depth++; - PUSH_LEVEL_STARTS; - current_level = 0; - SET_LEVEL_START; - break; - } - case Rclosepar: - { - if (paren_depth <= 0) - goto parenthesis_error; - POP_LEVEL_STARTS; - current_level = regexp_precedences[Ropenpar]; - paren_depth--; - if (paren_depth < num_open_registers) - { - bufp->uses_registers = 1; - ALLOC(2); - STORE(Cend_memory); - num_open_registers--; - STORE(open_registers[num_open_registers]); - } - break; - } - case Rmemory: - { - if (ch == '0') - goto bad_match_register; - assert(ch >= '0' && ch <= '9'); - bufp->uses_registers = 1; - opcode = Cmatch_memory; - ch -= '0'; - goto store_opcode_and_arg; - } - case Rextended_memory: - { - NEXTCHAR(ch); - if (ch < '0' || ch > '9') - goto bad_match_register; - NEXTCHAR(a); - if (a < '0' || a > '9') - goto bad_match_register; - ch = 10 * (a - '0') + ch - '0'; - if (ch <= 0 || ch >= RE_NREGS) - goto bad_match_register; - bufp->uses_registers = 1; - opcode = Cmatch_memory; - goto store_opcode_and_arg; - } - case Ropenset: - { - int complement; - int prev; - int offset; - int range; - int firstchar; + int a; + int pos; + int op; + int current_level; + int level; + int opcode; + int pattern_offset = 0, alloc; + int starts[NUM_LEVELS * MAX_NESTING]; + int starts_base; + int future_jumps[MAX_NESTING]; + int num_jumps; + unsigned char ch = '\0'; + char *pattern; + char *translate; + int next_register; + int paren_depth; + int num_open_registers; + int open_registers[RE_NREGS]; + int beginning_context; + + if (!re_compile_initialized) + re_compile_initialize(); + bufp->used = 0; + bufp->fastmap_accurate = 0; + bufp->uses_registers = 1; + bufp->num_registers = 1; + translate = bufp->translate; + pattern = bufp->buffer; + alloc = bufp->allocated; + if (alloc == 0 || pattern == NULL) + { + alloc = 256; + pattern = malloc(alloc); + if (!pattern) + goto out_of_memory; + } + pattern_offset = 0; + starts_base = 0; + num_jumps = 0; + current_level = 0; + SET_LEVEL_START; + num_open_registers = 0; + next_register = 1; + paren_depth = 0; + beginning_context = 1; + op = -1; + /* we use Rend dummy to ensure that pending jumps are updated + (due to low priority of Rend) before exiting the loop. */ + pos = 0; + while (op != Rend) + { + if (pos >= size) + op = Rend; + else + { + NEXTCHAR(ch); + if (translate) + ch = translate[(unsigned char)ch]; + op = regexp_plain_ops[(unsigned char)ch]; + if (op == Rquote) + { + NEXTCHAR(ch); + op = regexp_quoted_ops[(unsigned char)ch]; + if (op == Rnormal && regexp_ansi_sequences) + ANSI_TRANSLATE(ch); + } + } + level = regexp_precedences[op]; + /* printf("ch='%c' op=%d level=%d current_level=%d + curlevstart=%d\n", ch, op, level, current_level, + CURRENT_LEVEL_START); */ + if (level > current_level) + { + for (current_level++; current_level < level; current_level++) + SET_LEVEL_START; + SET_LEVEL_START; + } + else + if (level < current_level) + { + current_level = level; + for (;num_jumps > 0 && + future_jumps[num_jumps-1] >= CURRENT_LEVEL_START; + num_jumps--) + PUT_ADDR(future_jumps[num_jumps-1], pattern_offset); + } + switch (op) + { + case Rend: + { + break; + } + case Rnormal: + { + normal_char: + opcode = Cexact; + store_opcode_and_arg: /* opcode & ch must be set */ + SET_LEVEL_START; + ALLOC(2); + STORE(opcode); + STORE(ch); + break; + } + case Ranychar: + { + opcode = Canychar; + store_opcode: + SET_LEVEL_START; + ALLOC(1); + STORE(opcode); + break; + } + case Rquote: + { + abort(); + /*NOTREACHED*/ + } + case Rbol: + { + if (!beginning_context) + if (regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + opcode = Cbol; + goto store_opcode; + } + case Reol: + { + if (!((pos >= size) || + ((regexp_syntax & RE_NO_BK_VBAR) ? + (regex[pos] == '\174') : + (pos+1 < size && regex[pos] == '\134' && + regex[pos+1] == '\174')) || + ((regexp_syntax & RE_NO_BK_PARENS)? + (regex[pos] == ')'): + (pos+1 < size && regex[pos] == '\134' && + regex[pos+1] == ')')))) + if (regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + opcode = Ceol; + goto store_opcode; + /* NOTREACHED */ + break; + } + case Roptional: + { + if (beginning_context) + if (regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + if (CURRENT_LEVEL_START == pattern_offset) + break; /* ignore empty patterns for ? */ + ALLOC(3); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 3); + break; + } + case Rstar: + case Rplus: + { + if (beginning_context) + if (regexp_context_indep_ops) + goto op_error; + else + goto normal_char; + if (CURRENT_LEVEL_START == pattern_offset) + break; /* ignore empty patterns for + and * */ + ALLOC(9); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 6); + INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START); + if (op == Rplus) /* jump over initial failure_jump */ + INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump, + CURRENT_LEVEL_START + 6); + break; + } + case Ror: + { + ALLOC(6); + INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump, + pattern_offset + 6); + if (num_jumps >= MAX_NESTING) + goto too_complex; + STORE(Cjump); + future_jumps[num_jumps++] = pattern_offset; + STORE(0); + STORE(0); + SET_LEVEL_START; + break; + } + case Ropenpar: + { + SET_LEVEL_START; + if (next_register < RE_NREGS) + { + bufp->uses_registers = 1; + ALLOC(2); + STORE(Cstart_memory); + STORE(next_register); + open_registers[num_open_registers++] = next_register; + bufp->num_registers++; + next_register++; + } + paren_depth++; + PUSH_LEVEL_STARTS; + current_level = 0; + SET_LEVEL_START; + break; + } + case Rclosepar: + { + if (paren_depth <= 0) + goto parenthesis_error; + POP_LEVEL_STARTS; + current_level = regexp_precedences[Ropenpar]; + paren_depth--; + if (paren_depth < num_open_registers) + { + bufp->uses_registers = 1; + ALLOC(2); + STORE(Cend_memory); + num_open_registers--; + STORE(open_registers[num_open_registers]); + } + break; + } + case Rmemory: + { + if (ch == '0') + goto bad_match_register; + assert(ch >= '0' && ch <= '9'); + bufp->uses_registers = 1; + opcode = Cmatch_memory; + ch -= '0'; + goto store_opcode_and_arg; + } + case Rextended_memory: + { + NEXTCHAR(ch); + if (ch < '0' || ch > '9') + goto bad_match_register; + NEXTCHAR(a); + if (a < '0' || a > '9') + goto bad_match_register; + ch = 10 * (a - '0') + ch - '0'; + if (ch <= 0 || ch >= RE_NREGS) + goto bad_match_register; + bufp->uses_registers = 1; + opcode = Cmatch_memory; + goto store_opcode_and_arg; + } + case Ropenset: + { + int complement; + int prev; + int offset; + int range; + int firstchar; - SET_LEVEL_START; - ALLOC(1+256/8); - STORE(Cset); - offset = pattern_offset; - for (a = 0; a < 256/8; a++) - STORE(0); - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - if (ch == '\136') - { - complement = 1; - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - } - else - complement = 0; - prev = -1; - range = 0; - firstchar = 1; - while (ch != '\135' || firstchar) - { - firstchar = 0; - if (regexp_ansi_sequences && ch == '\134') - { - NEXTCHAR(ch); - ANSI_TRANSLATE(ch); - } - if (range) - { - for (a = prev; a <= (int)ch; a++) - SETBIT(pattern, offset, a); - prev = -1; - range = 0; - } - else - if (prev != -1 && ch == '-') - range = 1; - else - { - SETBIT(pattern, offset, ch); - prev = ch; - } - NEXTCHAR(ch); - if (translate) - ch = translate[(unsigned char)ch]; - } - if (range) - SETBIT(pattern, offset, '-'); - if (complement) - { - for (a = 0; a < 256/8; a++) - pattern[offset+a] ^= 0xff; - } - break; - } - case Rbegbuf: - { - opcode = Cbegbuf; - goto store_opcode; - } - case Rendbuf: - { - opcode = Cendbuf; - goto store_opcode; - } - case Rwordchar: - { - opcode = Csyntaxspec; - ch = Sword; - goto store_opcode_and_arg; - } - case Rnotwordchar: - { - opcode = Cnotsyntaxspec; - ch = Sword; - goto store_opcode_and_arg; - } - case Rwordbeg: - { - opcode = Cwordbeg; - goto store_opcode; - } - case Rwordend: - { - opcode = Cwordend; - goto store_opcode; - } - case Rwordbound: - { - opcode = Cwordbound; - goto store_opcode; - } - case Rnotwordbound: - { - opcode = Cnotwordbound; - goto store_opcode; - } - default: - { - abort(); - } - } - beginning_context = (op == Ropenpar || op == Ror); - } - if (starts_base != 0) - goto parenthesis_error; - assert(num_jumps == 0); - ALLOC(1); - STORE(Cend); - SET_FIELDS; - if(!re_optimize(bufp)) - return "Optimization error"; - return NULL; + SET_LEVEL_START; + ALLOC(1+256/8); + STORE(Cset); + offset = pattern_offset; + for (a = 0; a < 256/8; a++) + STORE(0); + NEXTCHAR(ch); + if (translate) + ch = translate[(unsigned char)ch]; + if (ch == '\136') + { + complement = 1; + NEXTCHAR(ch); + if (translate) + ch = translate[(unsigned char)ch]; + } + else + complement = 0; + prev = -1; + range = 0; + firstchar = 1; + while (ch != '\135' || firstchar) + { + firstchar = 0; + if (regexp_ansi_sequences && ch == '\134') + { + NEXTCHAR(ch); + ANSI_TRANSLATE(ch); + } + if (range) + { + for (a = prev; a <= (int)ch; a++) + SETBIT(pattern, offset, a); + prev = -1; + range = 0; + } + else + if (prev != -1 && ch == '-') + range = 1; + else + { + SETBIT(pattern, offset, ch); + prev = ch; + } + NEXTCHAR(ch); + if (translate) + ch = translate[(unsigned char)ch]; + } + if (range) + SETBIT(pattern, offset, '-'); + if (complement) + { + for (a = 0; a < 256/8; a++) + pattern[offset+a] ^= 0xff; + } + break; + } + case Rbegbuf: + { + opcode = Cbegbuf; + goto store_opcode; + } + case Rendbuf: + { + opcode = Cendbuf; + goto store_opcode; + } + case Rwordchar: + { + opcode = Csyntaxspec; + ch = Sword; + goto store_opcode_and_arg; + } + case Rnotwordchar: + { + opcode = Cnotsyntaxspec; + ch = Sword; + goto store_opcode_and_arg; + } + case Rwordbeg: + { + opcode = Cwordbeg; + goto store_opcode; + } + case Rwordend: + { + opcode = Cwordend; + goto store_opcode; + } + case Rwordbound: + { + opcode = Cwordbound; + goto store_opcode; + } + case Rnotwordbound: + { + opcode = Cnotwordbound; + goto store_opcode; + } + default: + { + abort(); + } + } + beginning_context = (op == Ropenpar || op == Ror); + } + if (starts_base != 0) + goto parenthesis_error; + assert(num_jumps == 0); + ALLOC(1); + STORE(Cend); + SET_FIELDS; + if(!re_optimize(bufp)) + return "Optimization error"; + return NULL; op_error: - SET_FIELDS; - return "Badly placed special character"; + SET_FIELDS; + return "Badly placed special character"; bad_match_register: - SET_FIELDS; - return "Bad match register number"; + SET_FIELDS; + return "Bad match register number"; hex_error: - SET_FIELDS; - return "Bad hexadecimal number"; + SET_FIELDS; + return "Bad hexadecimal number"; parenthesis_error: - SET_FIELDS; - return "Badly placed parenthesis"; + SET_FIELDS; + return "Badly placed parenthesis"; out_of_memory: - SET_FIELDS; - return "Out of memory"; + SET_FIELDS; + return "Out of memory"; ends_prematurely: - SET_FIELDS; - return "Regular expression ends prematurely"; + SET_FIELDS; + return "Regular expression ends prematurely"; too_complex: - SET_FIELDS; - return "Regular expression too complex"; + SET_FIELDS; + return "Regular expression too complex"; } #undef CHARAT @@ -1568,7 +1573,7 @@ char *re_compile_pattern(char *regex, int size, regexp_t bufp) PREFETCH; \ var = (unsigned char)*text++; \ if (translate) \ - var = translate[var] + var = translate[var] int re_match(regexp_t bufp, char *string, @@ -1576,406 +1581,392 @@ int re_match(regexp_t bufp, int pos, regexp_registers_t old_regs) { - char *code; - char *translate; - char *text; - char *textstart; - char *textend; - int a; - int b; - int ch; - int reg; - int match_end; - char *regstart; - char *regend; - int regsize; - match_state state; + char *code; + char *translate; + char *text; + char *textstart; + char *textend; + int a; + int b; + int ch; + int reg; + int match_end; + char *regstart; + char *regend; + int regsize; + match_state state; - assert(pos >= 0 && size >= 0); - assert(pos <= size); + assert(pos >= 0 && size >= 0); + assert(pos <= size); - text = string + pos; - textstart = string; - textend = string + size; + text = string + pos; + textstart = string; + textend = string + size; - code = bufp->buffer; + code = bufp->buffer; - translate = bufp->translate; + translate = bufp->translate; - NEW_STATE(state, bufp->num_registers); + NEW_STATE(state, bufp->num_registers); + + if (!re_compile_initialized) + re_compile_initialize(); continue_matching: - switch (*code++) - { - case Cend: - { - match_end = text - textstart; - if (old_regs) + switch (*code++) + { + case Cend: { - old_regs->start[0] = pos; - old_regs->end[0] = match_end; - if (!bufp->uses_registers) - { - for (a = 1; a < RE_NREGS; a++) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - } - } - else - { - for (a = 1; a < bufp->num_registers; a++) - { - if ((GET_REG_START(state, a) == NULL) || - (GET_REG_END(state, a) == NULL)) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - continue; - } - old_regs->start[a] = GET_REG_START(state, a) - textstart; - old_regs->end[a] = GET_REG_END(state, a) - textstart; - } - for (; a < RE_NREGS; a++) - { - old_regs->start[a] = -1; - old_regs->end[a] = -1; - } - } + match_end = text - textstart; + if (old_regs) + { + old_regs->start[0] = pos; + old_regs->end[0] = match_end; + if (!bufp->uses_registers) + { + for (a = 1; a < RE_NREGS; a++) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + } + } + else + { + for (a = 1; a < bufp->num_registers; a++) + { + if ((GET_REG_START(state, a) == NULL) || + (GET_REG_END(state, a) == NULL)) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + continue; + } + old_regs->start[a] = GET_REG_START(state, a) - textstart; + old_regs->end[a] = GET_REG_END(state, a) - textstart; + } + for (; a < RE_NREGS; a++) + { + old_regs->start[a] = -1; + old_regs->end[a] = -1; + } + } + } + FREE_STATE(state); + return match_end - pos; } - FREE_STATE(state); - return match_end - pos; - } - case Cbol: - { - if (text == textstart || text[-1] == '\n') - goto continue_matching; - goto fail; - } - case Ceol: - { - if (text == textend || *text == '\n') - goto continue_matching; - goto fail; - } - case Cset: - { - NEXTCHAR(ch); - if (code[ch/8] & (1<<(ch & 7))) + case Cbol: { - code += 256/8; - goto continue_matching; + if (text == textstart || text[-1] == '\n') + goto continue_matching; + goto fail; } - goto fail; - } - case Cexact: - { - NEXTCHAR(ch); - if (ch != (unsigned char)*code++) - goto fail; -/* { */ -/* char *p1 = code - 2; */ -/* ch = *(code - 1); */ -/* POP_FAILURE(state, code, text, goto done_matching, goto error); */ -/* while ((code == p1) && (*text != ch)) */ -/* POP_FAILURE(state, code, text, goto done_matching, goto error); */ -/* if ((code == p1) && (*text == ch)) */ -/* { */ -/* code += 2; */ -/* text++; */ -/* } */ -/* } */ - goto continue_matching; - } - case Canychar: - { - NEXTCHAR(ch); - if (ch == '\n') - goto fail; - goto continue_matching; - } - case Cstart_memory: - { - reg = *code++; - SET_REG_START(state, reg, text, goto error); - goto continue_matching; - } - case Cend_memory: - { - reg = *code++; - SET_REG_END(state, reg, text, goto error); - goto continue_matching; - } - case Cmatch_memory: - { - reg = *code++; - regstart = GET_REG_START(state, reg); - regend = GET_REG_END(state, reg); - if ((regstart == NULL) || (regend == NULL)) - goto fail; /* or should we just match nothing? */ - regsize = regend - regstart; - - if (regsize > (textend - text)) - goto fail; - if(translate) + case Ceol: { - for (; regstart < regend; regstart++, text++) - if (translate[*regstart] != translate[*text]) - goto fail; + if (text == textend || *text == '\n') + goto continue_matching; + goto fail; } - else - for (; regstart < regend; regstart++, text++) - if (*regstart != *text) - goto fail; -/* if (memcmp(text, regstart, regsize) != 0) - goto fail; - text += regsize; */ - goto continue_matching; - } - case Cupdate_failure_jump: - { - UPDATE_FAILURE(state, text, goto error); - /* fall to next case */ - } - /* treat Cstar_jump just like Cjump if it hasn't been optimized */ - case Cstar_jump: - case Cjump: - { - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - code += (int)SHORT(a); - goto continue_matching; - } - case Cdummy_failure_jump: - { - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - assert(*code == Cfailure_jump); - b = (unsigned char)code[1]; - b |= (unsigned char)code[2] << 8; - PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error); - code += a; - goto continue_matching; - } - case Cfailure_jump: - { - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - PUSH_FAILURE(state, code + a, text, goto error); - goto continue_matching; - } - case Crepeat1: - { - char *pinst; - a = (unsigned char)*code++; - a |= (unsigned char)*code++ << 8; - a = (int)SHORT(a); - pinst = code + a; - /* pinst is sole instruction in loop, and it matches a - * single character. Since Crepeat1 was originally a - * Cupdate_failure_jump, we also know that backtracking is - * useless: so long as the single-character expression matches, - * it must be used. Also, in the case of +, we've already - * matched one character, so + can't fail: nothing here can - * cause a failure. - */ - switch (*pinst++) + case Cset: { - case Cset: - { - if (translate) - { - while (text < textend) - { - ch = translate[(unsigned char)*text]; - if (pinst[ch/8] & (1<<(ch & 7))) - text++; - else - break; - } - } - else - { - while (text < textend) - { - ch = (unsigned char)*text; - if (pinst[ch/8] & (1<<(ch & 7))) - text++; - else - break; - } - } - break; - } - case Cexact: - { - ch = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - translate[(unsigned char)*text] == ch) - text++; - } - else - { - while (text < textend && (unsigned char)*text == ch) - text++; - } - break; - } - case Canychar: - { - while (text < textend && (unsigned char)*text != '\n') - text++; - break; - } - case Csyntaxspec: - { - a = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - translate[SYNTAX(*text)] == a) - text++; - } - else - { - while (text < textend && SYNTAX(*text) == a) - text++; - } - break; - } - case Cnotsyntaxspec: - { - a = (unsigned char)*pinst; - if (translate) - { - while (text < textend && - translate[SYNTAX(*text)] != a) - text++; - } - else - { - while (text < textend && SYNTAX(*text) != a) - text++; - } - break; - } - default: - { - abort(); - /*NOTREACHED*/ - } + NEXTCHAR(ch); + if (code[ch/8] & (1<<(ch & 7))) + { + code += 256/8; + goto continue_matching; + } + goto fail; + } + case Cexact: + { + NEXTCHAR(ch); + if (ch != (unsigned char)*code++) + goto fail; + goto continue_matching; + } + case Canychar: + { + NEXTCHAR(ch); + if (ch == '\n') + goto fail; + goto continue_matching; + } + case Cstart_memory: + { + reg = *code++; + SET_REG_START(state, reg, text, goto error); + goto continue_matching; + } + case Cend_memory: + { + reg = *code++; + SET_REG_END(state, reg, text, goto error); + goto continue_matching; + } + case Cmatch_memory: + { + reg = *code++; + regstart = GET_REG_START(state, reg); + regend = GET_REG_END(state, reg); + if ((regstart == NULL) || (regend == NULL)) + goto fail; /* or should we just match nothing? */ + regsize = regend - regstart; + + if (regsize > (textend - text)) + goto fail; + if(translate) + { + for (; regstart < regend; regstart++, text++) + if (translate[*regstart] != translate[*text]) + goto fail; + } + else + for (; regstart < regend; regstart++, text++) + if (*regstart != *text) + goto fail; + goto continue_matching; + } + case Cupdate_failure_jump: + { + UPDATE_FAILURE(state, text, goto error); + /* fall to next case */ + } + /* treat Cstar_jump just like Cjump if it hasn't been optimized */ + case Cstar_jump: + case Cjump: + { + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + code += (int)SHORT(a); + goto continue_matching; + } + case Cdummy_failure_jump: + { + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + assert(*code == Cfailure_jump); + b = (unsigned char)code[1]; + b |= (unsigned char)code[2] << 8; + PUSH_FAILURE(state, code + (int)SHORT(b) + 3, NULL, goto error); + code += a; + goto continue_matching; + } + case Cfailure_jump: + { + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + PUSH_FAILURE(state, code + a, text, goto error); + goto continue_matching; + } + case Crepeat1: + { + char *pinst; + a = (unsigned char)*code++; + a |= (unsigned char)*code++ << 8; + a = (int)SHORT(a); + pinst = code + a; + /* pinst is sole instruction in loop, and it matches a + * single character. Since Crepeat1 was originally a + * Cupdate_failure_jump, we also know that backtracking + * is useless: so long as the single-character + * expression matches, it must be used. Also, in the + * case of +, we've already matched one character, so + + * can't fail: nothing here can cause a failure. */ + switch (*pinst++) + { + case Cset: + { + if (translate) + { + while (text < textend) + { + ch = translate[(unsigned char)*text]; + if (pinst[ch/8] & (1<<(ch & 7))) + text++; + else + break; + } + } + else + { + while (text < textend) + { + ch = (unsigned char)*text; + if (pinst[ch/8] & (1<<(ch & 7))) + text++; + else + break; + } + } + break; + } + case Cexact: + { + ch = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + translate[(unsigned char)*text] == ch) + text++; + } + else + { + while (text < textend && (unsigned char)*text == ch) + text++; + } + break; + } + case Canychar: + { + while (text < textend && (unsigned char)*text != '\n') + text++; + break; + } + case Csyntaxspec: + { + a = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + translate[SYNTAX(*text)] == a) + text++; + } + else + { + while (text < textend && SYNTAX(*text) == a) + text++; + } + break; + } + case Cnotsyntaxspec: + { + a = (unsigned char)*pinst; + if (translate) + { + while (text < textend && + translate[SYNTAX(*text)] != a) + text++; + } + else + { + while (text < textend && SYNTAX(*text) != a) + text++; + } + break; + } + default: + { + abort(); + /*NOTREACHED*/ + } + } + /* due to the funky way + and * are compiled, the top + * failure- stack entry at this point is actually a + * success entry -- update it & pop it */ + UPDATE_FAILURE(state, text, goto error); + goto fail; /* i.e., succeed <wink/sigh> */ + } + case Cbegbuf: + { + if (text == textstart) + goto continue_matching; + goto fail; + } + case Cendbuf: + { + if (text == textend) + goto continue_matching; + goto fail; + } + case Cwordbeg: + { + if (text == textend) + goto fail; + if (SYNTAX(*text) != Sword) + goto fail; + if (text == textstart) + goto continue_matching; + if (SYNTAX(text[-1]) != Sword) + goto continue_matching; + goto fail; + } + case Cwordend: + { + if (text == textstart) + goto fail; + if (SYNTAX(text[-1]) != Sword) + goto fail; + if (text == textend) + goto continue_matching; + if (SYNTAX(*text) == Sword) + goto fail; + goto continue_matching; + } + case Cwordbound: + { + /* Note: as in gnu regexp, this also matches at the + * beginning and end of buffer. */ + + if (text == textstart || text == textend) + goto continue_matching; + if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)) + goto continue_matching; + goto fail; + } + case Cnotwordbound: + { + /* Note: as in gnu regexp, this never matches at the + * beginning and end of buffer. */ + if (text == textstart || text == textend) + goto fail; + if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))) + goto fail; + goto continue_matching; + } + case Csyntaxspec: + { + NEXTCHAR(ch); + if (SYNTAX(ch) != (unsigned char)*code++) + goto fail; + goto continue_matching; + } + case Cnotsyntaxspec: + { + NEXTCHAR(ch); + if (SYNTAX(ch) != (unsigned char)*code++) + break; + goto continue_matching; + } + default: + { + abort(); + /*NOTREACHED*/ + } } - /* due to the funky way + and * are compiled, the top failure- - * stack entry at this point is actually a success entry -- - * update it & pop it - */ - UPDATE_FAILURE(state, text, goto error); - goto fail; /* i.e., succeed <wink/sigh> */ - } - case Cbegbuf: - { - if (text == textstart) - goto continue_matching; - goto fail; - } - case Cendbuf: - { - if (text == textend) - goto continue_matching; - goto fail; - } - case Cwordbeg: - { - if (text == textend) - goto fail; - if (SYNTAX(*text) != Sword) - goto fail; - if (text == textstart) - goto continue_matching; - if (SYNTAX(text[-1]) != Sword) - goto continue_matching; - goto fail; - } - case Cwordend: - { - if (text == textstart) - goto fail; - if (SYNTAX(text[-1]) != Sword) - goto fail; - if (text == textend) - goto continue_matching; - if (SYNTAX(*text) == Sword) - goto fail; - goto continue_matching; - } - case Cwordbound: - { - /* Note: as in gnu regexp, this also matches at the beginning - * and end of buffer. */ - - if (text == textstart || text == textend) - goto continue_matching; - if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)) - goto continue_matching; - goto fail; - } - case Cnotwordbound: - { - /* Note: as in gnu regexp, this never matches at the beginning - * and end of buffer. */ - if (text == textstart || text == textend) - goto fail; - if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))) - goto fail; - goto continue_matching; - } - case Csyntaxspec: - { - NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) - goto fail; - goto continue_matching; - } - case Cnotsyntaxspec: - { - NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) - break; - goto continue_matching; - } - default: - { - abort(); - /*NOTREACHED*/ - } - } #if 0 /* This line is never reached --Guido */ - abort(); + abort(); #endif - /* - *NOTREACHED - */ + /* + *NOTREACHED + */ fail: - POP_FAILURE(state, code, text, goto done_matching, goto error); - goto continue_matching; + POP_FAILURE(state, code, text, goto done_matching, goto error); + goto continue_matching; done_matching: /* if(translated != NULL) */ /* free(translated); */ - FREE_STATE(state); - return -1; + FREE_STATE(state); + return -1; error: /* if (translated != NULL) */ /* free(translated); */ - FREE_STATE(state); - return -2; + FREE_STATE(state); + return -2; } #undef PREFETCH @@ -1988,91 +1979,91 @@ int re_search(regexp_t bufp, int range, regexp_registers_t regs) { - char *fastmap; - char *translate; - char *text; - char *partstart; - char *partend; - int dir; - int ret; - char anchor; + char *fastmap; + char *translate; + char *text; + char *partstart; + char *partend; + int dir; + int ret; + char anchor; - assert(size >= 0 && pos >= 0); - assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */ + assert(size >= 0 && pos >= 0); + assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */ - fastmap = bufp->fastmap; - translate = bufp->translate; - if (fastmap && !bufp->fastmap_accurate) - re_compile_fastmap(bufp); - anchor = bufp->anchor; - if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ - fastmap = NULL; - - if (range < 0) - { - dir = -1; - range = -range; - } - else - dir = 1; - - if (anchor == 2) - if (pos != 0) - return -1; - else - range = 0; - - for (; range >= 0; range--, pos += dir) - { - if (fastmap) - { - if (dir == 1) - { /* searching forwards */ - - text = string + pos; - partend = string + size; - partstart = text; - if (translate) - while (text != partend && - !fastmap[(unsigned char) translate[(unsigned char)*text]]) - text++; - else - while (text != partend && !fastmap[(unsigned char)*text]) - text++; - pos += text - partstart; - range -= text - partstart; - if (pos == size && bufp->can_be_null == 0) - return -1; + fastmap = bufp->fastmap; + translate = bufp->translate; + if (fastmap && !bufp->fastmap_accurate) + re_compile_fastmap(bufp); + anchor = bufp->anchor; + if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */ + fastmap = NULL; + + if (range < 0) + { + dir = -1; + range = -range; } else - { /* searching backwards */ - text = string + pos; - partstart = string + pos - range; - partend = text; - if (translate) - while (text != partstart && - !fastmap[(unsigned char) - translate[(unsigned char)*text]]) - text--; - else - while (text != partstart && - !fastmap[(unsigned char)*text]) - text--; - pos -= partend - text; - range -= partend - text; + dir = 1; + + if (anchor == 2) + if (pos != 0) + return -1; + else + range = 0; + + for (; range >= 0; range--, pos += dir) + { + if (fastmap) + { + if (dir == 1) + { /* searching forwards */ + + text = string + pos; + partend = string + size; + partstart = text; + if (translate) + while (text != partend && + !fastmap[(unsigned char) translate[(unsigned char)*text]]) + text++; + else + while (text != partend && !fastmap[(unsigned char)*text]) + text++; + pos += text - partstart; + range -= text - partstart; + if (pos == size && bufp->can_be_null == 0) + return -1; + } + else + { /* searching backwards */ + text = string + pos; + partstart = string + pos - range; + partend = text; + if (translate) + while (text != partstart && + !fastmap[(unsigned char) + translate[(unsigned char)*text]]) + text--; + else + while (text != partstart && + !fastmap[(unsigned char)*text]) + text--; + pos -= partend - text; + range -= partend - text; + } + } + if (anchor == 1) + { /* anchored to begline */ + if (pos > 0 && (string[pos - 1] != '\n')) + continue; + } + assert(pos >= 0 && pos <= size); + ret = re_match(bufp, string, size, pos, regs); + if (ret >= 0) + return pos; + if (ret == -2) + return -2; } - } - if (anchor == 1) - { /* anchored to begline */ - if (pos > 0 && (string[pos - 1] != '\n')) - continue; - } - assert(pos >= 0 && pos <= size); - ret = re_match(bufp, string, size, pos, regs); - if (ret >= 0) - return pos; - if (ret == -2) - return -2; - } - return -1; + return -1; } |