From dda6696be67bedf4d5547e8bf597cb7cd4769c69 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 7 May 1998 15:32:44 +0000 Subject: AMK's revised version of the previous patch. --- Modules/pcre-int.h | 7 ++++--- Modules/pcre.h | 1 + Modules/pypcre.c | 32 ++++++++++++++++++++++++-------- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/Modules/pcre-int.h b/Modules/pcre-int.h index 07aeb84..ceae8eb 100644 --- a/Modules/pcre-int.h +++ b/Modules/pcre-int.h @@ -3,7 +3,7 @@ *************************************************/ -#define PCRE_VERSION "1.07 16-Feb-1998" +#define PCRE_VERSION "1.09 28-Apr-1998" /* This is a library of functions to support regular expressions whose syntax @@ -80,11 +80,12 @@ only some permitted at run or study time. */ #ifdef FOR_PYTHON #define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_LOCALE) + PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY| \ + PCRE_LOCALE) #else #define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA) + PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY) #endif #define PUBLIC_EXEC_OPTIONS \ (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \ diff --git a/Modules/pcre.h b/Modules/pcre.h index 06768a9..347ca72 100644 --- a/Modules/pcre.h +++ b/Modules/pcre.h @@ -34,6 +34,7 @@ extern "C" { #define PCRE_EXTRA 0x0040 #define PCRE_NOTBOL 0x0080 #define PCRE_NOTEOL 0x0100 +#define PCRE_UNGREEDY 0x0400 #ifdef FOR_PYTHON #define PCRE_LOCALE 0x0200 #endif diff --git a/Modules/pypcre.c b/Modules/pypcre.c index ab8c477..4e474c5 100644 --- a/Modules/pypcre.c +++ b/Modules/pypcre.c @@ -1216,6 +1216,7 @@ compile_branch(int options, int *brackets, uschar **codeptr, int repeat_type, op_type; int repeat_min, repeat_max; int bravalue, length; +int greedy_default, greedy_non_default; register int c; register uschar *code = *codeptr; const uschar *ptr = *ptrptr; @@ -1224,6 +1225,11 @@ uschar *previous = NULL; uschar class[32]; uschar *class_flag; /* Pointer to the single-byte flag for OP_CLASS_L */ +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + /* Switch on next character until the end of the branch */ for (;; ptr++) @@ -1536,10 +1542,13 @@ for (;; ptr++) goto FAILED; } - /* If the next character is '?' this is a minimizing repeat. Advance to the + /* If the next character is '?' this is a minimizing repeat, by default, + but if PCRE_UNGREEDY is set, it works the other way round. Advance to the next character. */ - if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0; + if (ptr[1] == '?') + { repeat_type = greedy_non_default; ptr++; } + else repeat_type = greedy_default; /* If the maximum is zero then the minimum must also be zero; Perl allows this case, so we do too - by simply omitting the item altogether. */ @@ -1628,14 +1637,20 @@ for (;; ptr++) /* If the mininum is 1 and the previous item was a character string, we either have to put back the item that got cancelled if the string length was 1, or add the character back onto the end of a longer - string. For a character type nothing need be done; it will just get put - back naturally. */ + string. For a character type nothing need be done; it will just get + put back naturally. Note that the final character is always going to + get added below. */ else if (*previous == OP_CHARS) { if (code == previous) code += 2; else previous[1]++; } + /* For a single negated character we also have to put back the + item that got cancelled. */ + + else if (*previous == OP_NOT) code++; + /* If the maximum is unlimited, insert an OP_STAR. */ if (repeat_max < 0) @@ -2484,7 +2499,7 @@ while ((c = *(++ptr)) != 0) ptr += 2; break; } - /* Else fall thourh */ + /* Else fall through */ /* Else loop setting valid options until ) is met. Anything else is an error. */ @@ -2725,14 +2740,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n", if (re->options != 0) { - printf("%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s\n", ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", - ((re->options & PCRE_EXTRA) != 0)? "extra " : ""); + ((re->options & PCRE_EXTRA) != 0)? "extra " : "", + ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); } if ((re->options & PCRE_FIRSTSET) != 0) @@ -3070,7 +3086,7 @@ static int grow_stack(match_data *md) if (md->offset_top == NULL || md->eptr == NULL || md->ecode == NULL || md->off_num == NULL || md->r1 == NULL || md->r2 == NULL) { - PyErr_SetString(PyExc_MemoryError, "Can't increase failure stack for re operation"); + PyErr_NoMemory(); longjmp(md->error_env, 1); } return 0; -- cgit v0.12