summaryrefslogtreecommitdiffstats
path: root/Modules/pypcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/pypcre.c')
-rw-r--r--Modules/pypcre.c32
1 files changed, 24 insertions, 8 deletions
diff --git a/Modules/pypcre.c b/Modules/pypcre.c
index ab8c477..4e474c5 100644
--- a/Modules/pypcre.c
+++ b/Modules/pypcre.c
@@ -1216,6 +1216,7 @@ compile_branch(int options, int *brackets, uschar **codeptr,
int repeat_type, op_type;
int repeat_min, repeat_max;
int bravalue, length;
+int greedy_default, greedy_non_default;
register int c;
register uschar *code = *codeptr;
const uschar *ptr = *ptrptr;
@@ -1224,6 +1225,11 @@ uschar *previous = NULL;
uschar class[32];
uschar *class_flag; /* Pointer to the single-byte flag for OP_CLASS_L */
+/* Set up the default and non-default settings for greediness */
+
+greedy_default = ((options & PCRE_UNGREEDY) != 0);
+greedy_non_default = greedy_default ^ 1;
+
/* Switch on next character until the end of the branch */
for (;; ptr++)
@@ -1536,10 +1542,13 @@ for (;; ptr++)
goto FAILED;
}
- /* If the next character is '?' this is a minimizing repeat. Advance to the
+ /* If the next character is '?' this is a minimizing repeat, by default,
+ but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
next character. */
- if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
+ if (ptr[1] == '?')
+ { repeat_type = greedy_non_default; ptr++; }
+ else repeat_type = greedy_default;
/* If the maximum is zero then the minimum must also be zero; Perl allows
this case, so we do too - by simply omitting the item altogether. */
@@ -1628,14 +1637,20 @@ for (;; ptr++)
/* If the mininum is 1 and the previous item was a character string,
we either have to put back the item that got cancelled if the string
length was 1, or add the character back onto the end of a longer
- string. For a character type nothing need be done; it will just get put
- back naturally. */
+ string. For a character type nothing need be done; it will just get
+ put back naturally. Note that the final character is always going to
+ get added below. */
else if (*previous == OP_CHARS)
{
if (code == previous) code += 2; else previous[1]++;
}
+ /* For a single negated character we also have to put back the
+ item that got cancelled. */
+
+ else if (*previous == OP_NOT) code++;
+
/* If the maximum is unlimited, insert an OP_STAR. */
if (repeat_max < 0)
@@ -2484,7 +2499,7 @@ while ((c = *(++ptr)) != 0)
ptr += 2;
break;
}
- /* Else fall thourh */
+ /* Else fall through */
/* Else loop setting valid options until ) is met. Anything else is an
error. */
@@ -2725,14 +2740,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n",
if (re->options != 0)
{
- printf("%s%s%s%s%s%s%s\n",
+ printf("%s%s%s%s%s%s%s%s\n",
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
- ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
+ ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
+ ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
}
if ((re->options & PCRE_FIRSTSET) != 0)
@@ -3070,7 +3086,7 @@ static int grow_stack(match_data *md)
if (md->offset_top == NULL || md->eptr == NULL || md->ecode == NULL ||
md->off_num == NULL || md->r1 == NULL || md->r2 == NULL)
{
- PyErr_SetString(PyExc_MemoryError, "Can't increase failure stack for re operation");
+ PyErr_NoMemory();
longjmp(md->error_env, 1);
}
return 0;