summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 1998-05-07 15:32:44 (GMT)
committer: Guido van Rossum <guido@python.org>, 1998-05-07 15:32:44 (GMT)
commit: dda6696be67bedf4d5547e8bf597cb7cd4769c69 (patch)
tree: c57b0abedb711efdbf9ce481a43f52da77b4afe4
parent: 0ef1b079a2a2c2c5ca2ffd8c8bd22f1145b17af4 (diff)
download: cpython-dda6696be67bedf4d5547e8bf597cb7cd4769c69.zip
cpython-dda6696be67bedf4d5547e8bf597cb7cd4769c69.tar.gz
cpython-dda6696be67bedf4d5547e8bf597cb7cd4769c69.tar.bz2
AMK's revised version of the previous patch.
-rw-r--r-- Modules/pcre-int.h 7
-rw-r--r-- Modules/pcre.h 1
-rw-r--r-- Modules/pypcre.c 32
3 files changed, 29 insertions, 11 deletions
diff --git a/Modules/pcre-int.h b/Modules/pcre-int.h
index 07aeb84..ceae8eb 100644
--- a/Modules/pcre-int.h
+++ b/Modules/pcre-int.h
@@ -3,7 +3,7 @@
*************************************************/
-#define PCRE_VERSION "1.07 16-Feb-1998"
+#define PCRE_VERSION "1.09 28-Apr-1998"
/* This is a library of functions to support regular expressions whose syntax
@@ -80,11 +80,12 @@ only some permitted at run or study time. */
#ifdef FOR_PYTHON
#define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
- PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_LOCALE)
+ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY| \
+ PCRE_LOCALE)
#else
#define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
- PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA)
+ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY)
#endif
#define PUBLIC_EXEC_OPTIONS \
(PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
diff --git a/Modules/pcre.h b/Modules/pcre.h
index 06768a9..347ca72 100644
--- a/Modules/pcre.h
+++ b/Modules/pcre.h
@@ -34,6 +34,7 @@ extern "C" {
#define PCRE_EXTRA 0x0040
#define PCRE_NOTBOL 0x0080
#define PCRE_NOTEOL 0x0100
+#define PCRE_UNGREEDY 0x0400
#ifdef FOR_PYTHON
#define PCRE_LOCALE 0x0200
#endif
diff --git a/Modules/pypcre.c b/Modules/pypcre.c
index ab8c477..4e474c5 100644
--- a/Modules/pypcre.c
+++ b/Modules/pypcre.c
@@ -1216,6 +1216,7 @@ compile_branch(int options, int *brackets, uschar **codeptr,
int repeat_type, op_type;
int repeat_min, repeat_max;
int bravalue, length;
+int greedy_default, greedy_non_default;
register int c;
register uschar *code = *codeptr;
const uschar *ptr = *ptrptr;
@@ -1224,6 +1225,11 @@ uschar *previous = NULL;
uschar class[32];
uschar *class_flag; /* Pointer to the single-byte flag for OP_CLASS_L */
+/* Set up the default and non-default settings for greediness */
+
+greedy_default = ((options & PCRE_UNGREEDY) != 0);
+greedy_non_default = greedy_default ^ 1;
+
/* Switch on next character until the end of the branch */
for (;; ptr++)
@@ -1536,10 +1542,13 @@ for (;; ptr++)
goto FAILED;
}
- /* If the next character is '?' this is a minimizing repeat. Advance to the
+ /* If the next character is '?' this is a minimizing repeat, by default,
+ but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
next character. */
- if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
+ if (ptr[1] == '?')
+ { repeat_type = greedy_non_default; ptr++; }
+ else repeat_type = greedy_default;
/* If the maximum is zero then the minimum must also be zero; Perl allows
this case, so we do too - by simply omitting the item altogether. */
@@ -1628,14 +1637,20 @@ for (;; ptr++)
/* If the minimum is 1 and the previous item was a character string,
we either have to put back the item that got cancelled if the string
length was 1, or add the character back onto the end of a longer
- string. For a character type nothing need be done; it will just get put
- back naturally. */
+ string. For a character type nothing need be done; it will just get
+ put back naturally. Note that the final character is always going to
+ get added below. */
else if (*previous == OP_CHARS)
{
if (code == previous) code += 2; else previous[1]++;
}
+ /* For a single negated character we also have to put back the
+ item that got cancelled. */
+
+ else if (*previous == OP_NOT) code++;
+
/* If the maximum is unlimited, insert an OP_STAR. */
if (repeat_max < 0)
@@ -2484,7 +2499,7 @@ while ((c = *(++ptr)) != 0)
ptr += 2;
break;
}
- /* Else fall thourh */
+ /* Else fall through */
/* Else loop setting valid options until ) is met. Anything else is an
error. */
@@ -2725,14 +2740,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n",
if (re->options != 0)
{
- printf("%s%s%s%s%s%s%s\n",
+ printf("%s%s%s%s%s%s%s%s\n",
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
- ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
+ ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
+ ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
}
if ((re->options & PCRE_FIRSTSET) != 0)
@@ -3070,7 +3086,7 @@ static int grow_stack(match_data *md)
if (md->offset_top == NULL || md->eptr == NULL || md->ecode == NULL ||
md->off_num == NULL || md->r1 == NULL || md->r2 == NULL)
{
- PyErr_SetString(PyExc_MemoryError, "Can't increase failure stack for re operation");
+ PyErr_NoMemory();
longjmp(md->error_env, 1);
}
return 0;