summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_sre.c 74
-rw-r--r-- Modules/sre_constants.h 6
2 files changed, 63 insertions, 17 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 63e4ef3..8811038 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -24,8 +24,9 @@
* 2000-10-24 fl really fixed assert_not; reset groups in findall
* 2000-12-21 fl fixed memory leak in groupdict
* 2001-01-02 fl properly reset pointer after failed assertion in MIN_UNTIL
- * 2001-01-15 fl avoid recursion for MIN_UTIL; fixed uppercase literal bug
+ * 2001-01-15 fl avoid recursion for MIN_UNTIL; fixed uppercase literal bug
* 2001-01-16 fl fixed memory leak in pattern destructor
+ * 2001-03-20 fl lots of fixes for 2.1b2
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
@@ -40,7 +41,7 @@
#ifndef SRE_RECURSIVE
-char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
+char copyright[] = " SRE 2.1b2 Copyright (c) 1997-2001 by Secret Labs AB ";
#include "Python.h"
@@ -141,11 +142,6 @@ static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127 };
-static unsigned int sre_lower(unsigned int ch)
-{
- return ((ch) < 128 ? sre_char_lower[ch] : ch);
-}
-
#define SRE_IS_DIGIT(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
#define SRE_IS_SPACE(ch)\
@@ -157,30 +153,39 @@ static unsigned int sre_lower(unsigned int ch)
#define SRE_IS_WORD(ch)\
((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
-/* locale-specific character predicates */
-
-static unsigned int sre_lower_locale(unsigned int ch)
+static unsigned int sre_lower(unsigned int ch)
{
- return ((ch) < 256 ? tolower((ch)) : ch);
+ return ((ch) < 128 ? sre_char_lower[ch] : ch);
}
+
+/* locale-specific character predicates */
+
#define SRE_LOC_IS_DIGIT(ch) ((ch) < 256 ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch) ((ch) < 256 ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch) ((ch) < 256 ? isalnum((ch)) : 0)
#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
+static unsigned int sre_lower_locale(unsigned int ch)
+{
+ return ((ch) < 256 ? tolower((ch)) : ch);
+}
+
/* unicode-specific character predicates */
#if defined(HAVE_UNICODE)
-static unsigned int sre_lower_unicode(unsigned int ch)
-{
- return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
-}
+
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
+
+static unsigned int sre_lower_unicode(unsigned int ch)
+{
+ return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
+}
+
#endif
LOCAL(int)
@@ -418,6 +423,42 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
this = ((void*) ptr < state->end) ?
SRE_IS_WORD((int) ptr[0]) : 0;
return this == that;
+
+ case SRE_AT_LOC_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+ return this != that;
+
+ case SRE_AT_LOC_NON_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_LOC_IS_WORD((int) ptr[0]) : 0;
+ return this == that;
+
+ case SRE_AT_UNI_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+ return this != that;
+
+ case SRE_AT_UNI_NON_BOUNDARY:
+ if (state->beginning == state->end)
+ return 0;
+ that = ((void*) ptr > state->beginning) ?
+ SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
+ this = ((void*) ptr < state->end) ?
+ SRE_UNI_IS_WORD((int) ptr[0]) : 0;
+ return this == that;
}
return 0;
@@ -1037,7 +1078,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
/* see if the tail matches */
state->repeat = rp->prev;
- if (rp->pattern[2] == 65535) {
+ /* FIXME: the following fix doesn't always work (#133283) */
+ if (0 && rp->pattern[2] == 65535) {
/* unbounded repeat */
for (;;) {
i = SRE_MATCH(state, pattern, level + 1);
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index c6850ad..73bcb34 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -11,7 +11,7 @@
* See the _sre.c file for information on usage and redistribution.
*/
-#define SRE_MAGIC 20010115
+#define SRE_MAGIC 20010320
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
@@ -49,6 +49,10 @@
#define SRE_AT_END 5
#define SRE_AT_END_LINE 6
#define SRE_AT_END_STRING 7
+#define SRE_AT_LOC_BOUNDARY 8
+#define SRE_AT_LOC_NON_BOUNDARY 9
+#define SRE_AT_UNI_BOUNDARY 10
+#define SRE_AT_UNI_NON_BOUNDARY 11
#define SRE_CATEGORY_DIGIT 0
#define SRE_CATEGORY_NOT_DIGIT 1
#define SRE_CATEGORY_SPACE 2