summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2013-11-23 21:20:30 (GMT)
committerSerhiy Storchaka <storchaka@gmail.com>2013-11-23 21:20:30 (GMT)
commit32eddc1bbc47479a3639b9191ffc82a52903c5f4 (patch)
tree8ce67ed6f7d6db96277f4e6d07457f2b159fb362 /Modules
parent3ed82c55a85665a33b821064c1911b4aa09301d9 (diff)
downloadcpython-32eddc1bbc47479a3639b9191ffc82a52903c5f4.zip
cpython-32eddc1bbc47479a3639b9191ffc82a52903c5f4.tar.gz
cpython-32eddc1bbc47479a3639b9191ffc82a52903c5f4.tar.bz2
Issue #16203: Add re.fullmatch() function and regex.fullmatch() method,
which anchor the pattern at both ends of the string to match. Original patch by Matthew Barnett.
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_sre.c77
-rw-r--r--Modules/sre.h1
-rw-r--r--Modules/sre_lib.h28
3 files changed, 81 insertions, 25 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 3a92db9..55a86c2 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -4,24 +4,25 @@
* regular expression matching engine
*
* partial history:
- * 1999-10-24 fl created (based on existing template matcher code)
- * 2000-03-06 fl first alpha, sort of
- * 2000-08-01 fl fixes for 1.6b1
- * 2000-08-07 fl use PyOS_CheckStack() if available
- * 2000-09-20 fl added expand method
- * 2001-03-20 fl lots of fixes for 2.1b2
- * 2001-04-15 fl export copyright as Python attribute, not global
- * 2001-04-28 fl added __copy__ methods (work in progress)
- * 2001-05-14 fl fixes for 1.5.2 compatibility
- * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
- * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
- * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
- * 2001-10-21 fl added sub/subn primitive
- * 2001-10-24 fl added finditer primitive (for 2.2 only)
- * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
- * 2002-11-09 fl fixed empty sub/subn return type
- * 2003-04-18 mvl fully support 4-byte codes
- * 2003-10-17 gn implemented non recursive scheme
+ * 1999-10-24 fl created (based on existing template matcher code)
+ * 2000-03-06 fl first alpha, sort of
+ * 2000-08-01 fl fixes for 1.6b1
+ * 2000-08-07 fl use PyOS_CheckStack() if available
+ * 2000-09-20 fl added expand method
+ * 2001-03-20 fl lots of fixes for 2.1b2
+ * 2001-04-15 fl export copyright as Python attribute, not global
+ * 2001-04-28 fl added __copy__ methods (work in progress)
+ * 2001-05-14 fl fixes for 1.5.2 compatibility
+ * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
+ * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
+ * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
+ * 2001-10-21 fl added sub/subn primitive
+ * 2001-10-24 fl added finditer primitive (for 2.2 only)
+ * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
+ * 2002-11-09 fl fixed empty sub/subn return type
+ * 2003-04-18 mvl fully support 4-byte codes
+ * 2003-10-17 gn implemented non recursive scheme
+ * 2013-02-04 mrab added fullmatch primitive
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
@@ -559,6 +560,40 @@ pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
}
+/* Match the pattern against the whole of the given string.
+ *
+ * Arguments are parsed as (string, pos = 0, endpos = PY_SSIZE_T_MAX).
+ * The matcher is run from state.start with state.match_all set, so the
+ * end-of-pattern opcode only succeeds once the end of the string has been
+ * reached.
+ *
+ * Returns the result of pattern_new_match() for the match status, or NULL
+ * when argument parsing fails, state_init() fails, or an exception is
+ * pending after matching.
+ *
+ * NOTE(review): the first keyword is spelled "pattern" in kwlist although
+ * the docstring names the argument `string`; it is left untouched here so
+ * existing keyword callers keep working -- confirm the intended name.
+ */
static PyObject*
+pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
+{
+    SRE_STATE state;
+    Py_ssize_t status;

+    PyObject* string;
+    Py_ssize_t start = 0;
+    Py_ssize_t end = PY_SSIZE_T_MAX;
+    static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
+    if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:fullmatch", kwlist,
+                                     &string, &start, &end))
+        return NULL;

+    string = state_init(&state, self, string, start, end);
+    if (!string)
+        return NULL;

+    /* require the match to consume everything up to state.end */
+    state.match_all = 1;
+    state.ptr = state.start;

+    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));

+    status = sre_match(&state, PatternObject_GetCode(self));

+    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
+    if (PyErr_Occurred()) {
+        /* the state was initialised above; release it on the error path
+           too instead of returning with it still held */
+        state_fini(&state);
+        return NULL;
+    }

+    state_fini(&state);

+    return pattern_new_match(self, &state, status);
+}
+
+static PyObject*
pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE state;
@@ -1223,6 +1258,10 @@ PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) -> match object or None.\n\
Matches zero or more characters at the beginning of the string");
+PyDoc_STRVAR(pattern_fullmatch_doc, /* docstring attached to the "fullmatch" entry of pattern_methods[] */
+"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
+ Matches against all of the string");
+
PyDoc_STRVAR(pattern_search_doc,
"search(string[, pos[, endpos]]) -> match object or None.\n\
Scan through string looking for a match, and return a corresponding\n\
@@ -1258,6 +1297,8 @@ PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
static PyMethodDef pattern_methods[] = {
{"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
pattern_match_doc},
+ {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
+ pattern_fullmatch_doc},
{"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
pattern_search_doc},
{"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
diff --git a/Modules/sre.h b/Modules/sre.h
index 42fe28d..621e2d8 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -86,6 +86,7 @@ typedef struct {
SRE_REPEAT *repeat;
/* hooks */
SRE_TOLOWER_HOOK lower;
+ int match_all;
} SRE_STATE;
typedef struct {
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h
index 214c22a..df86697 100644
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -454,17 +454,24 @@ do { \
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13
-#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
+#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) /* allocate a child context and re-enter the matcher at `entrance` */ \
DATA_ALLOC(SRE(match_context), nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
+ nextctx->match_all = matchall; /* each child context carries its own match_all flag */ \
ctx_pos = alloc_pos; \
ctx = nextctx; \
goto entrance; \
jumplabel: \
while (0) /* gcc doesn't like labels at end of scopes */ \

+#define DO_JUMP(jumpvalue, jumplabel, nextpattern) /* child context inherits match_all from the current context */ \
+ DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)

+#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) /* child context runs with match_all set to 0 */ \
+ DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
+
typedef struct {
Py_ssize_t last_ctx_pos;
Py_ssize_t jump;
@@ -477,6 +484,7 @@ typedef struct {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
+ int match_all;
} SRE(match_context);
/* check if string matches the given pattern. returns <0 for
@@ -499,6 +507,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern)
ctx->last_ctx_pos = -1;
ctx->jump = JUMP_NONE;
ctx->pattern = pattern;
+ ctx->match_all = state->match_all;
ctx_pos = alloc_pos;
entrance:
@@ -571,8 +580,11 @@ entrance:
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
- state->ptr = ctx->ptr;
- RETURN_SUCCESS;
+ if (!ctx->match_all || ctx->ptr == state->end) {
+ state->ptr = ctx->ptr;
+ RETURN_SUCCESS;
+ }
+ RETURN_FAILURE;
case SRE_OP_AT:
/* match at given position */
@@ -726,7 +738,8 @@ entrance:
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
RETURN_FAILURE;
- if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
+ if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
+ (!ctx->match_all || ctx->ptr == state->end)) {
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
@@ -810,7 +823,8 @@ entrance:
ctx->ptr += ctx->count;
}
- if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
+ if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
+ (!ctx->match_all || ctx->ptr == state->end)) {
/* tail is empty. we're finished */
state->ptr = ctx->ptr;
RETURN_SUCCESS;
@@ -1082,7 +1096,7 @@ entrance:
state->ptr = ctx->ptr - ctx->pattern[1];
if (state->ptr < state->beginning)
RETURN_FAILURE;
- DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
+ DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
RETURN_ON_FAILURE(ret);
ctx->pattern += ctx->pattern[0];
break;
@@ -1094,7 +1108,7 @@ entrance:
ctx->ptr, ctx->pattern[1]));
state->ptr = ctx->ptr - ctx->pattern[1];
if (state->ptr >= state->beginning) {
- DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
+ DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
if (ret) {
RETURN_ON_ERROR(ret);
RETURN_FAILURE;