summaryrefslogtreecommitdiffstats
path: root/Modules/_sre.c
diff options
context:
space:
mode:
author: Thomas Wouters <thomas@python.org> 2006-04-21 10:40:58 (GMT)
committer: Thomas Wouters <thomas@python.org> 2006-04-21 10:40:58 (GMT)
commit: 49fd7fa4431da299196d74087df4a04f99f9c46f (patch)
tree: 35ace5fe78d3d52c7a9ab356ab9f6dbf8d4b71f4 /Modules/_sre.c
parent: 9ada3d6e29d5165dadacbe6be07bcd35cfbef59d (diff)
download: cpython-49fd7fa4431da299196d74087df4a04f99f9c46f.zip
cpython-49fd7fa4431da299196d74087df4a04f99f9c46f.tar.gz
cpython-49fd7fa4431da299196d74087df4a04f99f9c46f.tar.bz2
Merge p3yk branch with the trunk up to revision 45595. This breaks a fair
number of tests, all because of the codecs/_multibytecodecs issue described
here (it's not a Py3K issue, just something Py3K discovers):

http://mail.python.org/pipermail/python-dev/2006-April/064051.html

Hye-Shik Chang promised to look for a fix, so no need to fix it here. The
tests that are expected to break are:

test_codecencodings_cn
test_codecencodings_hk
test_codecencodings_jp
test_codecencodings_kr
test_codecencodings_tw
test_codecs
test_multibytecodec

This merge fixes an actual test failure (test_weakref) in this branch,
though, so I believe merging is the right thing to do anyway.
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- Modules/_sre.c 407
1 files changed, 203 insertions, 204 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 81223d7..4af08ed 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -275,7 +275,7 @@ data_stack_grow(SRE_STATE* state, int size)
data_stack_dealloc(state);
return SRE_ERROR_MEMORY;
}
- state->data_stack = stack;
+ state->data_stack = (char *)stack;
state->data_stack_size = cursize;
}
return 0;
@@ -335,7 +335,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
{
/* check if pointer is at given position */
- int this, that;
+ int thisp, thatp;
switch (at) {
@@ -362,57 +362,57 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
case SRE_AT_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_IS_WORD((int) ptr[0]) : 0;
- return this != that;
+ return thisp != thatp;
case SRE_AT_NON_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_IS_WORD((int) ptr[0]) : 0;
- return this == that;
+ return thisp == thatp;
case SRE_AT_LOC_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
- return this != that;
+ return thisp != thatp;
case SRE_AT_LOC_NON_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
- return this == that;
+ return thisp == thatp;
#if defined(HAVE_UNICODE)
case SRE_AT_UNI_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
- return this != that;
+ return thisp != thatp;
case SRE_AT_UNI_NON_BOUNDARY:
if (state->beginning == state->end)
return 0;
- that = ((void*) ptr > state->beginning) ?
+ thatp = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
- this = ((void*) ptr < state->end) ?
+ thisp = ((void*) ptr < state->end) ?
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
- return this == that;
+ return thisp == thatp;
#endif
}
@@ -516,8 +516,8 @@ LOCAL(int)
SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount)
{
SRE_CODE chr;
- SRE_CHAR* ptr = state->ptr;
- SRE_CHAR* end = state->end;
+ SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
+ SRE_CHAR* end = (SRE_CHAR *)state->end;
int i;
/* adjust end */
@@ -803,7 +803,7 @@ typedef struct {
LOCAL(int)
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
{
- SRE_CHAR* end = state->end;
+ SRE_CHAR* end = (SRE_CHAR *)state->end;
int alloc_pos, ctx_pos = -1;
int i, ret = 0;
int jump;
@@ -821,7 +821,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
entrance:
- ctx->ptr = state->ptr;
+ ctx->ptr = (SRE_CHAR *)state->ptr;
if (ctx->pattern[0] == SRE_OP_INFO) {
/* optimization info block */
@@ -1477,8 +1477,8 @@ exit:
LOCAL(int)
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
{
- SRE_CHAR* ptr = state->start;
- SRE_CHAR* end = state->end;
+ SRE_CHAR* ptr = (SRE_CHAR *)state->start;
+ SRE_CHAR* end = (SRE_CHAR *)state->end;
int status = 0;
int prefix_len = 0;
int prefix_skip = 0;
@@ -1524,7 +1524,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
/* pattern starts with a known prefix. use the overlap
table to skip forward as fast as we possibly can */
int i = 0;
- end = state->end;
+ end = (SRE_CHAR *)state->end;
while (ptr < end) {
for (;;) {
if ((SRE_CODE) ptr[0] != prefix[i]) {
@@ -1559,7 +1559,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
/* pattern starts with a literal character. this is used
for short prefixes, and if fast search is disabled */
SRE_CODE chr = pattern[1];
- end = state->end;
+ end = (SRE_CHAR *)state->end;
for (;;) {
while (ptr < end && (SRE_CODE) ptr[0] != chr)
ptr++;
@@ -1576,7 +1576,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
}
} else if (charset) {
/* pattern starts with a character from a known set */
- end = state->end;
+ end = (SRE_CHAR *)state->end;
for (;;) {
while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
ptr++;
@@ -1619,72 +1619,8 @@ SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, int len)
/* factories and destructors */
/* see sre.h for object declarations */
-
-static PyTypeObject Pattern_Type;
-static PyTypeObject Match_Type;
-static PyTypeObject Scanner_Type;
-
-static PyObject *
-_compile(PyObject* self_, PyObject* args)
-{
- /* "compile" pattern descriptor to pattern object */
-
- PatternObject* self;
- int i, n;
-
- PyObject* pattern;
- int flags = 0;
- PyObject* code;
- int groups = 0;
- PyObject* groupindex = NULL;
- PyObject* indexgroup = NULL;
- if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
- &PyList_Type, &code, &groups,
- &groupindex, &indexgroup))
- return NULL;
-
- n = PyList_GET_SIZE(code);
-
- self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
- if (!self)
- return NULL;
-
- self->codesize = n;
-
- for (i = 0; i < n; i++) {
- PyObject *o = PyList_GET_ITEM(code, i);
- unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
- : PyLong_AsUnsignedLong(o);
- self->code[i] = (SRE_CODE) value;
- if ((unsigned long) self->code[i] != value) {
- PyErr_SetString(PyExc_OverflowError,
- "regular expression code size limit exceeded");
- break;
- }
- }
-
- if (PyErr_Occurred()) {
- PyObject_DEL(self);
- return NULL;
- }
-
- Py_INCREF(pattern);
- self->pattern = pattern;
-
- self->flags = flags;
-
- self->groups = groups;
-
- Py_XINCREF(groupindex);
- self->groupindex = groupindex;
-
- Py_XINCREF(indexgroup);
- self->indexgroup = indexgroup;
-
- self->weakreflist = NULL;
-
- return (PyObject*) self;
-}
+static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
+static PyObject*pattern_scanner(PatternObject*, PyObject*);
static PyObject *
sre_codesize(PyObject* self, PyObject* args)
@@ -1900,98 +1836,6 @@ pattern_error(int status)
}
}
-static PyObject*
-pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
-{
- /* create match object (from state object) */
-
- MatchObject* match;
- int i, j;
- char* base;
- int n;
-
- if (status > 0) {
-
- /* create match object (with room for extra group marks) */
- match = PyObject_NEW_VAR(MatchObject, &Match_Type,
- 2*(pattern->groups+1));
- if (!match)
- return NULL;
-
- Py_INCREF(pattern);
- match->pattern = pattern;
-
- Py_INCREF(state->string);
- match->string = state->string;
-
- match->regs = NULL;
- match->groups = pattern->groups+1;
-
- /* fill in group slices */
-
- base = (char*) state->beginning;
- n = state->charsize;
-
- match->mark[0] = ((char*) state->start - base) / n;
- match->mark[1] = ((char*) state->ptr - base) / n;
-
- for (i = j = 0; i < pattern->groups; i++, j+=2)
- if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
- match->mark[j+2] = ((char*) state->mark[j] - base) / n;
- match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
- } else
- match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
-
- match->pos = state->pos;
- match->endpos = state->endpos;
-
- match->lastindex = state->lastindex;
-
- return (PyObject*) match;
-
- } else if (status == 0) {
-
- /* no match */
- Py_INCREF(Py_None);
- return Py_None;
-
- }
-
- /* internal error */
- pattern_error(status);
- return NULL;
-}
-
-static PyObject*
-pattern_scanner(PatternObject* pattern, PyObject* args)
-{
- /* create search state object */
-
- ScannerObject* self;
-
- PyObject* string;
- int start = 0;
- int end = INT_MAX;
- if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
- return NULL;
-
- /* create scanner object */
- self = PyObject_NEW(ScannerObject, &Scanner_Type);
- if (!self)
- return NULL;
-
- string = state_init(&self->state, pattern, string, start, end);
- if (!string) {
- PyObject_DEL(self);
- return NULL;
- }
-
- Py_INCREF(pattern);
- self->pattern = (PyObject*) pattern;
-
- return (PyObject*) self;
-}
-
static void
pattern_dealloc(PatternObject* self)
{
@@ -2414,7 +2258,7 @@ error:
}
static PyObject*
-pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
+pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
int count, int subn)
{
SRE_STATE state;
@@ -2429,21 +2273,21 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int i, b, e;
int filter_is_callable;
- if (PyCallable_Check(template)) {
+ if (PyCallable_Check(ptemplate)) {
/* sub/subn takes either a function or a template */
- filter = template;
+ filter = ptemplate;
Py_INCREF(filter);
filter_is_callable = 1;
} else {
/* if not callable, check if it's a literal string */
int literal;
- ptr = getstring(template, &n, &b);
+ ptr = getstring(ptemplate, &n, &b);
if (ptr) {
if (b == 1) {
- literal = sre_literal_template(ptr, n);
+ literal = sre_literal_template((unsigned char *)ptr, n);
} else {
#if defined(HAVE_UNICODE)
- literal = sre_uliteral_template(ptr, n);
+ literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
#endif
}
} else {
@@ -2451,14 +2295,14 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
literal = 0;
}
if (literal) {
- filter = template;
+ filter = ptemplate;
Py_INCREF(filter);
filter_is_callable = 0;
} else {
/* not a literal; hand it over to the template compiler */
filter = call(
SRE_PY_MODULE, "_subx",
- PyTuple_Pack(2, self, template)
+ PyTuple_Pack(2, self, ptemplate)
);
if (!filter)
return NULL;
@@ -2597,29 +2441,29 @@ error:
static PyObject*
pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
{
- PyObject* template;
+ PyObject* ptemplate;
PyObject* string;
int count = 0;
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:sub", kwlist,
- &template, &string, &count))
+ &ptemplate, &string, &count))
return NULL;
- return pattern_subx(self, template, string, count, 0);
+ return pattern_subx(self, ptemplate, string, count, 0);
}
static PyObject*
pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
{
- PyObject* template;
+ PyObject* ptemplate;
PyObject* string;
int count = 0;
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|i:subn", kwlist,
- &template, &string, &count))
+ &ptemplate, &string, &count))
return NULL;
- return pattern_subx(self, template, string, count, 1);
+ return pattern_subx(self, ptemplate, string, count, 1);
}
static PyObject*
@@ -2799,6 +2643,68 @@ static PyTypeObject Pattern_Type = {
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
};
+static PyObject *
+_compile(PyObject* self_, PyObject* args)
+{
+ /* "compile" pattern descriptor to pattern object */
+
+ PatternObject* self;
+ int i, n;
+
+ PyObject* pattern;
+ int flags = 0;
+ PyObject* code;
+ int groups = 0;
+ PyObject* groupindex = NULL;
+ PyObject* indexgroup = NULL;
+ if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
+ &PyList_Type, &code, &groups,
+ &groupindex, &indexgroup))
+ return NULL;
+
+ n = PyList_GET_SIZE(code);
+
+ self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
+ if (!self)
+ return NULL;
+
+ self->codesize = n;
+
+ for (i = 0; i < n; i++) {
+ PyObject *o = PyList_GET_ITEM(code, i);
+ unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
+ : PyLong_AsUnsignedLong(o);
+ self->code[i] = (SRE_CODE) value;
+ if ((unsigned long) self->code[i] != value) {
+ PyErr_SetString(PyExc_OverflowError,
+ "regular expression code size limit exceeded");
+ break;
+ }
+ }
+
+ if (PyErr_Occurred()) {
+ PyObject_DEL(self);
+ return NULL;
+ }
+
+ Py_INCREF(pattern);
+ self->pattern = pattern;
+
+ self->flags = flags;
+
+ self->groups = groups;
+
+ Py_XINCREF(groupindex);
+ self->groupindex = groupindex;
+
+ Py_XINCREF(indexgroup);
+ self->indexgroup = indexgroup;
+
+ self->weakreflist = NULL;
+
+ return (PyObject*) self;
+}
+
/* -------------------------------------------------------------------- */
/* match methods */
@@ -2868,14 +2774,14 @@ match_getslice(MatchObject* self, PyObject* index, PyObject* def)
static PyObject*
match_expand(MatchObject* self, PyObject* args)
{
- PyObject* template;
- if (!PyArg_ParseTuple(args, "O:expand", &template))
+ PyObject* ptemplate;
+ if (!PyArg_ParseTuple(args, "O:expand", &ptemplate))
return NULL;
/* delegate to Python code */
return call(
SRE_PY_MODULE, "_expand",
- PyTuple_Pack(3, self->pattern, self, template)
+ PyTuple_Pack(3, self->pattern, self, ptemplate)
);
}
@@ -3262,6 +3168,69 @@ static PyTypeObject Match_Type = {
(getattrfunc)match_getattr /*tp_getattr*/
};
+static PyObject*
+pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
+{
+ /* create match object (from state object) */
+
+ MatchObject* match;
+ int i, j;
+ char* base;
+ int n;
+
+ if (status > 0) {
+
+ /* create match object (with room for extra group marks) */
+ match = PyObject_NEW_VAR(MatchObject, &Match_Type,
+ 2*(pattern->groups+1));
+ if (!match)
+ return NULL;
+
+ Py_INCREF(pattern);
+ match->pattern = pattern;
+
+ Py_INCREF(state->string);
+ match->string = state->string;
+
+ match->regs = NULL;
+ match->groups = pattern->groups+1;
+
+ /* fill in group slices */
+
+ base = (char*) state->beginning;
+ n = state->charsize;
+
+ match->mark[0] = ((char*) state->start - base) / n;
+ match->mark[1] = ((char*) state->ptr - base) / n;
+
+ for (i = j = 0; i < pattern->groups; i++, j+=2)
+ if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
+ match->mark[j+2] = ((char*) state->mark[j] - base) / n;
+ match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
+ } else
+ match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
+
+ match->pos = state->pos;
+ match->endpos = state->endpos;
+
+ match->lastindex = state->lastindex;
+
+ return (PyObject*) match;
+
+ } else if (status == 0) {
+
+ /* no match */
+ Py_INCREF(Py_None);
+ return Py_None;
+
+ }
+
+ /* internal error */
+ pattern_error(status);
+ return NULL;
+}
+
+
/* -------------------------------------------------------------------- */
/* scanner methods (experimental) */
@@ -3372,6 +3341,36 @@ static PyTypeObject Scanner_Type = {
(getattrfunc)scanner_getattr, /*tp_getattr*/
};
+static PyObject*
+pattern_scanner(PatternObject* pattern, PyObject* args)
+{
+ /* create search state object */
+
+ ScannerObject* self;
+
+ PyObject* string;
+ int start = 0;
+ int end = INT_MAX;
+ if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
+ return NULL;
+
+ /* create scanner object */
+ self = PyObject_NEW(ScannerObject, &Scanner_Type);
+ if (!self)
+ return NULL;
+
+ string = state_init(&self->state, pattern, string, start, end);
+ if (!string) {
+ PyObject_DEL(self);
+ return NULL;
+ }
+
+ Py_INCREF(pattern);
+ self->pattern = (PyObject*) pattern;
+
+ return (PyObject*) self;
+}
+
static PyMethodDef _functions[] = {
{"compile", _compile, METH_VARARGS},
{"getcodesize", sre_codesize, METH_VARARGS},