summary | refs | log | tree | commit | diff | stats
path: root/Modules/_sre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r-- Modules/_sre.c 182
1 files changed, 95 insertions, 87 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 5bcc387..84330ef 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -70,10 +70,6 @@ static char copyright[] =
/* enables copy/deepcopy handling (work in progress) */
#undef USE_BUILTIN_COPY
-#if PY_VERSION_HEX < 0x01060000
-#define PyObject_DEL(op) PyMem_DEL((op))
-#endif
-
/* -------------------------------------------------------------------- */
#if defined(_MSC_VER)
@@ -804,7 +800,7 @@ entrance:
if (ctx->pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
- if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
+ if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
"need %" PY_FORMAT_SIZE_T "d)\n",
(end - ctx->ptr)/state->charsize,
@@ -1816,6 +1812,24 @@ state_fini(SRE_STATE* state)
(((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
LOCAL(PyObject*)
+getslice(int logical_charsize, const void *ptr,
+         PyObject* string, Py_ssize_t start, Py_ssize_t end)
+{
+    /* Return string[start:end] as a new reference.  Text is sliced with
+       PyUnicode_Substring(); byte data is copied out of ptr. */
+    if (logical_charsize != 1)
+        return PyUnicode_Substring(string, start, end);
+    /* An exact bytes object sliced over its full length is reused as is. */
+    if (start == 0 && PyBytes_CheckExact(string) &&
+        PyBytes_GET_SIZE(string) == end) {
+        Py_INCREF(string);
+        return string;
+    }
+    return PyBytes_FromStringAndSize((const char *)ptr + start,
+                                     end - start);
+}
+
+LOCAL(PyObject*)
state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
{
Py_ssize_t i, j;
@@ -1835,7 +1849,7 @@ state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
j = STATE_OFFSET(state, state->mark[index+1]);
}
- return PySequence_GetSlice(string, i, j);
+ return getslice(state->logical_charsize, state->beginning, string, i, j);
}
static void
@@ -1880,7 +1894,7 @@ static PyObject*
pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE state;
- int status;
+ Py_ssize_t status;
PyObject* string;
Py_ssize_t start = 0;
@@ -1997,59 +2011,11 @@ deepcopy(PyObject** object, PyObject* memo)
#endif
static PyObject*
-join_list(PyObject* list, PyObject* string)
-{
- /* join list elements */
-
- PyObject* joiner;
-#if PY_VERSION_HEX >= 0x01060000
- PyObject* function;
- PyObject* args;
-#endif
- PyObject* result;
-
- joiner = PySequence_GetSlice(string, 0, 0);
- if (!joiner)
- return NULL;
-
- if (PyList_GET_SIZE(list) == 0) {
- Py_DECREF(list);
- return joiner;
- }
-
-#if PY_VERSION_HEX >= 0x01060000
- function = PyObject_GetAttrString(joiner, "join");
- if (!function) {
- Py_DECREF(joiner);
- return NULL;
- }
- args = PyTuple_New(1);
- if (!args) {
- Py_DECREF(function);
- Py_DECREF(joiner);
- return NULL;
- }
- PyTuple_SET_ITEM(args, 0, list);
- result = PyObject_CallObject(function, args);
- Py_DECREF(args); /* also removes list */
- Py_DECREF(function);
-#else
- result = call(
- "string", "join",
- PyTuple_Pack(2, list, joiner)
- );
-#endif
- Py_DECREF(joiner);
-
- return result;
-}
-
-static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{
SRE_STATE state;
PyObject* list;
- int status;
+ Py_ssize_t status;
Py_ssize_t i, b, e;
PyObject* string;
@@ -2099,7 +2065,8 @@ pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
case 0:
b = STATE_OFFSET(&state, state.start);
e = STATE_OFFSET(&state, state.ptr);
- item = PySequence_GetSlice(string, b, e);
+ item = getslice(state.logical_charsize, state.beginning,
+ string, b, e);
if (!item)
goto error;
break;
@@ -2145,7 +2112,6 @@ error:
}
-#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
{
@@ -2167,7 +2133,6 @@ pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
return iterator;
}
-#endif
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
@@ -2175,7 +2140,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
SRE_STATE state;
PyObject* list;
PyObject* item;
- int status;
+ Py_ssize_t status;
Py_ssize_t n;
Py_ssize_t i;
void* last;
@@ -2231,7 +2196,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
}
/* get segment before this match */
- item = PySequence_GetSlice(
+ item = getslice(state.logical_charsize, state.beginning,
string, STATE_OFFSET(&state, last),
STATE_OFFSET(&state, state.start)
);
@@ -2260,7 +2225,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
}
/* get segment following last match (even if empty) */
- item = PySequence_GetSlice(
+ item = getslice(state.logical_charsize, state.beginning,
string, STATE_OFFSET(&state, last), state.endpos
);
if (!item)
@@ -2286,12 +2251,13 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
{
SRE_STATE state;
PyObject* list;
+ PyObject* joiner;
PyObject* item;
PyObject* filter;
PyObject* args;
PyObject* match;
void* ptr;
- int status;
+ Py_ssize_t status;
Py_ssize_t n;
Py_ssize_t i, b, e;
int logical_charsize, charsize;
@@ -2310,7 +2276,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
ptr = getstring(ptemplate, &n, &logical_charsize, &charsize, &view);
b = charsize;
if (ptr) {
- literal = sre_literal_template(b, ptr, n);
+ literal = sre_literal_template(charsize, ptr, n);
} else {
PyErr_Clear();
literal = 0;
@@ -2375,7 +2341,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
if (i < b) {
/* get segment before this match */
- item = PySequence_GetSlice(string, i, b);
+ item = getslice(state.logical_charsize, state.beginning,
+ string, i, b);
if (!item)
goto error;
status = PyList_Append(list, item);
@@ -2430,7 +2397,8 @@ next:
/* get segment following last match */
if (i < state.endpos) {
- item = PySequence_GetSlice(string, i, state.endpos);
+ item = getslice(state.logical_charsize, state.beginning,
+ string, i, state.endpos);
if (!item)
goto error;
status = PyList_Append(list, item);
@@ -2444,10 +2412,25 @@ next:
Py_DECREF(filter);
/* convert list to single string (also removes list) */
- item = join_list(list, string);
-
- if (!item)
+ joiner = getslice(state.logical_charsize, state.beginning, string, 0, 0);
+ if (!joiner) {
+ Py_DECREF(list);
return NULL;
+ }
+ if (PyList_GET_SIZE(list) == 0) {
+ Py_DECREF(list);
+ item = joiner;
+ }
+ else {
+ if (state.logical_charsize == 1)
+ item = _PyBytes_Join(joiner, list);
+ else
+ item = PyUnicode_Join(joiner, list);
+ Py_DECREF(joiner);
+ Py_DECREF(list);
+ if (!item)
+ return NULL;
+ }
if (subn)
return Py_BuildValue("Nn", item, n);
@@ -2590,10 +2573,8 @@ static PyMethodDef pattern_methods[] = {
pattern_split_doc},
{"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
pattern_findall_doc},
-#if PY_VERSION_HEX >= 0x02020000
{"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
pattern_finditer_doc},
-#endif
{"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
{"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
{"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
@@ -2788,7 +2769,7 @@ _compile(PyObject* self_, PyObject* args)
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
- if (skip-adj > end-code) \
+ if (skip-adj > (Py_uintptr_t)(end - code)) \
FAIL; \
code++; \
} while (0)
@@ -2821,7 +2802,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
case SRE_OP_CHARSET:
offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
- if (offset > end-code)
+ if (offset > (Py_uintptr_t)(end - code))
FAIL;
code += offset;
break;
@@ -2829,7 +2810,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
case SRE_OP_BIGCHARSET:
GET_ARG; /* Number of blocks */
offset = 256/sizeof(SRE_CODE); /* 256-byte table */
- if (offset > end-code)
+ if (offset > (Py_uintptr_t)(end - code))
FAIL;
/* Make sure that each byte points to a valid block */
for (i = 0; i < 256; i++) {
@@ -2838,7 +2819,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
code += offset;
offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
- if (offset > end-code)
+ if (offset > (Py_uintptr_t)(end - code))
FAIL;
code += offset;
break;
@@ -2901,7 +2882,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
sre_match() code is robust even if they don't, and the worst
you can get is nonsensical match results. */
GET_ARG;
- if (arg > 2*groups+1) {
+ if (arg > 2 * (size_t)groups + 1) {
VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
FAIL;
}
@@ -2989,11 +2970,11 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
GET_ARG; prefix_len = arg;
GET_ARG;
/* Here comes the prefix string */
- if (prefix_len > newcode-code)
+ if (prefix_len > (Py_uintptr_t)(newcode - code))
FAIL;
code += prefix_len;
/* And here comes the overlap table */
- if (prefix_len > newcode-code)
+ if (prefix_len > (Py_uintptr_t)(newcode - code))
FAIL;
/* Each overlap value should be < prefix_len */
for (i = 0; i < prefix_len; i++) {
@@ -3084,7 +3065,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_GROUPREF:
case SRE_OP_GROUPREF_IGNORE:
GET_ARG;
- if (arg >= groups)
+ if (arg >= (size_t)groups)
FAIL;
break;
@@ -3093,7 +3074,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
'group' is either an integer group number or a group name,
'then' and 'else' are sub-regexes, and 'else' is optional. */
GET_ARG;
- if (arg >= groups)
+ if (arg >= (size_t)groups)
FAIL;
GET_SKIP_ADJ(1);
code--; /* The skip is relative to the first arg! */
@@ -3122,7 +3103,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
- if (skip >= 3 && skip-3 < end-code &&
+ if (skip >= 3 && skip-3 < (Py_uintptr_t)(end - code) &&
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
@@ -3206,6 +3187,12 @@ match_dealloc(MatchObject* self)
static PyObject*
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
{
+ Py_ssize_t length;
+ int logical_charsize, charsize;
+ Py_buffer view;
+ PyObject *result;
+ void* ptr;
+
if (index < 0 || index >= self->groups) {
/* raise IndexError if we were given a bad group number */
PyErr_SetString(
@@ -3223,9 +3210,14 @@ match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
return def;
}
- return PySequence_GetSlice(
- self->string, self->mark[index], self->mark[index+1]
- );
+ ptr = getstring(self->string, &length, &logical_charsize, &charsize, &view);
+ if (ptr == NULL)
+ return NULL;
+ result = getslice(logical_charsize, ptr,
+ self->string, self->mark[index], self->mark[index+1]);
+ if (logical_charsize == 1 && view.buf != NULL)
+ PyBuffer_Release(&view);
+ return result;
}
static Py_ssize_t
@@ -3645,6 +3637,22 @@ match_regs_get(MatchObject *self)
return match_regs(self);
}
+/* repr() for match objects: reports the type name, the span of group 0
+   and the repr of the matched text (precision-limited by %.50R). */
+static PyObject *
+match_repr(MatchObject *self)
+{
+    PyObject *result;
+    PyObject *group0 = match_getslice_by_index(self, 0, Py_None);
+    if (group0 == NULL)
+        return NULL;
+    /* The mark[] entries are Py_ssize_t (per MatchObject's declaration), so
+       they need %zd; %d would fetch them from the varargs as plain int. */
+    result = PyUnicode_FromFormat(
+        "<%s object; span=(%zd, %zd), match=%.50R>",
+        Py_TYPE(self)->tp_name,
+        self->mark[0], self->mark[1], group0);
+    Py_DECREF(group0);
+    return result;
+}
+
+
static PyGetSetDef match_getset[] = {
{"lastindex", (getter)match_lastindex_get, (setter)NULL},
{"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
@@ -3673,7 +3681,7 @@ static PyTypeObject Match_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
- 0, /* tp_repr */
+ (reprfunc)match_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
@@ -3776,7 +3784,7 @@ scanner_match(ScannerObject* self, PyObject *unused)
{
SRE_STATE* state = &self->state;
PyObject* match;
- int status;
+ Py_ssize_t status;
state_reset(state);
@@ -3807,7 +3815,7 @@ scanner_search(ScannerObject* self, PyObject *unused)
{
SRE_STATE* state = &self->state;
PyObject* match;
- int status;
+ Py_ssize_t status;
state_reset(state);