summaryrefslogtreecommitdiffstats
path: root/Modules/_sre.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_sre.c')
-rw-r--r--Modules/_sre.c1053
1 files changed, 588 insertions, 465 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index d6fcda1..957ccbc 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -97,48 +97,25 @@ static char copyright[] =
/* -------------------------------------------------------------------- */
/* search engine state */
-/* default character predicates (run sre_chars.py to regenerate tables) */
-
-#define SRE_DIGIT_MASK 1
-#define SRE_SPACE_MASK 2
-#define SRE_LINEBREAK_MASK 4
-#define SRE_ALNUM_MASK 8
-#define SRE_WORD_MASK 16
-
-/* FIXME: this assumes ASCII. create tables in init_sre() instead */
-
-static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
-2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
-25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
-0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
-24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
-
-static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
-27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
-61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
-108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
-106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-120, 121, 122, 123, 124, 125, 126, 127 };
-
#define SRE_IS_DIGIT(ch)\
- ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
+ ((ch) < 128 && Py_ISDIGIT(ch))
#define SRE_IS_SPACE(ch)\
- ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
+ ((ch) < 128 && Py_ISSPACE(ch))
#define SRE_IS_LINEBREAK(ch)\
- ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
+ ((ch) == '\n')
#define SRE_IS_ALNUM(ch)\
- ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
+ ((ch) < 128 && Py_ISALNUM(ch))
#define SRE_IS_WORD(ch)\
- ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
+ ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
static unsigned int sre_lower(unsigned int ch)
{
- return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
+ return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
+}
+
+static unsigned int sre_upper(unsigned int ch)
+{
+ return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
}
/* locale-specific character predicates */
@@ -152,6 +129,11 @@ static unsigned int sre_lower_locale(unsigned int ch)
return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
}
+static unsigned int sre_upper_locale(unsigned int ch)
+{
+ return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch);
+}
+
/* unicode-specific character predicates */
#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch)
@@ -165,6 +147,11 @@ static unsigned int sre_lower_unicode(unsigned int ch)
return (unsigned int) Py_UNICODE_TOLOWER(ch);
}
+static unsigned int sre_upper_unicode(unsigned int ch)
+{
+ return (unsigned int) Py_UNICODE_TOUPPER(ch);
+}
+
LOCAL(int)
sre_category(SRE_CODE category, unsigned int ch)
{
@@ -271,25 +258,50 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
/* see sre.h for object declarations */
static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, Py_ssize_t);
-static PyObject*pattern_scanner(PatternObject*, PyObject*, PyObject* kw);
+static PyObject *pattern_scanner(PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t);
-static PyObject *
-sre_codesize(PyObject* self, PyObject *unused)
+
+/*[clinic input]
+module _sre
+class _sre.SRE_Pattern "PatternObject *" "&Pattern_Type"
+class _sre.SRE_Match "MatchObject *" "&Match_Type"
+class _sre.SRE_Scanner "ScannerObject *" "&Scanner_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b0230ec19a0deac8]*/
+
+static PyTypeObject Pattern_Type;
+static PyTypeObject Match_Type;
+static PyTypeObject Scanner_Type;
+
+/*[clinic input]
+_sre.getcodesize -> int
+[clinic start generated code]*/
+
+static int
+_sre_getcodesize_impl(PyModuleDef *module)
+/*[clinic end generated code: output=794f1f98ef4883e5 input=bd6f6ecf4916bb2b]*/
{
- return PyLong_FromSize_t(sizeof(SRE_CODE));
+ return sizeof(SRE_CODE);
}
-static PyObject *
-sre_getlower(PyObject* self, PyObject* args)
+/*[clinic input]
+_sre.getlower -> int
+
+ character: int
+ flags: int
+ /
+
+[clinic start generated code]*/
+
+static int
+_sre_getlower_impl(PyModuleDef *module, int character, int flags)
+/*[clinic end generated code: output=5fc3616ae2a4c306 input=087d2f1c44bbca6f]*/
{
- int character, flags;
- if (!PyArg_ParseTuple(args, "ii", &character, &flags))
- return NULL;
if (flags & SRE_FLAG_LOCALE)
- return Py_BuildValue("i", sre_lower_locale(character));
+ return sre_lower_locale(character);
if (flags & SRE_FLAG_UNICODE)
- return Py_BuildValue("i", sre_lower_unicode(character));
- return Py_BuildValue("i", sre_lower(character));
+ return sre_lower_unicode(character);
+ return sre_lower(character);
}
LOCAL(void)
@@ -328,7 +340,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
/* get pointer to byte string buffer */
if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
- PyErr_SetString(PyExc_TypeError, "expected string or buffer");
+ PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
return NULL;
}
@@ -357,6 +369,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
memset(state, 0, sizeof(SRE_STATE));
+ state->mark = PyMem_New(void *, pattern->groups * 2);
+ if (!state->mark) {
+ PyErr_NoMemory();
+ goto err;
+ }
state->lastmark = -1;
state->lastindex = -1;
@@ -367,12 +384,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if (isbytes && pattern->isbytes == 0) {
PyErr_SetString(PyExc_TypeError,
- "can't use a string pattern on a bytes-like object");
+ "cannot use a string pattern on a bytes-like object");
goto err;
}
if (!isbytes && pattern->isbytes > 0) {
PyErr_SetString(PyExc_TypeError,
- "can't use a bytes pattern on a string-like object");
+ "cannot use a bytes pattern on a string-like object");
goto err;
}
@@ -400,15 +417,23 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->pos = start;
state->endpos = end;
- if (pattern->flags & SRE_FLAG_LOCALE)
+ if (pattern->flags & SRE_FLAG_LOCALE) {
state->lower = sre_lower_locale;
- else if (pattern->flags & SRE_FLAG_UNICODE)
+ state->upper = sre_upper_locale;
+ }
+ else if (pattern->flags & SRE_FLAG_UNICODE) {
state->lower = sre_lower_unicode;
- else
+ state->upper = sre_upper_unicode;
+ }
+ else {
state->lower = sre_lower;
+ state->upper = sre_upper;
+ }
return string;
err:
+ PyMem_Del(state->mark);
+ state->mark = NULL;
if (state->buffer.buf)
PyBuffer_Release(&state->buffer);
return NULL;
@@ -421,6 +446,8 @@ state_fini(SRE_STATE* state)
PyBuffer_Release(&state->buffer);
Py_XDECREF(state->string);
data_stack_dealloc(state);
+ PyMem_Del(state->mark);
+ state->mark = NULL;
}
/* calculate offset from start of string */
@@ -473,8 +500,9 @@ pattern_error(Py_ssize_t status)
{
switch (status) {
case SRE_ERROR_RECURSION_LIMIT:
+ /* This error code seems to be unused. */
PyErr_SetString(
- PyExc_RuntimeError,
+ PyExc_RecursionError,
"maximum recursion limit exceeded"
);
break;
@@ -550,26 +578,32 @@ fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
return string;
}
+/*[clinic input]
+_sre.SRE_Pattern.match
+
+ string: object = NULL
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+ *
+ pattern: object = NULL
+
+Matches zero or more characters at the beginning of the string.
+[clinic start generated code]*/
+
static PyObject *
-pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
+_sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos,
+ PyObject *pattern)
+/*[clinic end generated code: output=74b4b1da3bb2d84e input=3d079aa99979b81d]*/
{
- static char *_keywords[] = {"string", "pos", "endpos", "pattern", NULL};
- PyObject *string = NULL;
- Py_ssize_t pos = 0;
- Py_ssize_t endpos = PY_SSIZE_T_MAX;
- PyObject *pattern = NULL;
SRE_STATE state;
Py_ssize_t status;
+ PyObject *match;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs,
- "|Onn$O:match", _keywords,
- &string, &pos, &endpos, &pattern))
- return NULL;
string = fix_string_param(string, pattern, "pattern");
if (!string)
return NULL;
- string = state_init(&state, (PatternObject *)self, string, pos, endpos);
- if (!string)
+ if (!state_init(&state, (PatternObject *)self, string, pos, endpos))
return NULL;
state.ptr = state.start;
@@ -579,34 +613,43 @@ pattern_match(PatternObject *self, PyObject *args, PyObject *kwargs)
status = sre_match(&state, PatternObject_GetCode(self), 0);
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
- if (PyErr_Occurred())
+ if (PyErr_Occurred()) {
+ state_fini(&state);
return NULL;
+ }
+ match = pattern_new_match(self, &state, status);
state_fini(&state);
-
- return (PyObject *)pattern_new_match(self, &state, status);
+ return match;
}
-static PyObject*
-pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Pattern.fullmatch
+
+ string: object = NULL
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+ *
+ pattern: object = NULL
+
+Matches against all of the string
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos,
+ PyObject *pattern)
+/*[clinic end generated code: output=1c98bc5da744ea94 input=d4228606cc12580f]*/
{
SRE_STATE state;
Py_ssize_t status;
+ PyObject *match;
- PyObject *string = NULL, *string2 = NULL;
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:fullmatch", kwlist,
- &string, &start, &end, &string2))
- return NULL;
-
- string = fix_string_param(string, string2, "pattern");
+ string = fix_string_param(string, pattern, "pattern");
if (!string)
return NULL;
- string = state_init(&state, self, string, start, end);
- if (!string)
+ if (!state_init(&state, self, string, pos, endpos))
return NULL;
state.ptr = state.start;
@@ -616,34 +659,45 @@ pattern_fullmatch(PatternObject* self, PyObject* args, PyObject* kw)
status = sre_match(&state, PatternObject_GetCode(self), 1);
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
- if (PyErr_Occurred())
+ if (PyErr_Occurred()) {
+ state_fini(&state);
return NULL;
+ }
+ match = pattern_new_match(self, &state, status);
state_fini(&state);
-
- return pattern_new_match(self, &state, status);
+ return match;
}
-static PyObject*
-pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Pattern.search
+
+ string: object = NULL
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+ *
+ pattern: object = NULL
+
+Scan through string looking for a match, and return a corresponding match object instance.
+
+Return None if no position in the string matches.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_search_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos,
+ PyObject *pattern)
+/*[clinic end generated code: output=3839394a18e5ea4f input=dab42720f4be3a4b]*/
{
SRE_STATE state;
Py_ssize_t status;
+ PyObject *match;
- PyObject *string = NULL, *string2 = NULL;
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:search", kwlist,
- &string, &start, &end, &string2))
- return NULL;
-
- string = fix_string_param(string, string2, "pattern");
+ string = fix_string_param(string, pattern, "pattern");
if (!string)
return NULL;
- string = state_init(&state, self, string, start, end);
- if (!string)
+ if (!state_init(&state, self, string, pos, endpos))
return NULL;
TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
@@ -652,12 +706,14 @@ pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
- state_fini(&state);
-
- if (PyErr_Occurred())
+ if (PyErr_Occurred()) {
+ state_fini(&state);
return NULL;
+ }
- return pattern_new_match(self, &state, status);
+ match = pattern_new_match(self, &state, status);
+ state_fini(&state);
+ return match;
}
static PyObject*
@@ -707,28 +763,34 @@ deepcopy(PyObject** object, PyObject* memo)
}
#endif
-static PyObject*
-pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Pattern.findall
+
+ string: object = NULL
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+ *
+ source: object = NULL
+
+Return a list of all non-overlapping matches of pattern in string.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos,
+ PyObject *source)
+/*[clinic end generated code: output=51295498b300639d input=df688355c056b9de]*/
{
SRE_STATE state;
PyObject* list;
Py_ssize_t status;
Py_ssize_t i, b, e;
- PyObject *string = NULL, *string2 = NULL;
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:findall", kwlist,
- &string, &start, &end, &string2))
- return NULL;
-
- string = fix_string_param(string, string2, "source");
+ string = fix_string_param(string, source, "source");
if (!string)
return NULL;
- string = state_init(&state, self, string, start, end);
- if (!string)
+ if (!state_init(&state, self, string, pos, endpos))
return NULL;
list = PyList_New(0);
@@ -808,14 +870,28 @@ error:
}
-static PyObject*
-pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Pattern.finditer
+
+ string: object
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+
+Return an iterator over all non-overlapping matches for the RE pattern in string.
+
+For each match, the iterator returns a match object.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos)
+/*[clinic end generated code: output=0bbb1a0aeb38bb14 input=612aab69e9fe08e4]*/
{
PyObject* scanner;
PyObject* search;
PyObject* iterator;
- scanner = pattern_scanner(pattern, args, kw);
+ scanner = pattern_scanner(self, string, pos, endpos);
if (!scanner)
return NULL;
@@ -830,8 +906,38 @@ pattern_finditer(PatternObject* pattern, PyObject* args, PyObject* kw)
return iterator;
}
-static PyObject*
-pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Pattern.scanner
+
+ string: object
+ pos: Py_ssize_t = 0
+ endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t pos, Py_ssize_t endpos)
+/*[clinic end generated code: output=54ea548aed33890b input=3aacdbde77a3a637]*/
+{
+ return pattern_scanner(self, string, pos, endpos);
+}
+
+/*[clinic input]
+_sre.SRE_Pattern.split
+
+ string: object = NULL
+ maxsplit: Py_ssize_t = 0
+ *
+ source: object = NULL
+
+Split string by the occurrences of pattern.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
+ Py_ssize_t maxsplit, PyObject *source)
+/*[clinic end generated code: output=20bac2ff55b9f84c input=41e0b2e35e599d7b]*/
{
SRE_STATE state;
PyObject* list;
@@ -841,19 +947,24 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
Py_ssize_t i;
void* last;
- PyObject *string = NULL, *string2 = NULL;
- Py_ssize_t maxsplit = 0;
- static char* kwlist[] = { "string", "maxsplit", "source", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|On$O:split", kwlist,
- &string, &maxsplit, &string2))
- return NULL;
-
- string = fix_string_param(string, string2, "source");
+ string = fix_string_param(string, source, "source");
if (!string)
return NULL;
- string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
- if (!string)
+ assert(self->codesize != 0);
+ if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
+ if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "split() requires a non-empty pattern match.");
+ return NULL;
+ }
+ if (PyErr_WarnEx(PyExc_FutureWarning,
+ "split() requires a non-empty pattern match.",
+ 1) < 0)
+ return NULL;
+ }
+
+ if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
return NULL;
list = PyList_New(0);
@@ -997,8 +1108,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
}
}
- string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
- if (!string) {
+ if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
Py_DECREF(filter);
return NULL;
}
@@ -1140,36 +1250,50 @@ error:
}
-static PyObject*
-pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
-{
- PyObject* ptemplate;
- PyObject* string;
- Py_ssize_t count = 0;
- static char* kwlist[] = { "repl", "string", "count", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
- &ptemplate, &string, &count))
- return NULL;
+/*[clinic input]
+_sre.SRE_Pattern.sub
- return pattern_subx(self, ptemplate, string, count, 0);
+ repl: object
+ string: object
+ count: Py_ssize_t = 0
+
+Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_sub_impl(PatternObject *self, PyObject *repl,
+ PyObject *string, Py_ssize_t count)
+/*[clinic end generated code: output=1dbf2ec3479cba00 input=c53d70be0b3caf86]*/
+{
+ return pattern_subx(self, repl, string, count, 0);
}
-static PyObject*
-pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
-{
- PyObject* ptemplate;
- PyObject* string;
- Py_ssize_t count = 0;
- static char* kwlist[] = { "repl", "string", "count", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
- &ptemplate, &string, &count))
- return NULL;
+/*[clinic input]
+_sre.SRE_Pattern.subn
+
+ repl: object
+ string: object
+ count: Py_ssize_t = 0
- return pattern_subx(self, ptemplate, string, count, 1);
+Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern_subn_impl(PatternObject *self, PyObject *repl,
+ PyObject *string, Py_ssize_t count)
+/*[clinic end generated code: output=0d9522cd529e9728 input=e7342d7ce6083577]*/
+{
+ return pattern_subx(self, repl, string, count, 1);
}
-static PyObject*
-pattern_copy(PatternObject* self, PyObject *unused)
+/*[clinic input]
+_sre.SRE_Pattern.__copy__
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern___copy___impl(PatternObject *self)
+/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/
{
#ifdef USE_BUILTIN_COPY
PatternObject* copy;
@@ -1196,8 +1320,16 @@ pattern_copy(PatternObject* self, PyObject *unused)
#endif
}
-static PyObject*
-pattern_deepcopy(PatternObject* self, PyObject* memo)
+/*[clinic input]
+_sre.SRE_Pattern.__deepcopy__
+
+ memo: object
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Pattern___deepcopy___impl(PatternObject *self, PyObject *memo)
+/*[clinic end generated code: output=75efe69bd12c5d7d input=3959719482c07f70]*/
{
#ifdef USE_BUILTIN_COPY
PatternObject* copy;
@@ -1238,7 +1370,7 @@ pattern_repr(PatternObject *obj)
};
PyObject *result = NULL;
PyObject *flag_items;
- int i;
+ size_t i;
int flags = obj->flags;
/* Omit re.UNICODE for valid string patterns. */
@@ -1299,131 +1431,40 @@ done:
return result;
}
-PyDoc_STRVAR(pattern_match_doc,
-"match(string[, pos[, endpos]]) -> match object or None.\n\
- Matches zero or more characters at the beginning of the string");
-
-PyDoc_STRVAR(pattern_fullmatch_doc,
-"fullmatch(string[, pos[, endpos]]) -> match object or None.\n\
- Matches against all of the string");
-
-PyDoc_STRVAR(pattern_search_doc,
-"search(string[, pos[, endpos]]) -> match object or None.\n\
- Scan through string looking for a match, and return a corresponding\n\
- match object instance. Return None if no position in the string matches.");
-
-PyDoc_STRVAR(pattern_split_doc,
-"split(string[, maxsplit = 0]) -> list.\n\
- Split string by the occurrences of pattern.");
-
-PyDoc_STRVAR(pattern_findall_doc,
-"findall(string[, pos[, endpos]]) -> list.\n\
- Return a list of all non-overlapping matches of pattern in string.");
-
-PyDoc_STRVAR(pattern_finditer_doc,
-"finditer(string[, pos[, endpos]]) -> iterator.\n\
- Return an iterator over all non-overlapping matches for the \n\
- RE pattern in string. For each match, the iterator returns a\n\
- match object.");
-
-PyDoc_STRVAR(pattern_sub_doc,
-"sub(repl, string[, count = 0]) -> newstring.\n\
- Return the string obtained by replacing the leftmost non-overlapping\n\
- occurrences of pattern in string by the replacement repl.");
-
-PyDoc_STRVAR(pattern_subn_doc,
-"subn(repl, string[, count = 0]) -> (newstring, number of subs)\n\
- Return the tuple (new_string, number_of_subs_made) found by replacing\n\
- the leftmost non-overlapping occurrences of pattern with the\n\
- replacement repl.");
-
PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
-static PyMethodDef pattern_methods[] = {
- {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
- pattern_match_doc},
- {"fullmatch", (PyCFunction) pattern_fullmatch, METH_VARARGS|METH_KEYWORDS,
- pattern_fullmatch_doc},
- {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
- pattern_search_doc},
- {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
- pattern_sub_doc},
- {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
- pattern_subn_doc},
- {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
- pattern_split_doc},
- {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
- pattern_findall_doc},
- {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS|METH_KEYWORDS,
- pattern_finditer_doc},
- {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS|METH_KEYWORDS},
- {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
- {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
- {NULL, NULL}
-};
+/* PatternObject's 'groupindex' method. */
+static PyObject *
+pattern_groupindex(PatternObject *self)
+{
+ return PyDictProxy_New(self->groupindex);
+}
-#define PAT_OFF(x) offsetof(PatternObject, x)
-static PyMemberDef pattern_members[] = {
- {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
- {"flags", T_INT, PAT_OFF(flags), READONLY},
- {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
- {"groupindex", T_OBJECT, PAT_OFF(groupindex), READONLY},
- {NULL} /* Sentinel */
-};
+static int _validate(PatternObject *self); /* Forward */
-static PyTypeObject Pattern_Type = {
- PyVarObject_HEAD_INIT(NULL, 0)
- "_" SRE_MODULE ".SRE_Pattern",
- sizeof(PatternObject), sizeof(SRE_CODE),
- (destructor)pattern_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- (reprfunc)pattern_repr, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- pattern_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- pattern_methods, /* tp_methods */
- pattern_members, /* tp_members */
-};
+/*[clinic input]
+_sre.compile
-static int _validate(PatternObject *self); /* Forward */
+ pattern: object
+ flags: int
+ code: object(subclass_of='&PyList_Type')
+ groups: Py_ssize_t
+ groupindex: object
+ indexgroup: object
+
+[clinic start generated code]*/
static PyObject *
-_compile(PyObject* self_, PyObject* args)
+_sre_compile_impl(PyModuleDef *module, PyObject *pattern, int flags,
+ PyObject *code, Py_ssize_t groups, PyObject *groupindex,
+ PyObject *indexgroup)
+/*[clinic end generated code: output=3004b293730bf309 input=7d059ec8ae1edb85]*/
{
/* "compile" pattern descriptor to pattern object */
PatternObject* self;
Py_ssize_t i, n;
- PyObject* pattern;
- int flags = 0;
- PyObject* code;
- Py_ssize_t groups = 0;
- PyObject* groupindex = NULL;
- PyObject* indexgroup = NULL;
-
- if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
- &PyList_Type, &code, &groups,
- &groupindex, &indexgroup))
- return NULL;
-
n = PyList_GET_SIZE(code);
/* coverity[ampersand_in_size] */
self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
@@ -1579,6 +1620,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
break;
case SRE_OP_RANGE:
+ case SRE_OP_RANGE_IGNORE:
GET_ARG;
GET_ARG;
break;
@@ -1935,10 +1977,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static int
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
- if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
+ if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
+ code >= end || end[-1] != SRE_OP_SUCCESS)
FAIL;
- if (groups == 0) /* fix for simplejson */
- groups = 100; /* 100 groups should always be safe */
return _validate_inner(code, end-1, groups);
}
@@ -2036,13 +2077,22 @@ match_getslice(MatchObject* self, PyObject* index, PyObject* def)
return match_getslice_by_index(self, match_getindex(self, index), def);
}
-static PyObject*
-match_expand(MatchObject* self, PyObject* ptemplate)
+/*[clinic input]
+_sre.SRE_Match.expand
+
+ template: object
+
+Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
+/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
{
/* delegate to Python code */
return call(
SRE_PY_MODULE, "_expand",
- PyTuple_Pack(3, self->pattern, self, ptemplate)
+ PyTuple_Pack(3, self->pattern, self, template)
);
}
@@ -2081,24 +2131,29 @@ match_group(MatchObject* self, PyObject* args)
return result;
}
-static PyObject*
-match_groups(MatchObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Match.groups
+
+ default: object = None
+ Is used for groups that did not participate in the match.
+
+Return a tuple containing all the subgroups of the match, from 1.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
+/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
{
PyObject* result;
Py_ssize_t index;
- PyObject* def = Py_None;
- static char* kwlist[] = { "default", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
- return NULL;
-
result = PyTuple_New(self->groups-1);
if (!result)
return NULL;
for (index = 1; index < self->groups; index++) {
PyObject* item;
- item = match_getslice_by_index(self, index, def);
+ item = match_getslice_by_index(self, index, default_value);
if (!item) {
Py_DECREF(result);
return NULL;
@@ -2109,18 +2164,23 @@ match_groups(MatchObject* self, PyObject* args, PyObject* kw)
return result;
}
-static PyObject*
-match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
+/*[clinic input]
+_sre.SRE_Match.groupdict
+
+ default: object = None
+ Is used for groups that did not participate in the match.
+
+Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
+/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
{
PyObject* result;
PyObject* keys;
Py_ssize_t index;
- PyObject* def = Py_None;
- static char* kwlist[] = { "default", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
- return NULL;
-
result = PyDict_New();
if (!result || !self->pattern->groupindex)
return result;
@@ -2136,7 +2196,7 @@ match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
key = PyList_GET_ITEM(keys, index);
if (!key)
goto failed;
- value = match_getslice(self, key, def);
+ value = match_getslice(self, key, default_value);
if (!value) {
Py_DECREF(key);
goto failed;
@@ -2157,50 +2217,58 @@ failed:
return NULL;
}
-static PyObject*
-match_start(MatchObject* self, PyObject* args)
-{
- Py_ssize_t index;
+/*[clinic input]
+_sre.SRE_Match.start -> Py_ssize_t
- PyObject* index_ = NULL;
- if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
- return NULL;
+ group: object(c_default="NULL") = 0
+ /
- index = match_getindex(self, index_);
+Return index of the start of the substring matched by group.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
+/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
+{
+ Py_ssize_t index = match_getindex(self, group);
if (index < 0 || index >= self->groups) {
PyErr_SetString(
PyExc_IndexError,
"no such group"
);
- return NULL;
+ return -1;
}
/* mark is -1 if group is undefined */
- return PyLong_FromSsize_t(self->mark[index*2]);
+ return self->mark[index*2];
}
-static PyObject*
-match_end(MatchObject* self, PyObject* args)
-{
- Py_ssize_t index;
+/*[clinic input]
+_sre.SRE_Match.end -> Py_ssize_t
- PyObject* index_ = NULL;
- if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
- return NULL;
+ group: object(c_default="NULL") = 0
+ /
+
+Return index of the end of the substring matched by group.
+[clinic start generated code]*/
- index = match_getindex(self, index_);
+static Py_ssize_t
+_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
+/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
+{
+ Py_ssize_t index = match_getindex(self, group);
if (index < 0 || index >= self->groups) {
PyErr_SetString(
PyExc_IndexError,
"no such group"
);
- return NULL;
+ return -1;
}
/* mark is -1 if group is undefined */
- return PyLong_FromSsize_t(self->mark[index*2+1]);
+ return self->mark[index*2+1];
}
LOCAL(PyObject*)
@@ -2230,16 +2298,20 @@ _pair(Py_ssize_t i1, Py_ssize_t i2)
return NULL;
}
-static PyObject*
-match_span(MatchObject* self, PyObject* args)
-{
- Py_ssize_t index;
+/*[clinic input]
+_sre.SRE_Match.span
- PyObject* index_ = NULL;
- if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
- return NULL;
+ group: object(c_default="NULL") = 0
+ /
- index = match_getindex(self, index_);
+For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group)
+/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/
+{
+ Py_ssize_t index = match_getindex(self, group);
if (index < 0 || index >= self->groups) {
PyErr_SetString(
@@ -2279,8 +2351,14 @@ match_regs(MatchObject* self)
return regs;
}
-static PyObject*
-match_copy(MatchObject* self, PyObject *unused)
+/*[clinic input]
+_sre.SRE_Match.__copy__
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match___copy___impl(MatchObject *self)
+/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/
{
#ifdef USE_BUILTIN_COPY
MatchObject* copy;
@@ -2310,8 +2388,16 @@ match_copy(MatchObject* self, PyObject *unused)
#endif
}
-static PyObject*
-match_deepcopy(MatchObject* self, PyObject* memo)
+/*[clinic input]
+_sre.SRE_Match.__deepcopy__
+
+ memo: object
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo)
+/*[clinic end generated code: output=2b657578eb03f4a3 input=b65b72489eac64cc]*/
{
#ifdef USE_BUILTIN_COPY
MatchObject* copy;
@@ -2342,50 +2428,6 @@ PyDoc_STRVAR(match_group_doc,
Return subgroup(s) of the match by indices or names.\n\
For 0 returns the entire match.");
-PyDoc_STRVAR(match_start_doc,
-"start([group=0]) -> int.\n\
- Return index of the start of the substring matched by group.");
-
-PyDoc_STRVAR(match_end_doc,
-"end([group=0]) -> int.\n\
- Return index of the end of the substring matched by group.");
-
-PyDoc_STRVAR(match_span_doc,
-"span([group]) -> tuple.\n\
- For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
-
-PyDoc_STRVAR(match_groups_doc,
-"groups([default=None]) -> tuple.\n\
- Return a tuple containing all the subgroups of the match, from 1.\n\
- The default argument is used for groups\n\
- that did not participate in the match");
-
-PyDoc_STRVAR(match_groupdict_doc,
-"groupdict([default=None]) -> dict.\n\
- Return a dictionary containing all the named subgroups of the match,\n\
- keyed by the subgroup name. The default argument is used for groups\n\
- that did not participate in the match");
-
-PyDoc_STRVAR(match_expand_doc,
-"expand(template) -> str.\n\
- Return the string obtained by doing backslash substitution\n\
- on the string template, as done by the sub() method.");
-
-static PyMethodDef match_methods[] = {
- {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
- {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
- {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
- {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
- {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
- match_groups_doc},
- {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
- match_groupdict_doc},
- {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
- {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
- {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
- {NULL, NULL}
-};
-
static PyObject *
match_lastindex_get(MatchObject *self)
{
@@ -2436,57 +2478,6 @@ match_repr(MatchObject *self)
}
-static PyGetSetDef match_getset[] = {
- {"lastindex", (getter)match_lastindex_get, (setter)NULL},
- {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
- {"regs", (getter)match_regs_get, (setter)NULL},
- {NULL}
-};
-
-#define MATCH_OFF(x) offsetof(MatchObject, x)
-static PyMemberDef match_members[] = {
- {"string", T_OBJECT, MATCH_OFF(string), READONLY},
- {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
- {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
- {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
- {NULL}
-};
-
-/* FIXME: implement setattr("string", None) as a special case (to
- detach the associated string, if any */
-
-static PyTypeObject Match_Type = {
- PyVarObject_HEAD_INIT(NULL,0)
- "_" SRE_MODULE ".SRE_Match",
- sizeof(MatchObject), sizeof(Py_ssize_t),
- (destructor)match_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_reserved */
- (reprfunc)match_repr, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- 0, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- match_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- match_methods, /* tp_methods */
- match_members, /* tp_members */
- match_getset, /* tp_getset */
-};
-
static PyObject*
pattern_new_match(PatternObject* pattern, SRE_STATE* state, Py_ssize_t status)
{
@@ -2562,8 +2553,14 @@ scanner_dealloc(ScannerObject* self)
PyObject_DEL(self);
}
-static PyObject*
-scanner_match(ScannerObject* self, PyObject *unused)
+/*[clinic input]
+_sre.SRE_Scanner.match
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Scanner_match_impl(ScannerObject *self)
+/*[clinic end generated code: output=936b30c63d4b81eb input=881a0154f8c13d9a]*/
{
SRE_STATE* state = &self->state;
PyObject* match;
@@ -2596,8 +2593,14 @@ scanner_match(ScannerObject* self, PyObject *unused)
}
-static PyObject*
-scanner_search(ScannerObject* self, PyObject *unused)
+/*[clinic input]
+_sre.SRE_Scanner.search
+
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Scanner_search_impl(ScannerObject *self)
+/*[clinic end generated code: output=7dc211986088f025 input=161223ee92ef9270]*/
{
SRE_STATE* state = &self->state;
PyObject* match;
@@ -2629,9 +2632,160 @@ scanner_search(ScannerObject* self, PyObject *unused)
return match;
}
+static PyObject *
+pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos)
+{
+ ScannerObject* scanner;
+
+ /* create scanner object */
+ scanner = PyObject_NEW(ScannerObject, &Scanner_Type);
+ if (!scanner)
+ return NULL;
+ scanner->pattern = NULL;
+
+ /* create search state object */
+ if (!state_init(&scanner->state, self, string, pos, endpos)) {
+ Py_DECREF(scanner);
+ return NULL;
+ }
+
+ Py_INCREF(self);
+ scanner->pattern = (PyObject*) self;
+
+ return (PyObject*) scanner;
+}
+
+#include "clinic/_sre.c.h"
+
+static PyMethodDef pattern_methods[] = {
+ _SRE_SRE_PATTERN_MATCH_METHODDEF
+ _SRE_SRE_PATTERN_FULLMATCH_METHODDEF
+ _SRE_SRE_PATTERN_SEARCH_METHODDEF
+ _SRE_SRE_PATTERN_SUB_METHODDEF
+ _SRE_SRE_PATTERN_SUBN_METHODDEF
+ _SRE_SRE_PATTERN_FINDALL_METHODDEF
+ _SRE_SRE_PATTERN_SPLIT_METHODDEF
+ _SRE_SRE_PATTERN_FINDITER_METHODDEF
+ _SRE_SRE_PATTERN_SCANNER_METHODDEF
+ _SRE_SRE_PATTERN___COPY___METHODDEF
+ _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF
+ {NULL, NULL}
+};
+
+static PyGetSetDef pattern_getset[] = {
+ {"groupindex", (getter)pattern_groupindex, (setter)NULL,
+ "A dictionary mapping group names to group numbers."},
+ {NULL} /* Sentinel */
+};
+
+#define PAT_OFF(x) offsetof(PatternObject, x)
+static PyMemberDef pattern_members[] = {
+ {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY},
+ {"flags", T_INT, PAT_OFF(flags), READONLY},
+ {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY},
+ {NULL} /* Sentinel */
+};
+
+static PyTypeObject Pattern_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_" SRE_MODULE ".SRE_Pattern",
+ sizeof(PatternObject), sizeof(SRE_CODE),
+ (destructor)pattern_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ (reprfunc)pattern_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ pattern_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ pattern_methods, /* tp_methods */
+ pattern_members, /* tp_members */
+ pattern_getset, /* tp_getset */
+};
+
+
+static PyMethodDef match_methods[] = {
+ {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
+ _SRE_SRE_MATCH_START_METHODDEF
+ _SRE_SRE_MATCH_END_METHODDEF
+ _SRE_SRE_MATCH_SPAN_METHODDEF
+ _SRE_SRE_MATCH_GROUPS_METHODDEF
+ _SRE_SRE_MATCH_GROUPDICT_METHODDEF
+ _SRE_SRE_MATCH_EXPAND_METHODDEF
+ _SRE_SRE_MATCH___COPY___METHODDEF
+ _SRE_SRE_MATCH___DEEPCOPY___METHODDEF
+ {NULL, NULL}
+};
+
+static PyGetSetDef match_getset[] = {
+ {"lastindex", (getter)match_lastindex_get, (setter)NULL},
+ {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
+ {"regs", (getter)match_regs_get, (setter)NULL},
+ {NULL}
+};
+
+#define MATCH_OFF(x) offsetof(MatchObject, x)
+static PyMemberDef match_members[] = {
+ {"string", T_OBJECT, MATCH_OFF(string), READONLY},
+ {"re", T_OBJECT, MATCH_OFF(pattern), READONLY},
+ {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY},
+ {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY},
+ {NULL}
+};
+
+/* FIXME: implement setattr("string", None) as a special case (to
+ detach the associated string, if any */
+
+static PyTypeObject Match_Type = {
+ PyVarObject_HEAD_INIT(NULL,0)
+ "_" SRE_MODULE ".SRE_Match",
+ sizeof(MatchObject), sizeof(Py_ssize_t),
+ (destructor)match_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ (reprfunc)match_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ match_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ match_methods, /* tp_methods */
+ match_members, /* tp_members */
+ match_getset, /* tp_getset */
+};
+
static PyMethodDef scanner_methods[] = {
- {"match", (PyCFunction) scanner_match, METH_NOARGS},
- {"search", (PyCFunction) scanner_search, METH_NOARGS},
+ _SRE_SRE_SCANNER_MATCH_METHODDEF
+ _SRE_SRE_SCANNER_SEARCH_METHODDEF
{NULL, NULL}
};
@@ -2673,47 +2827,10 @@ static PyTypeObject Scanner_Type = {
0, /* tp_getset */
};
-static PyObject*
-pattern_scanner(PatternObject* pattern, PyObject* args, PyObject* kw)
-{
- /* create search state object */
-
- ScannerObject* self;
-
- PyObject *string = NULL, *string2 = NULL;
- Py_ssize_t start = 0;
- Py_ssize_t end = PY_SSIZE_T_MAX;
- static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kw, "|Onn$O:scanner", kwlist,
- &string, &start, &end, &string2))
- return NULL;
-
- string = fix_string_param(string, string2, "source");
- if (!string)
- return NULL;
-
- /* create scanner object */
- self = PyObject_NEW(ScannerObject, &Scanner_Type);
- if (!self)
- return NULL;
- self->pattern = NULL;
-
- string = state_init(&self->state, pattern, string, start, end);
- if (!string) {
- Py_DECREF(self);
- return NULL;
- }
-
- Py_INCREF(pattern);
- self->pattern = (PyObject*) pattern;
-
- return (PyObject*) self;
-}
-
static PyMethodDef _functions[] = {
- {"compile", _compile, METH_VARARGS},
- {"getcodesize", sre_codesize, METH_NOARGS},
- {"getlower", sre_getlower, METH_VARARGS},
+ _SRE_COMPILE_METHODDEF
+ _SRE_GETCODESIZE_METHODDEF
+ _SRE_GETLOWER_METHODDEF
{NULL, NULL}
};
@@ -2763,6 +2880,12 @@ PyMODINIT_FUNC PyInit__sre(void)
Py_DECREF(x);
}
+ x = PyLong_FromUnsignedLong(SRE_MAXGROUPS);
+ if (x) {
+ PyDict_SetItemString(d, "MAXGROUPS", x);
+ Py_DECREF(x);
+ }
+
x = PyUnicode_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);