From 1578f06c1c69fbbb942b90bfbacd512784b599fa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 4 Apr 2022 10:53:26 +0300 Subject: bpo-47152: Move sources of the _sre module into a subdirectory (GH-32290) --- Lib/re/_constants.py | 2 +- Makefile.pre.in | 2 +- Modules/Setup.bootstrap.in | 2 +- Modules/_sre.c | 3063 ----------------------------------- Modules/_sre/clinic/sre.c.h | 926 +++++++++++ Modules/_sre/sre.c | 3063 +++++++++++++++++++++++++++++++++++ Modules/_sre/sre.h | 99 ++ Modules/_sre/sre_constants.h | 100 ++ Modules/_sre/sre_lib.h | 1759 ++++++++++++++++++++ Modules/clinic/_sre.c.h | 926 ----------- Modules/sre.h | 99 -- Modules/sre_constants.h | 100 -- Modules/sre_lib.h | 1759 -------------------- PCbuild/pythoncore.vcxproj | 8 +- PCbuild/pythoncore.vcxproj.filters | 20 +- Tools/c-analyzer/cpython/_parser.py | 6 +- configure | 1 + configure.ac | 1 + 18 files changed, 5969 insertions(+), 5967 deletions(-) delete mode 100644 Modules/_sre.c create mode 100644 Modules/_sre/clinic/sre.c.h create mode 100644 Modules/_sre/sre.c create mode 100644 Modules/_sre/sre.h create mode 100644 Modules/_sre/sre_constants.h create mode 100644 Modules/_sre/sre_lib.h delete mode 100644 Modules/clinic/_sre.c.h delete mode 100644 Modules/sre.h delete mode 100644 Modules/sre_constants.h delete mode 100644 Modules/sre_lib.h diff --git a/Lib/re/_constants.py b/Lib/re/_constants.py index 5317fd5..327ba54 100644 --- a/Lib/re/_constants.py +++ b/Lib/re/_constants.py @@ -234,7 +234,7 @@ if __name__ == "__main__": * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * - * See the _sre.c file for information on usage and redistribution. + * See the sre.c file for information on usage and redistribution. 
*/ """) diff --git a/Makefile.pre.in b/Makefile.pre.in index f94ba93..c1e58f7 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1226,7 +1226,7 @@ Programs/python.o: $(srcdir)/Programs/python.c Programs/_testembed.o: $(srcdir)/Programs/_testembed.c Programs/test_frozenmain.h $(MAINCC) -c $(PY_CORE_CFLAGS) -o $@ $(srcdir)/Programs/_testembed.c -Modules/_sre.o: $(srcdir)/Modules/_sre.c $(srcdir)/Modules/sre.h $(srcdir)/Modules/sre_constants.h $(srcdir)/Modules/sre_lib.h +Modules/_sre/sre.o: $(srcdir)/Modules/_sre/sre.c $(srcdir)/Modules/_sre/sre.h $(srcdir)/Modules/_sre/sre_constants.h $(srcdir)/Modules/_sre/sre_lib.h Modules/posixmodule.o: $(srcdir)/Modules/posixmodule.c $(srcdir)/Modules/posixmodule.h diff --git a/Modules/Setup.bootstrap.in b/Modules/Setup.bootstrap.in index ec72497..e3e9b96 100644 --- a/Modules/Setup.bootstrap.in +++ b/Modules/Setup.bootstrap.in @@ -18,7 +18,7 @@ _collections _collectionsmodule.c errno errnomodule.c _io _io/_iomodule.c _io/iobase.c _io/fileio.c _io/bytesio.c _io/bufferedio.c _io/textio.c _io/stringio.c itertools itertoolsmodule.c -_sre _sre.c +_sre _sre/sre.c _thread _threadmodule.c time timemodule.c _weakref _weakref.c diff --git a/Modules/_sre.c b/Modules/_sre.c deleted file mode 100644 index 506363d..0000000 --- a/Modules/_sre.c +++ /dev/null @@ -1,3063 +0,0 @@ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * partial history: - * 1999-10-24 fl created (based on existing template matcher code) - * 2000-03-06 fl first alpha, sort of - * 2000-08-01 fl fixes for 1.6b1 - * 2000-08-07 fl use PyOS_CheckStack() if available - * 2000-09-20 fl added expand method - * 2001-03-20 fl lots of fixes for 2.1b2 - * 2001-04-15 fl export copyright as Python attribute, not global - * 2001-04-28 fl added __copy__ methods (work in progress) - * 2001-05-14 fl fixes for 1.5.2 compatibility - * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-10-18 fl fixed group reset issue 
(from Matthew Mueller) - * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1 - * 2001-10-21 fl added sub/subn primitive - * 2001-10-24 fl added finditer primitive (for 2.2 only) - * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) - * 2002-11-09 fl fixed empty sub/subn return type - * 2003-04-18 mvl fully support 4-byte codes - * 2003-10-17 gn implemented non recursive scheme - * 2013-02-04 mrab added fullmatch primitive - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * This version of the SRE library can be redistributed under CNRI's - * Python 1.6 license. For any other use, please contact Secret Labs - * AB (info@pythonware.com). - * - * Portions of this engine have been developed in cooperation with - * CNRI. Hewlett-Packard provided funding for 1.6 integration and - * other compatibility work. - */ - -static const char copyright[] = - " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; - -#define PY_SSIZE_T_CLEAN - -#include "Python.h" -#include "pycore_long.h" // _PyLong_GetZero() -#include "pycore_moduleobject.h" // _PyModule_GetState() -#include "structmember.h" // PyMemberDef - -#include "sre.h" - -#define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) - -#include - -/* name of this module, minus the leading underscore */ -#if !defined(SRE_MODULE) -#define SRE_MODULE "sre" -#endif - -#define SRE_PY_MODULE "re" - -/* defining this one enables tracing */ -#undef VERBOSE - -/* -------------------------------------------------------------------- */ - -#if defined(_MSC_VER) -#pragma optimize("agtw", on) /* doesn't seem to make much difference... 
*/ -#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ -/* fastest possible local call under MSVC */ -#define LOCAL(type) static __inline type __fastcall -#else -#define LOCAL(type) static inline type -#endif - -/* error codes */ -#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ -#define SRE_ERROR_STATE -2 /* illegal state */ -#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ -#define SRE_ERROR_MEMORY -9 /* out of memory */ -#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ - -#if defined(VERBOSE) -#define TRACE(v) printf v -#else -#define TRACE(v) -#endif - -/* -------------------------------------------------------------------- */ -/* search engine state */ - -#define SRE_IS_DIGIT(ch)\ - ((ch) <= '9' && Py_ISDIGIT(ch)) -#define SRE_IS_SPACE(ch)\ - ((ch) <= ' ' && Py_ISSPACE(ch)) -#define SRE_IS_LINEBREAK(ch)\ - ((ch) == '\n') -#define SRE_IS_WORD(ch)\ - ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) - -static unsigned int sre_lower_ascii(unsigned int ch) -{ - return ((ch) < 128 ? Py_TOLOWER(ch) : ch); -} - -/* locale-specific character predicates */ -/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids - * warnings when c's type supports only numbers < N+1 */ -#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) -#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') - -static unsigned int sre_lower_locale(unsigned int ch) -{ - return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); -} - -static unsigned int sre_upper_locale(unsigned int ch) -{ - return ((ch) < 256 ? 
(unsigned int)toupper((ch)) : ch); -} - -/* unicode-specific character predicates */ - -#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) -#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) -#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) -#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) -#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') - -static unsigned int sre_lower_unicode(unsigned int ch) -{ - return (unsigned int) Py_UNICODE_TOLOWER(ch); -} - -static unsigned int sre_upper_unicode(unsigned int ch) -{ - return (unsigned int) Py_UNICODE_TOUPPER(ch); -} - -LOCAL(int) -sre_category(SRE_CODE category, unsigned int ch) -{ - switch (category) { - - case SRE_CATEGORY_DIGIT: - return SRE_IS_DIGIT(ch); - case SRE_CATEGORY_NOT_DIGIT: - return !SRE_IS_DIGIT(ch); - case SRE_CATEGORY_SPACE: - return SRE_IS_SPACE(ch); - case SRE_CATEGORY_NOT_SPACE: - return !SRE_IS_SPACE(ch); - case SRE_CATEGORY_WORD: - return SRE_IS_WORD(ch); - case SRE_CATEGORY_NOT_WORD: - return !SRE_IS_WORD(ch); - case SRE_CATEGORY_LINEBREAK: - return SRE_IS_LINEBREAK(ch); - case SRE_CATEGORY_NOT_LINEBREAK: - return !SRE_IS_LINEBREAK(ch); - - case SRE_CATEGORY_LOC_WORD: - return SRE_LOC_IS_WORD(ch); - case SRE_CATEGORY_LOC_NOT_WORD: - return !SRE_LOC_IS_WORD(ch); - - case SRE_CATEGORY_UNI_DIGIT: - return SRE_UNI_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_NOT_DIGIT: - return !SRE_UNI_IS_DIGIT(ch); - case SRE_CATEGORY_UNI_SPACE: - return SRE_UNI_IS_SPACE(ch); - case SRE_CATEGORY_UNI_NOT_SPACE: - return !SRE_UNI_IS_SPACE(ch); - case SRE_CATEGORY_UNI_WORD: - return SRE_UNI_IS_WORD(ch); - case SRE_CATEGORY_UNI_NOT_WORD: - return !SRE_UNI_IS_WORD(ch); - case SRE_CATEGORY_UNI_LINEBREAK: - return SRE_UNI_IS_LINEBREAK(ch); - case SRE_CATEGORY_UNI_NOT_LINEBREAK: - return !SRE_UNI_IS_LINEBREAK(ch); - } - return 0; -} - -LOCAL(int) -char_loc_ignore(SRE_CODE pattern, SRE_CODE ch) -{ - return ch == pattern - || (SRE_CODE) sre_lower_locale(ch) == pattern - || (SRE_CODE) 
sre_upper_locale(ch) == pattern; -} - - -/* helpers */ - -static void -data_stack_dealloc(SRE_STATE* state) -{ - if (state->data_stack) { - PyMem_Free(state->data_stack); - state->data_stack = NULL; - } - state->data_stack_size = state->data_stack_base = 0; -} - -static int -data_stack_grow(SRE_STATE* state, Py_ssize_t size) -{ - Py_ssize_t minsize, cursize; - minsize = state->data_stack_base+size; - cursize = state->data_stack_size; - if (cursize < minsize) { - void* stack; - cursize = minsize+minsize/4+1024; - TRACE(("allocate/grow stack %zd\n", cursize)); - stack = PyMem_Realloc(state->data_stack, cursize); - if (!stack) { - data_stack_dealloc(state); - return SRE_ERROR_MEMORY; - } - state->data_stack = (char *)stack; - state->data_stack_size = cursize; - } - return 0; -} - -/* generate 8-bit version */ - -#define SRE_CHAR Py_UCS1 -#define SIZEOF_SRE_CHAR 1 -#define SRE(F) sre_ucs1_##F -#include "sre_lib.h" - -/* generate 16-bit unicode version */ - -#define SRE_CHAR Py_UCS2 -#define SIZEOF_SRE_CHAR 2 -#define SRE(F) sre_ucs2_##F -#include "sre_lib.h" - -/* generate 32-bit unicode version */ - -#define SRE_CHAR Py_UCS4 -#define SIZEOF_SRE_CHAR 4 -#define SRE(F) sre_ucs4_##F -#include "sre_lib.h" - -/* -------------------------------------------------------------------- */ -/* factories and destructors */ - -/* module state */ -typedef struct { - PyTypeObject *Pattern_Type; - PyTypeObject *Match_Type; - PyTypeObject *Scanner_Type; -} _sremodulestate; - -static _sremodulestate * -get_sre_module_state(PyObject *m) -{ - _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m); - assert(state); - return state; -} - -static struct PyModuleDef sremodule; -#define get_sre_module_state_by_class(cls) \ - (get_sre_module_state(PyType_GetModule(cls))) - -/* see sre.h for object declarations */ -static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); -static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject 
*, Py_ssize_t, Py_ssize_t); - -/*[clinic input] -module _sre -class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" -class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type" -class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/ - -/*[clinic input] -_sre.getcodesize -> int -[clinic start generated code]*/ - -static int -_sre_getcodesize_impl(PyObject *module) -/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ -{ - return sizeof(SRE_CODE); -} - -/*[clinic input] -_sre.ascii_iscased -> bool - - character: int - / - -[clinic start generated code]*/ - -static int -_sre_ascii_iscased_impl(PyObject *module, int character) -/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/ -{ - unsigned int ch = (unsigned int)character; - return ch < 128 && Py_ISALPHA(ch); -} - -/*[clinic input] -_sre.unicode_iscased -> bool - - character: int - / - -[clinic start generated code]*/ - -static int -_sre_unicode_iscased_impl(PyObject *module, int character) -/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/ -{ - unsigned int ch = (unsigned int)character; - return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch); -} - -/*[clinic input] -_sre.ascii_tolower -> int - - character: int - / - -[clinic start generated code]*/ - -static int -_sre_ascii_tolower_impl(PyObject *module, int character) -/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ -{ - return sre_lower_ascii(character); -} - -/*[clinic input] -_sre.unicode_tolower -> int - - character: int - / - -[clinic start generated code]*/ - -static int -_sre_unicode_tolower_impl(PyObject *module, int character) -/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ -{ - return 
sre_lower_unicode(character); -} - -LOCAL(void) -state_reset(SRE_STATE* state) -{ - /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ - /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ - - state->lastmark = -1; - state->lastindex = -1; - - state->repeat = NULL; - - data_stack_dealloc(state); -} - -static const void* -getstring(PyObject* string, Py_ssize_t* p_length, - int* p_isbytes, int* p_charsize, - Py_buffer *view) -{ - /* given a python object, return a data pointer, a length (in - characters), and a character size. return NULL if the object - is not a string (or not compatible) */ - - /* Unicode objects do not support the buffer API. So, get the data - directly instead. */ - if (PyUnicode_Check(string)) { - if (PyUnicode_READY(string) == -1) - return NULL; - *p_length = PyUnicode_GET_LENGTH(string); - *p_charsize = PyUnicode_KIND(string); - *p_isbytes = 0; - return PyUnicode_DATA(string); - } - - /* get pointer to byte string buffer */ - if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { - PyErr_Format(PyExc_TypeError, "expected string or bytes-like " - "object, got '%.200s'", Py_TYPE(string)->tp_name); - return NULL; - } - - *p_length = view->len; - *p_charsize = 1; - *p_isbytes = 1; - - if (view->buf == NULL) { - PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); - PyBuffer_Release(view); - view->buf = NULL; - return NULL; - } - return view->buf; -} - -LOCAL(PyObject*) -state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, - Py_ssize_t start, Py_ssize_t end) -{ - /* prepare state object */ - - Py_ssize_t length; - int isbytes, charsize; - const void* ptr; - - memset(state, 0, sizeof(SRE_STATE)); - - state->mark = PyMem_New(const void *, pattern->groups * 2); - if (!state->mark) { - PyErr_NoMemory(); - goto err; - } - state->lastmark = -1; - state->lastindex = -1; - - state->repeats_array = PyMem_New(SRE_REPEAT, pattern->repeat_count); - if (!state->repeats_array) { - PyErr_NoMemory(); - goto err; - } 
- - state->buffer.buf = NULL; - ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); - if (!ptr) - goto err; - - if (isbytes && pattern->isbytes == 0) { - PyErr_SetString(PyExc_TypeError, - "cannot use a string pattern on a bytes-like object"); - goto err; - } - if (!isbytes && pattern->isbytes > 0) { - PyErr_SetString(PyExc_TypeError, - "cannot use a bytes pattern on a string-like object"); - goto err; - } - - /* adjust boundaries */ - if (start < 0) - start = 0; - else if (start > length) - start = length; - - if (end < 0) - end = 0; - else if (end > length) - end = length; - - state->isbytes = isbytes; - state->charsize = charsize; - state->match_all = 0; - state->must_advance = 0; - - state->beginning = ptr; - - state->start = (void*) ((char*) ptr + start * state->charsize); - state->end = (void*) ((char*) ptr + end * state->charsize); - - Py_INCREF(string); - state->string = string; - state->pos = start; - state->endpos = end; - - return string; - err: - /* We add an explicit cast here because MSVC has a bug when - compiling C code where it believes that `const void**` cannot be - safely casted to `void*`, see bpo-39943 for details. */ - PyMem_Free((void*) state->mark); - state->mark = NULL; - PyMem_Free(state->repeats_array); - state->repeats_array = NULL; - - if (state->buffer.buf) - PyBuffer_Release(&state->buffer); - return NULL; -} - -LOCAL(void) -state_fini(SRE_STATE* state) -{ - if (state->buffer.buf) - PyBuffer_Release(&state->buffer); - Py_XDECREF(state->string); - data_stack_dealloc(state); - /* See above PyMem_Del for why we explicitly cast here. 
*/ - PyMem_Free((void*) state->mark); - state->mark = NULL; - PyMem_Free(state->repeats_array); - state->repeats_array = NULL; -} - -/* calculate offset from start of string */ -#define STATE_OFFSET(state, member)\ - (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) - -LOCAL(PyObject*) -getslice(int isbytes, const void *ptr, - PyObject* string, Py_ssize_t start, Py_ssize_t end) -{ - if (isbytes) { - if (PyBytes_CheckExact(string) && - start == 0 && end == PyBytes_GET_SIZE(string)) { - Py_INCREF(string); - return string; - } - return PyBytes_FromStringAndSize( - (const char *)ptr + start, end - start); - } - else { - return PyUnicode_Substring(string, start, end); - } -} - -LOCAL(PyObject*) -state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) -{ - Py_ssize_t i, j; - - index = (index - 1) * 2; - - if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { - if (empty) - /* want empty string */ - i = j = 0; - else { - Py_RETURN_NONE; - } - } else { - i = STATE_OFFSET(state, state->mark[index]); - j = STATE_OFFSET(state, state->mark[index+1]); - - /* check wrong span */ - if (i > j) { - PyErr_SetString(PyExc_SystemError, - "The span of capturing group is wrong," - " please report a bug for the re module."); - return NULL; - } - } - - return getslice(state->isbytes, state->beginning, string, i, j); -} - -static void -pattern_error(Py_ssize_t status) -{ - switch (status) { - case SRE_ERROR_RECURSION_LIMIT: - /* This error code seems to be unused. 
*/ - PyErr_SetString( - PyExc_RecursionError, - "maximum recursion limit exceeded" - ); - break; - case SRE_ERROR_MEMORY: - PyErr_NoMemory(); - break; - case SRE_ERROR_INTERRUPTED: - /* An exception has already been raised, so let it fly */ - break; - default: - /* other error codes indicate compiler/engine bugs */ - PyErr_SetString( - PyExc_RuntimeError, - "internal error in regular expression engine" - ); - } -} - -static int -pattern_traverse(PatternObject *self, visitproc visit, void *arg) -{ - Py_VISIT(Py_TYPE(self)); - Py_VISIT(self->groupindex); - Py_VISIT(self->indexgroup); - Py_VISIT(self->pattern); - return 0; -} - -static int -pattern_clear(PatternObject *self) -{ - Py_CLEAR(self->groupindex); - Py_CLEAR(self->indexgroup); - Py_CLEAR(self->pattern); - return 0; -} - -static void -pattern_dealloc(PatternObject* self) -{ - PyTypeObject *tp = Py_TYPE(self); - - PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) self); - } - (void)pattern_clear(self); - tp->tp_free(self); - Py_DECREF(tp); -} - -LOCAL(Py_ssize_t) -sre_match(SRE_STATE* state, SRE_CODE* pattern) -{ - if (state->charsize == 1) - return sre_ucs1_match(state, pattern, 1); - if (state->charsize == 2) - return sre_ucs2_match(state, pattern, 1); - assert(state->charsize == 4); - return sre_ucs4_match(state, pattern, 1); -} - -LOCAL(Py_ssize_t) -sre_search(SRE_STATE* state, SRE_CODE* pattern) -{ - if (state->charsize == 1) - return sre_ucs1_search(state, pattern); - if (state->charsize == 2) - return sre_ucs2_search(state, pattern); - assert(state->charsize == 4); - return sre_ucs4_search(state, pattern); -} - -/*[clinic input] -_sre.SRE_Pattern.match - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Matches zero or more characters at the beginning of the string. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - SRE_STATE state; - Py_ssize_t status; - PyObject *match; - - if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) - return NULL; - - state.ptr = state.start; - - TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); - - status = sre_match(&state, PatternObject_GetCode(self)); - - TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - if (PyErr_Occurred()) { - state_fini(&state); - return NULL; - } - - match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); - return match; -} - -/*[clinic input] -_sre.SRE_Pattern.fullmatch - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Matches against all of the string. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - SRE_STATE state; - Py_ssize_t status; - PyObject *match; - - if (!state_init(&state, self, string, pos, endpos)) - return NULL; - - state.ptr = state.start; - - TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); - - state.match_all = 1; - status = sre_match(&state, PatternObject_GetCode(self)); - - TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - if (PyErr_Occurred()) { - state_fini(&state); - return NULL; - } - - match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); - return match; -} - -/*[clinic input] -_sre.SRE_Pattern.search - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Scan through string looking for a match, and return a corresponding match object instance. - -Return None if no position in the string matches. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - SRE_STATE state; - Py_ssize_t status; - PyObject *match; - - if (!state_init(&state, self, string, pos, endpos)) - return NULL; - - TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); - - status = sre_search(&state, PatternObject_GetCode(self)); - - TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); - - if (PyErr_Occurred()) { - state_fini(&state); - return NULL; - } - - match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); - return match; -} - -static PyObject* -call(const char* module, const char* function, PyObject* args) -{ - PyObject* name; - PyObject* mod; - PyObject* func; - PyObject* result; - - if (!args) - return NULL; - name = PyUnicode_FromString(module); - if (!name) - return NULL; - mod = PyImport_Import(name); - Py_DECREF(name); - if (!mod) - return NULL; - func = PyObject_GetAttrString(mod, function); - Py_DECREF(mod); - if (!func) - return NULL; - result = PyObject_CallObject(func, args); - Py_DECREF(func); - Py_DECREF(args); - return result; -} - -/*[clinic input] -_sre.SRE_Pattern.findall - - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Return a list of all non-overlapping matches of pattern in string. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, - Py_ssize_t pos, Py_ssize_t endpos) -/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/ -{ - SRE_STATE state; - PyObject* list; - Py_ssize_t status; - Py_ssize_t i, b, e; - - if (!state_init(&state, self, string, pos, endpos)) - return NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - while (state.start <= state.end) { - - PyObject* item; - - state_reset(&state); - - state.ptr = state.start; - - status = sre_search(&state, PatternObject_GetCode(self)); - if (PyErr_Occurred()) - goto error; - - if (status <= 0) { - if (status == 0) - break; - pattern_error(status); - goto error; - } - - /* don't bother to build a match object */ - switch (self->groups) { - case 0: - b = STATE_OFFSET(&state, state.start); - e = STATE_OFFSET(&state, state.ptr); - item = getslice(state.isbytes, state.beginning, - string, b, e); - if (!item) - goto error; - break; - case 1: - item = state_getslice(&state, 1, string, 1); - if (!item) - goto error; - break; - default: - item = PyTuple_New(self->groups); - if (!item) - goto error; - for (i = 0; i < self->groups; i++) { - PyObject* o = state_getslice(&state, i+1, string, 1); - if (!o) { - Py_DECREF(item); - goto error; - } - PyTuple_SET_ITEM(item, i, o); - } - break; - } - - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - state.must_advance = (state.ptr == state.start); - state.start = state.ptr; - } - - state_fini(&state); - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; - -} - -/*[clinic input] -_sre.SRE_Pattern.finditer - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -Return an iterator over all non-overlapping matches for the RE pattern in string. 
- -For each match, the iterator returns a match object. -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - PyObject* scanner; - PyObject* search; - PyObject* iterator; - - scanner = pattern_scanner(module_state, self, string, pos, endpos); - if (!scanner) - return NULL; - - search = PyObject_GetAttrString(scanner, "search"); - Py_DECREF(scanner); - if (!search) - return NULL; - - iterator = PyCallIter_New(search, Py_None); - Py_DECREF(search); - - return iterator; -} - -/*[clinic input] -_sre.SRE_Pattern.scanner - - cls: defining_class - / - string: object - pos: Py_ssize_t = 0 - endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - - return pattern_scanner(module_state, self, string, pos, endpos); -} - -/*[clinic input] -_sre.SRE_Pattern.split - - string: object - maxsplit: Py_ssize_t = 0 - -Split string by the occurrences of pattern. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, - Py_ssize_t maxsplit) -/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/ -{ - SRE_STATE state; - PyObject* list; - PyObject* item; - Py_ssize_t status; - Py_ssize_t n; - Py_ssize_t i; - const void* last; - - assert(self->codesize != 0); - - if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) - return NULL; - - list = PyList_New(0); - if (!list) { - state_fini(&state); - return NULL; - } - - n = 0; - last = state.start; - - while (!maxsplit || n < maxsplit) { - - state_reset(&state); - - state.ptr = state.start; - - status = sre_search(&state, PatternObject_GetCode(self)); - if (PyErr_Occurred()) - goto error; - - if (status <= 0) { - if (status == 0) - break; - pattern_error(status); - goto error; - } - - /* get segment before this match */ - item = getslice(state.isbytes, state.beginning, - string, STATE_OFFSET(&state, last), - STATE_OFFSET(&state, state.start) - ); - if (!item) - goto error; - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - /* add groups (if any) */ - for (i = 0; i < self->groups; i++) { - item = state_getslice(&state, i+1, string, 0); - if (!item) - goto error; - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - n = n + 1; - state.must_advance = (state.ptr == state.start); - last = state.start = state.ptr; - - } - - /* get segment following last match (even if empty) */ - item = getslice(state.isbytes, state.beginning, - string, STATE_OFFSET(&state, last), state.endpos - ); - if (!item) - goto error; - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - - state_fini(&state); - return list; - -error: - Py_DECREF(list); - state_fini(&state); - return NULL; - -} - -static PyObject* -pattern_subx(_sremodulestate* module_state, - PatternObject* self, - PyObject* 
ptemplate, - PyObject* string, - Py_ssize_t count, - Py_ssize_t subn) -{ - SRE_STATE state; - PyObject* list; - PyObject* joiner; - PyObject* item; - PyObject* filter; - PyObject* match; - const void* ptr; - Py_ssize_t status; - Py_ssize_t n; - Py_ssize_t i, b, e; - int isbytes, charsize; - int filter_is_callable; - Py_buffer view; - - if (PyCallable_Check(ptemplate)) { - /* sub/subn takes either a function or a template */ - filter = ptemplate; - Py_INCREF(filter); - filter_is_callable = 1; - } else { - /* if not callable, check if it's a literal string */ - int literal; - view.buf = NULL; - ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view); - if (ptr) { - if (charsize == 1) - literal = memchr(ptr, '\\', n) == NULL; - else - literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; - } else { - PyErr_Clear(); - literal = 0; - } - if (view.buf) - PyBuffer_Release(&view); - if (literal) { - filter = ptemplate; - Py_INCREF(filter); - filter_is_callable = 0; - } else { - /* not a literal; hand it over to the template compiler */ - filter = call( - SRE_PY_MODULE, "_subx", - PyTuple_Pack(2, self, ptemplate) - ); - if (!filter) - return NULL; - filter_is_callable = PyCallable_Check(filter); - } - } - - if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { - Py_DECREF(filter); - return NULL; - } - - list = PyList_New(0); - if (!list) { - Py_DECREF(filter); - state_fini(&state); - return NULL; - } - - n = i = 0; - - while (!count || n < count) { - - state_reset(&state); - - state.ptr = state.start; - - status = sre_search(&state, PatternObject_GetCode(self)); - if (PyErr_Occurred()) - goto error; - - if (status <= 0) { - if (status == 0) - break; - pattern_error(status); - goto error; - } - - b = STATE_OFFSET(&state, state.start); - e = STATE_OFFSET(&state, state.ptr); - - if (i < b) { - /* get segment before this match */ - item = getslice(state.isbytes, state.beginning, - string, i, b); - if (!item) - goto error; - status = PyList_Append(list, item); - 
Py_DECREF(item); - if (status < 0) - goto error; - - } - - if (filter_is_callable) { - /* pass match object through filter */ - match = pattern_new_match(module_state, self, &state, 1); - if (!match) - goto error; - item = PyObject_CallOneArg(filter, match); - Py_DECREF(match); - if (!item) - goto error; - } else { - /* filter is literal string */ - item = filter; - Py_INCREF(item); - } - - /* add to list */ - if (item != Py_None) { - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - i = e; - n = n + 1; - state.must_advance = (state.ptr == state.start); - state.start = state.ptr; - } - - /* get segment following last match */ - if (i < state.endpos) { - item = getslice(state.isbytes, state.beginning, - string, i, state.endpos); - if (!item) - goto error; - status = PyList_Append(list, item); - Py_DECREF(item); - if (status < 0) - goto error; - } - - state_fini(&state); - - Py_DECREF(filter); - - /* convert list to single string (also removes list) */ - joiner = getslice(state.isbytes, state.beginning, string, 0, 0); - if (!joiner) { - Py_DECREF(list); - return NULL; - } - if (PyList_GET_SIZE(list) == 0) { - Py_DECREF(list); - item = joiner; - } - else { - if (state.isbytes) - item = _PyBytes_Join(joiner, list); - else - item = PyUnicode_Join(joiner, list); - Py_DECREF(joiner); - Py_DECREF(list); - if (!item) - return NULL; - } - - if (subn) - return Py_BuildValue("Nn", item, n); - - return item; - -error: - Py_DECREF(list); - state_fini(&state); - Py_DECREF(filter); - return NULL; - -} - -/*[clinic input] -_sre.SRE_Pattern.sub - - cls: defining_class - / - repl: object - string: object - count: Py_ssize_t = 0 - -Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, - PyObject *repl, PyObject *string, Py_ssize_t count) -/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - - return pattern_subx(module_state, self, repl, string, count, 0); -} - -/*[clinic input] -_sre.SRE_Pattern.subn - - cls: defining_class - / - repl: object - string: object - count: Py_ssize_t = 0 - -Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl. -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, - PyObject *repl, PyObject *string, - Py_ssize_t count) -/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - - return pattern_subx(module_state, self, repl, string, count, 1); -} - -/*[clinic input] -_sre.SRE_Pattern.__copy__ - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern___copy___impl(PatternObject *self) -/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ -{ - Py_INCREF(self); - return (PyObject *)self; -} - -/*[clinic input] -_sre.SRE_Pattern.__deepcopy__ - - memo: object - / - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) -/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/ -{ - Py_INCREF(self); - return (PyObject *)self; -} - -static PyObject * -pattern_repr(PatternObject *obj) -{ - static const struct { - const char *name; - int value; - } flag_names[] = { - {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, - {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, - {"re.LOCALE", SRE_FLAG_LOCALE}, - {"re.MULTILINE", SRE_FLAG_MULTILINE}, - 
{"re.DOTALL", SRE_FLAG_DOTALL}, - {"re.UNICODE", SRE_FLAG_UNICODE}, - {"re.VERBOSE", SRE_FLAG_VERBOSE}, - {"re.DEBUG", SRE_FLAG_DEBUG}, - {"re.ASCII", SRE_FLAG_ASCII}, - }; - PyObject *result = NULL; - PyObject *flag_items; - size_t i; - int flags = obj->flags; - - /* Omit re.UNICODE for valid string patterns. */ - if (obj->isbytes == 0 && - (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == - SRE_FLAG_UNICODE) - flags &= ~SRE_FLAG_UNICODE; - - flag_items = PyList_New(0); - if (!flag_items) - return NULL; - - for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { - if (flags & flag_names[i].value) { - PyObject *item = PyUnicode_FromString(flag_names[i].name); - if (!item) - goto done; - - if (PyList_Append(flag_items, item) < 0) { - Py_DECREF(item); - goto done; - } - Py_DECREF(item); - flags &= ~flag_names[i].value; - } - } - if (flags) { - PyObject *item = PyUnicode_FromFormat("0x%x", flags); - if (!item) - goto done; - - if (PyList_Append(flag_items, item) < 0) { - Py_DECREF(item); - goto done; - } - Py_DECREF(item); - } - - if (PyList_Size(flag_items) > 0) { - PyObject *flags_result; - PyObject *sep = PyUnicode_FromString("|"); - if (!sep) - goto done; - flags_result = PyUnicode_Join(sep, flag_items); - Py_DECREF(sep); - if (!flags_result) - goto done; - result = PyUnicode_FromFormat("re.compile(%.200R, %S)", - obj->pattern, flags_result); - Py_DECREF(flags_result); - } - else { - result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); - } - -done: - Py_DECREF(flag_items); - return result; -} - -PyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); - -/* PatternObject's 'groupindex' method. 
*/ -static PyObject * -pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) -{ - if (self->groupindex == NULL) - return PyDict_New(); - return PyDictProxy_New(self->groupindex); -} - -static int _validate(PatternObject *self); /* Forward */ - -/*[clinic input] -_sre.compile - - pattern: object - flags: int - code: object(subclass_of='&PyList_Type') - groups: Py_ssize_t - groupindex: object(subclass_of='&PyDict_Type') - indexgroup: object(subclass_of='&PyTuple_Type') - repeat_count: Py_ssize_t - -[clinic start generated code]*/ - -static PyObject * -_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, - PyObject *code, Py_ssize_t groups, PyObject *groupindex, - PyObject *indexgroup, Py_ssize_t repeat_count) -/*[clinic end generated code: output=922af562d51b1657 input=77e39c322501ec2a]*/ -{ - /* "compile" pattern descriptor to pattern object */ - - _sremodulestate *module_state = get_sre_module_state(module); - PatternObject* self; - Py_ssize_t i, n; - - n = PyList_GET_SIZE(code); - /* coverity[ampersand_in_size] */ - self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); - if (!self) - return NULL; - self->weakreflist = NULL; - self->pattern = NULL; - self->groupindex = NULL; - self->indexgroup = NULL; - - self->codesize = n; - - for (i = 0; i < n; i++) { - PyObject *o = PyList_GET_ITEM(code, i); - unsigned long value = PyLong_AsUnsignedLong(o); - self->code[i] = (SRE_CODE) value; - if ((unsigned long) self->code[i] != value) { - PyErr_SetString(PyExc_OverflowError, - "regular expression code size limit exceeded"); - break; - } - } - PyObject_GC_Track(self); - - if (PyErr_Occurred()) { - Py_DECREF(self); - return NULL; - } - - if (pattern == Py_None) { - self->isbytes = -1; - } - else { - Py_ssize_t p_length; - int charsize; - Py_buffer view; - view.buf = NULL; - if (!getstring(pattern, &p_length, &self->isbytes, - &charsize, &view)) { - Py_DECREF(self); - return NULL; - } - if (view.buf) - PyBuffer_Release(&view); - } - - 
Py_INCREF(pattern); - self->pattern = pattern; - - self->flags = flags; - self->groups = groups; - self->repeat_count = repeat_count; - - if (PyDict_GET_SIZE(groupindex) > 0) { - Py_INCREF(groupindex); - self->groupindex = groupindex; - if (PyTuple_GET_SIZE(indexgroup) > 0) { - Py_INCREF(indexgroup); - self->indexgroup = indexgroup; - } - } - - if (!_validate(self)) { - Py_DECREF(self); - return NULL; - } - - return (PyObject*) self; -} - -/* -------------------------------------------------------------------- */ -/* Code validation */ - -/* To learn more about this code, have a look at the _compile() function in - Lib/sre_compile.py. The validation functions below checks the code array - for conformance with the code patterns generated there. - - The nice thing about the generated code is that it is position-independent: - all jumps are relative jumps forward. Also, jumps don't cross each other: - the target of a later jump is always earlier than the target of an earlier - jump. IOW, this is okay: - - J---------J-------T--------T - \ \_____/ / - \______________________/ - - but this is not: - - J---------J-------T--------T - \_________\_____/ / - \____________/ - - It also helps that SRE_CODE is always an unsigned type. 
-*/ - -/* Defining this one enables tracing of the validator */ -#undef VVERBOSE - -/* Trace macro for the validator */ -#if defined(VVERBOSE) -#define VTRACE(v) printf v -#else -#define VTRACE(v) do {} while(0) /* do nothing */ -#endif - -/* Report failure */ -#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) - -/* Extract opcode, argument, or skip count from code array */ -#define GET_OP \ - do { \ - VTRACE(("%p: ", code)); \ - if (code >= end) FAIL; \ - op = *code++; \ - VTRACE(("%lu (op)\n", (unsigned long)op)); \ - } while (0) -#define GET_ARG \ - do { \ - VTRACE(("%p= ", code)); \ - if (code >= end) FAIL; \ - arg = *code++; \ - VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ - } while (0) -#define GET_SKIP_ADJ(adj) \ - do { \ - VTRACE(("%p= ", code)); \ - if (code >= end) FAIL; \ - skip = *code; \ - VTRACE(("%lu (skip to %p)\n", \ - (unsigned long)skip, code+skip)); \ - if (skip-adj > (uintptr_t)(end - code)) \ - FAIL; \ - code++; \ - } while (0) -#define GET_SKIP GET_SKIP_ADJ(0) - -static int -_validate_charset(SRE_CODE *code, SRE_CODE *end) -{ - /* Some variables are manipulated by the macros above */ - SRE_CODE op; - SRE_CODE arg; - SRE_CODE offset; - int i; - - while (code < end) { - GET_OP; - switch (op) { - - case SRE_OP_NEGATE: - break; - - case SRE_OP_LITERAL: - GET_ARG; - break; - - case SRE_OP_RANGE: - case SRE_OP_RANGE_UNI_IGNORE: - GET_ARG; - GET_ARG; - break; - - case SRE_OP_CHARSET: - offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ - if (offset > (uintptr_t)(end - code)) - FAIL; - code += offset; - break; - - case SRE_OP_BIGCHARSET: - GET_ARG; /* Number of blocks */ - offset = 256/sizeof(SRE_CODE); /* 256-byte table */ - if (offset > (uintptr_t)(end - code)) - FAIL; - /* Make sure that each byte points to a valid block */ - for (i = 0; i < 256; i++) { - if (((unsigned char *)code)[i] >= arg) - FAIL; - } - code += offset; - offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ - if (offset > (uintptr_t)(end - 
code)) - FAIL; - code += offset; - break; - - case SRE_OP_CATEGORY: - GET_ARG; - switch (arg) { - case SRE_CATEGORY_DIGIT: - case SRE_CATEGORY_NOT_DIGIT: - case SRE_CATEGORY_SPACE: - case SRE_CATEGORY_NOT_SPACE: - case SRE_CATEGORY_WORD: - case SRE_CATEGORY_NOT_WORD: - case SRE_CATEGORY_LINEBREAK: - case SRE_CATEGORY_NOT_LINEBREAK: - case SRE_CATEGORY_LOC_WORD: - case SRE_CATEGORY_LOC_NOT_WORD: - case SRE_CATEGORY_UNI_DIGIT: - case SRE_CATEGORY_UNI_NOT_DIGIT: - case SRE_CATEGORY_UNI_SPACE: - case SRE_CATEGORY_UNI_NOT_SPACE: - case SRE_CATEGORY_UNI_WORD: - case SRE_CATEGORY_UNI_NOT_WORD: - case SRE_CATEGORY_UNI_LINEBREAK: - case SRE_CATEGORY_UNI_NOT_LINEBREAK: - break; - default: - FAIL; - } - break; - - default: - FAIL; - - } - } - - return 1; -} - -static int -_validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self) -{ - /* Some variables are manipulated by the macros above */ - SRE_CODE op; - SRE_CODE arg; - SRE_CODE skip; - - VTRACE(("code=%p, end=%p\n", code, end)); - - if (code > end) - FAIL; - - while (code < end) { - GET_OP; - switch (op) { - - case SRE_OP_MARK: - /* We don't check whether marks are properly nested; the - sre_match() code is robust even if they don't, and the worst - you can get is nonsensical match results. 
*/ - GET_ARG; - if (arg > 2 * (size_t)self->groups + 1) { - VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)self->groups)); - FAIL; - } - break; - - case SRE_OP_LITERAL: - case SRE_OP_NOT_LITERAL: - case SRE_OP_LITERAL_IGNORE: - case SRE_OP_NOT_LITERAL_IGNORE: - case SRE_OP_LITERAL_UNI_IGNORE: - case SRE_OP_NOT_LITERAL_UNI_IGNORE: - case SRE_OP_LITERAL_LOC_IGNORE: - case SRE_OP_NOT_LITERAL_LOC_IGNORE: - GET_ARG; - /* The arg is just a character, nothing to check */ - break; - - case SRE_OP_SUCCESS: - case SRE_OP_FAILURE: - /* Nothing to check; these normally end the matching process */ - break; - - case SRE_OP_AT: - GET_ARG; - switch (arg) { - case SRE_AT_BEGINNING: - case SRE_AT_BEGINNING_STRING: - case SRE_AT_BEGINNING_LINE: - case SRE_AT_END: - case SRE_AT_END_LINE: - case SRE_AT_END_STRING: - case SRE_AT_BOUNDARY: - case SRE_AT_NON_BOUNDARY: - case SRE_AT_LOC_BOUNDARY: - case SRE_AT_LOC_NON_BOUNDARY: - case SRE_AT_UNI_BOUNDARY: - case SRE_AT_UNI_NON_BOUNDARY: - break; - default: - FAIL; - } - break; - - case SRE_OP_ANY: - case SRE_OP_ANY_ALL: - /* These have no operands */ - break; - - case SRE_OP_IN: - case SRE_OP_IN_IGNORE: - case SRE_OP_IN_UNI_IGNORE: - case SRE_OP_IN_LOC_IGNORE: - GET_SKIP; - /* Stop 1 before the end; we check the FAILURE below */ - if (!_validate_charset(code, code+skip-2)) - FAIL; - if (code[skip-2] != SRE_OP_FAILURE) - FAIL; - code += skip-1; - break; - - case SRE_OP_INFO: - { - /* A minimal info field is - <1=skip> <2=flags> <3=min> <4=max>; - If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, - more follows. 
*/ - SRE_CODE flags, i; - SRE_CODE *newcode; - GET_SKIP; - newcode = code+skip-1; - GET_ARG; flags = arg; - GET_ARG; - GET_ARG; - /* Check that only valid flags are present */ - if ((flags & ~(SRE_INFO_PREFIX | - SRE_INFO_LITERAL | - SRE_INFO_CHARSET)) != 0) - FAIL; - /* PREFIX and CHARSET are mutually exclusive */ - if ((flags & SRE_INFO_PREFIX) && - (flags & SRE_INFO_CHARSET)) - FAIL; - /* LITERAL implies PREFIX */ - if ((flags & SRE_INFO_LITERAL) && - !(flags & SRE_INFO_PREFIX)) - FAIL; - /* Validate the prefix */ - if (flags & SRE_INFO_PREFIX) { - SRE_CODE prefix_len; - GET_ARG; prefix_len = arg; - GET_ARG; - /* Here comes the prefix string */ - if (prefix_len > (uintptr_t)(newcode - code)) - FAIL; - code += prefix_len; - /* And here comes the overlap table */ - if (prefix_len > (uintptr_t)(newcode - code)) - FAIL; - /* Each overlap value should be < prefix_len */ - for (i = 0; i < prefix_len; i++) { - if (code[i] >= prefix_len) - FAIL; - } - code += prefix_len; - } - /* Validate the charset */ - if (flags & SRE_INFO_CHARSET) { - if (!_validate_charset(code, newcode-1)) - FAIL; - if (newcode[-1] != SRE_OP_FAILURE) - FAIL; - code = newcode; - } - else if (code != newcode) { - VTRACE(("code=%p, newcode=%p\n", code, newcode)); - FAIL; - } - } - break; - - case SRE_OP_BRANCH: - { - SRE_CODE *target = NULL; - for (;;) { - GET_SKIP; - if (skip == 0) - break; - /* Stop 2 before the end; we check the JUMP below */ - if (!_validate_inner(code, code+skip-3, self)) - FAIL; - code += skip-3; - /* Check that it ends with a JUMP, and that each JUMP - has the same target */ - GET_OP; - if (op != SRE_OP_JUMP) - FAIL; - GET_SKIP; - if (target == NULL) - target = code+skip-1; - else if (code+skip-1 != target) - FAIL; - } - } - break; - - case SRE_OP_REPEAT_ONE: - case SRE_OP_MIN_REPEAT_ONE: - case SRE_OP_POSSESSIVE_REPEAT_ONE: - { - SRE_CODE min, max; - GET_SKIP; - GET_ARG; min = arg; - GET_ARG; max = arg; - if (min > max) - FAIL; - if (max > SRE_MAXREPEAT) - FAIL; - if 
(!_validate_inner(code, code+skip-4, self)) - FAIL; - code += skip-4; - GET_OP; - if (op != SRE_OP_SUCCESS) - FAIL; - } - break; - - case SRE_OP_REPEAT: - case SRE_OP_POSSESSIVE_REPEAT: - { - SRE_CODE op1 = op, min, max, repeat_index; - GET_SKIP; - GET_ARG; min = arg; - GET_ARG; max = arg; - if (min > max) - FAIL; - if (max > SRE_MAXREPEAT) - FAIL; - if (op1 == SRE_OP_REPEAT) { - GET_ARG; repeat_index = arg; - if (repeat_index >= (size_t)self->repeat_count) - FAIL; - skip -= 4; - } else { - skip -= 3; - } - if (!_validate_inner(code, code+skip, self)) - FAIL; - code += skip; - GET_OP; - if (op1 == SRE_OP_POSSESSIVE_REPEAT) { - if (op != SRE_OP_SUCCESS) - FAIL; - } - else { - if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) - FAIL; - } - } - break; - - case SRE_OP_ATOMIC_GROUP: - { - GET_SKIP; - if (!_validate_inner(code, code+skip-2, self)) - FAIL; - code += skip-2; - GET_OP; - if (op != SRE_OP_SUCCESS) - FAIL; - } - break; - - case SRE_OP_GROUPREF: - case SRE_OP_GROUPREF_IGNORE: - case SRE_OP_GROUPREF_UNI_IGNORE: - case SRE_OP_GROUPREF_LOC_IGNORE: - GET_ARG; - if (arg >= (size_t)self->groups) - FAIL; - break; - - case SRE_OP_GROUPREF_EXISTS: - /* The regex syntax for this is: '(?(group)then|else)', where - 'group' is either an integer group number or a group name, - 'then' and 'else' are sub-regexes, and 'else' is optional. */ - GET_ARG; - if (arg >= (size_t)self->groups) - FAIL; - GET_SKIP_ADJ(1); - code--; /* The skip is relative to the first arg! */ - /* There are two possibilities here: if there is both a 'then' - part and an 'else' part, the generated code looks like: - - GROUPREF_EXISTS - - - ...then part... - JUMP - - ( jumps here) - ...else part... - ( jumps here) - - If there is only a 'then' part, it looks like: - - GROUPREF_EXISTS - - - ...then part... - ( jumps here) - - There is no direct way to decide which it is, and we don't want - to allow arbitrary jumps anywhere in the code; so we just look - for a JUMP opcode preceding our skip target. 
- */ - if (skip >= 3 && skip-3 < (uintptr_t)(end - code) && - code[skip-3] == SRE_OP_JUMP) - { - VTRACE(("both then and else parts present\n")); - if (!_validate_inner(code+1, code+skip-3, self)) - FAIL; - code += skip-2; /* Position after JUMP, at */ - GET_SKIP; - if (!_validate_inner(code, code+skip-1, self)) - FAIL; - code += skip-1; - } - else { - VTRACE(("only a then part present\n")); - if (!_validate_inner(code+1, code+skip-1, self)) - FAIL; - code += skip-1; - } - break; - - case SRE_OP_ASSERT: - case SRE_OP_ASSERT_NOT: - GET_SKIP; - GET_ARG; /* 0 for lookahead, width for lookbehind */ - code--; /* Back up over arg to simplify math below */ - if (arg & 0x80000000) - FAIL; /* Width too large */ - /* Stop 1 before the end; we check the SUCCESS below */ - if (!_validate_inner(code+1, code+skip-2, self)) - FAIL; - code += skip-2; - GET_OP; - if (op != SRE_OP_SUCCESS) - FAIL; - break; - - default: - FAIL; - - } - } - - VTRACE(("okay\n")); - return 1; -} - -static int -_validate_outer(SRE_CODE *code, SRE_CODE *end, PatternObject *self) -{ - if (self->groups < 0 || (size_t)self->groups > SRE_MAXGROUPS || - self->repeat_count < 0 || - code >= end || end[-1] != SRE_OP_SUCCESS) - FAIL; - return _validate_inner(code, end-1, self); -} - -static int -_validate(PatternObject *self) -{ - if (!_validate_outer(self->code, self->code+self->codesize, self)) - { - PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); - return 0; - } - else - VTRACE(("Success!\n")); - return 1; -} - -/* -------------------------------------------------------------------- */ -/* match methods */ - -static int -match_traverse(MatchObject *self, visitproc visit, void *arg) -{ - Py_VISIT(Py_TYPE(self)); - Py_VISIT(self->string); - Py_VISIT(self->regs); - Py_VISIT(self->pattern); - return 0; -} - -static int -match_clear(MatchObject *self) -{ - Py_CLEAR(self->string); - Py_CLEAR(self->regs); - Py_CLEAR(self->pattern); - return 0; -} - -static void -match_dealloc(MatchObject* self) -{ - 
PyTypeObject *tp = Py_TYPE(self); - - PyObject_GC_UnTrack(self); - (void)match_clear(self); - tp->tp_free(self); - Py_DECREF(tp); -} - -static PyObject* -match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) -{ - Py_ssize_t length; - int isbytes, charsize; - Py_buffer view; - PyObject *result; - const void* ptr; - Py_ssize_t i, j; - - assert(0 <= index && index < self->groups); - index *= 2; - - if (self->string == Py_None || self->mark[index] < 0) { - /* return default value if the string or group is undefined */ - Py_INCREF(def); - return def; - } - - ptr = getstring(self->string, &length, &isbytes, &charsize, &view); - if (ptr == NULL) - return NULL; - - i = self->mark[index]; - j = self->mark[index+1]; - i = Py_MIN(i, length); - j = Py_MIN(j, length); - result = getslice(isbytes, ptr, self->string, i, j); - if (isbytes && view.buf != NULL) - PyBuffer_Release(&view); - return result; -} - -static Py_ssize_t -match_getindex(MatchObject* self, PyObject* index) -{ - Py_ssize_t i; - - if (index == NULL) - /* Default value */ - return 0; - - if (PyIndex_Check(index)) { - i = PyNumber_AsSsize_t(index, NULL); - } - else { - i = -1; - - if (self->pattern->groupindex) { - index = PyDict_GetItemWithError(self->pattern->groupindex, index); - if (index && PyLong_Check(index)) { - i = PyLong_AsSsize_t(index); - } - } - } - if (i < 0 || i >= self->groups) { - /* raise IndexError if we were given a bad group number */ - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_IndexError, "no such group"); - } - return -1; - } - - return i; -} - -static PyObject* -match_getslice(MatchObject* self, PyObject* index, PyObject* def) -{ - Py_ssize_t i = match_getindex(self, index); - - if (i < 0) { - return NULL; - } - - return match_getslice_by_index(self, i, def); -} - -/*[clinic input] -_sre.SRE_Match.expand - - template: object - -Return the string obtained by doing backslash substitution on the string template, as done by the sub() method. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template) -/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/ -{ - /* delegate to Python code */ - return call( - SRE_PY_MODULE, "_expand", - PyTuple_Pack(3, self->pattern, self, template) - ); -} - -static PyObject* -match_group(MatchObject* self, PyObject* args) -{ - PyObject* result; - Py_ssize_t i, size; - - size = PyTuple_GET_SIZE(args); - - switch (size) { - case 0: - result = match_getslice(self, _PyLong_GetZero(), Py_None); - break; - case 1: - result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); - break; - default: - /* fetch multiple items */ - result = PyTuple_New(size); - if (!result) - return NULL; - for (i = 0; i < size; i++) { - PyObject* item = match_getslice( - self, PyTuple_GET_ITEM(args, i), Py_None - ); - if (!item) { - Py_DECREF(result); - return NULL; - } - PyTuple_SET_ITEM(result, i, item); - } - break; - } - return result; -} - -static PyObject* -match_getitem(MatchObject* self, PyObject* name) -{ - return match_getslice(self, name, Py_None); -} - -/*[clinic input] -_sre.SRE_Match.groups - - default: object = None - Is used for groups that did not participate in the match. - -Return a tuple containing all the subgroups of the match, from 1. 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value) -/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/ -{ - PyObject* result; - Py_ssize_t index; - - result = PyTuple_New(self->groups-1); - if (!result) - return NULL; - - for (index = 1; index < self->groups; index++) { - PyObject* item; - item = match_getslice_by_index(self, index, default_value); - if (!item) { - Py_DECREF(result); - return NULL; - } - PyTuple_SET_ITEM(result, index-1, item); - } - - return result; -} - -/*[clinic input] -_sre.SRE_Match.groupdict - - default: object = None - Is used for groups that did not participate in the match. - -Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name. -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) -/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/ -{ - PyObject *result; - PyObject *key; - PyObject *value; - Py_ssize_t pos = 0; - Py_hash_t hash; - - result = PyDict_New(); - if (!result || !self->pattern->groupindex) - return result; - - while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { - int status; - Py_INCREF(key); - value = match_getslice(self, key, default_value); - if (!value) { - Py_DECREF(key); - goto failed; - } - status = _PyDict_SetItem_KnownHash(result, key, value, hash); - Py_DECREF(value); - Py_DECREF(key); - if (status < 0) - goto failed; - } - - return result; - -failed: - Py_DECREF(result); - return NULL; -} - -/*[clinic input] -_sre.SRE_Match.start -> Py_ssize_t - - group: object(c_default="NULL") = 0 - / - -Return index of the start of the substring matched by group. 
-[clinic start generated code]*/ - -static Py_ssize_t -_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group) -/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/ -{ - Py_ssize_t index = match_getindex(self, group); - - if (index < 0) { - return -1; - } - - /* mark is -1 if group is undefined */ - return self->mark[index*2]; -} - -/*[clinic input] -_sre.SRE_Match.end -> Py_ssize_t - - group: object(c_default="NULL") = 0 - / - -Return index of the end of the substring matched by group. -[clinic start generated code]*/ - -static Py_ssize_t -_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group) -/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/ -{ - Py_ssize_t index = match_getindex(self, group); - - if (index < 0) { - return -1; - } - - /* mark is -1 if group is undefined */ - return self->mark[index*2+1]; -} - -LOCAL(PyObject*) -_pair(Py_ssize_t i1, Py_ssize_t i2) -{ - PyObject* pair; - PyObject* item; - - pair = PyTuple_New(2); - if (!pair) - return NULL; - - item = PyLong_FromSsize_t(i1); - if (!item) - goto error; - PyTuple_SET_ITEM(pair, 0, item); - - item = PyLong_FromSsize_t(i2); - if (!item) - goto error; - PyTuple_SET_ITEM(pair, 1, item); - - return pair; - - error: - Py_DECREF(pair); - return NULL; -} - -/*[clinic input] -_sre.SRE_Match.span - - group: object(c_default="NULL") = 0 - / - -For match object m, return the 2-tuple (m.start(group), m.end(group)). 
-[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) -/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ -{ - Py_ssize_t index = match_getindex(self, group); - - if (index < 0) { - return NULL; - } - - /* marks are -1 if group is undefined */ - return _pair(self->mark[index*2], self->mark[index*2+1]); -} - -static PyObject* -match_regs(MatchObject* self) -{ - PyObject* regs; - PyObject* item; - Py_ssize_t index; - - regs = PyTuple_New(self->groups); - if (!regs) - return NULL; - - for (index = 0; index < self->groups; index++) { - item = _pair(self->mark[index*2], self->mark[index*2+1]); - if (!item) { - Py_DECREF(regs); - return NULL; - } - PyTuple_SET_ITEM(regs, index, item); - } - - Py_INCREF(regs); - self->regs = regs; - - return regs; -} - -/*[clinic input] -_sre.SRE_Match.__copy__ - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match___copy___impl(MatchObject *self) -/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ -{ - Py_INCREF(self); - return (PyObject *)self; -} - -/*[clinic input] -_sre.SRE_Match.__deepcopy__ - - memo: object - / - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) -/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/ -{ - Py_INCREF(self); - return (PyObject *)self; -} - -PyDoc_STRVAR(match_doc, -"The result of re.match() and re.search().\n\ -Match objects always have a boolean value of True."); - -PyDoc_STRVAR(match_group_doc, -"group([group1, ...]) -> str or tuple.\n\ - Return subgroup(s) of the match by indices or names.\n\ - For 0 returns the entire match."); - -static PyObject * -match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) -{ - if (self->lastindex >= 0) - return PyLong_FromSsize_t(self->lastindex); - Py_RETURN_NONE; -} - -static PyObject * -match_lastgroup_get(MatchObject *self, 
void *Py_UNUSED(ignored)) -{ - if (self->pattern->indexgroup && - self->lastindex >= 0 && - self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) - { - PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, - self->lastindex); - Py_INCREF(result); - return result; - } - Py_RETURN_NONE; -} - -static PyObject * -match_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) -{ - if (self->regs) { - Py_INCREF(self->regs); - return self->regs; - } else - return match_regs(self); -} - -static PyObject * -match_repr(MatchObject *self) -{ - PyObject *result; - PyObject *group0 = match_getslice_by_index(self, 0, Py_None); - if (group0 == NULL) - return NULL; - result = PyUnicode_FromFormat( - "<%s object; span=(%zd, %zd), match=%.50R>", - Py_TYPE(self)->tp_name, - self->mark[0], self->mark[1], group0); - Py_DECREF(group0); - return result; -} - - -static PyObject* -pattern_new_match(_sremodulestate* module_state, - PatternObject* pattern, - SRE_STATE* state, - Py_ssize_t status) -{ - /* create match object (from state object) */ - - MatchObject* match; - Py_ssize_t i, j; - char* base; - int n; - - if (status > 0) { - - /* create match object (with room for extra group marks) */ - /* coverity[ampersand_in_size] */ - match = PyObject_GC_NewVar(MatchObject, - module_state->Match_Type, - 2*(pattern->groups+1)); - if (!match) - return NULL; - - Py_INCREF(pattern); - match->pattern = pattern; - - Py_INCREF(state->string); - match->string = state->string; - - match->regs = NULL; - match->groups = pattern->groups+1; - - /* fill in group slices */ - - base = (char*) state->beginning; - n = state->charsize; - - match->mark[0] = ((char*) state->start - base) / n; - match->mark[1] = ((char*) state->ptr - base) / n; - - for (i = j = 0; i < pattern->groups; i++, j+=2) - if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { - match->mark[j+2] = ((char*) state->mark[j] - base) / n; - match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; - - /* check wrong 
span */ - if (match->mark[j+2] > match->mark[j+3]) { - PyErr_SetString(PyExc_SystemError, - "The span of capturing group is wrong," - " please report a bug for the re module."); - Py_DECREF(match); - return NULL; - } - } else - match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ - - match->pos = state->pos; - match->endpos = state->endpos; - - match->lastindex = state->lastindex; - - PyObject_GC_Track(match); - return (PyObject*) match; - - } else if (status == 0) { - - /* no match */ - Py_RETURN_NONE; - - } - - /* internal error */ - pattern_error(status); - return NULL; -} - - -/* -------------------------------------------------------------------- */ -/* scanner methods (experimental) */ - -static int -scanner_traverse(ScannerObject *self, visitproc visit, void *arg) -{ - Py_VISIT(Py_TYPE(self)); - Py_VISIT(self->pattern); - return 0; -} - -static int -scanner_clear(ScannerObject *self) -{ - Py_CLEAR(self->pattern); - return 0; -} - -static void -scanner_dealloc(ScannerObject* self) -{ - PyTypeObject *tp = Py_TYPE(self); - - PyObject_GC_UnTrack(self); - state_fini(&self->state); - (void)scanner_clear(self); - tp->tp_free(self); - Py_DECREF(tp); -} - -static int -scanner_begin(ScannerObject* self) -{ - if (self->executing) { - PyErr_SetString(PyExc_ValueError, - "regular expression scanner already executing"); - return 0; - } - self->executing = 1; - return 1; -} - -static void -scanner_end(ScannerObject* self) -{ - assert(self->executing); - self->executing = 0; -} - -/*[clinic input] -_sre.SRE_Scanner.match - - cls: defining_class - / - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - SRE_STATE* state = &self->state; - PyObject* match; - Py_ssize_t status; - - if (!scanner_begin(self)) { - return NULL; - } - if (state->start == 
NULL) { - scanner_end(self); - Py_RETURN_NONE; - } - - state_reset(state); - - state->ptr = state->start; - - status = sre_match(state, PatternObject_GetCode(self->pattern)); - if (PyErr_Occurred()) { - scanner_end(self); - return NULL; - } - - match = pattern_new_match(module_state, (PatternObject*) self->pattern, - state, status); - - if (status == 0) - state->start = NULL; - else { - state->must_advance = (state->ptr == state->start); - state->start = state->ptr; - } - - scanner_end(self); - return match; -} - - -/*[clinic input] -_sre.SRE_Scanner.search - - cls: defining_class - / - -[clinic start generated code]*/ - -static PyObject * -_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/ -{ - _sremodulestate *module_state = get_sre_module_state_by_class(cls); - SRE_STATE* state = &self->state; - PyObject* match; - Py_ssize_t status; - - if (!scanner_begin(self)) { - return NULL; - } - if (state->start == NULL) { - scanner_end(self); - Py_RETURN_NONE; - } - - state_reset(state); - - state->ptr = state->start; - - status = sre_search(state, PatternObject_GetCode(self->pattern)); - if (PyErr_Occurred()) { - scanner_end(self); - return NULL; - } - - match = pattern_new_match(module_state, (PatternObject*) self->pattern, - state, status); - - if (status == 0) - state->start = NULL; - else { - state->must_advance = (state->ptr == state->start); - state->start = state->ptr; - } - - scanner_end(self); - return match; -} - -static PyObject * -pattern_scanner(_sremodulestate *module_state, - PatternObject *self, - PyObject *string, - Py_ssize_t pos, - Py_ssize_t endpos) -{ - ScannerObject* scanner; - - /* create scanner object */ - scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); - if (!scanner) - return NULL; - scanner->pattern = NULL; - scanner->executing = 0; - - /* create search state object */ - if (!state_init(&scanner->state, self, string, pos, 
endpos)) { - Py_DECREF(scanner); - return NULL; - } - - Py_INCREF(self); - scanner->pattern = (PyObject*) self; - - PyObject_GC_Track(scanner); - return (PyObject*) scanner; -} - -static Py_hash_t -pattern_hash(PatternObject *self) -{ - Py_hash_t hash, hash2; - - hash = PyObject_Hash(self->pattern); - if (hash == -1) { - return -1; - } - - hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); - hash ^= hash2; - - hash ^= self->flags; - hash ^= self->isbytes; - hash ^= self->codesize; - - if (hash == -1) { - hash = -2; - } - return hash; -} - -static PyObject* -pattern_richcompare(PyObject *lefto, PyObject *righto, int op) -{ - PyTypeObject *tp = Py_TYPE(lefto); - _sremodulestate *module_state = get_sre_module_state_by_class(tp); - PatternObject *left, *right; - int cmp; - - if (op != Py_EQ && op != Py_NE) { - Py_RETURN_NOTIMPLEMENTED; - } - - if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) - { - Py_RETURN_NOTIMPLEMENTED; - } - - if (lefto == righto) { - /* a pattern is equal to itself */ - return PyBool_FromLong(op == Py_EQ); - } - - left = (PatternObject *)lefto; - right = (PatternObject *)righto; - - cmp = (left->flags == right->flags - && left->isbytes == right->isbytes - && left->codesize == right->codesize); - if (cmp) { - /* Compare the code and the pattern because the same pattern can - produce different codes depending on the locale used to compile the - pattern when the re.LOCALE flag is used. Don't compare groups, - indexgroup nor groupindex: they are derivated from the pattern. 
*/ - cmp = (memcmp(left->code, right->code, - sizeof(left->code[0]) * left->codesize) == 0); - } - if (cmp) { - cmp = PyObject_RichCompareBool(left->pattern, right->pattern, - Py_EQ); - if (cmp < 0) { - return NULL; - } - } - if (op == Py_NE) { - cmp = !cmp; - } - return PyBool_FromLong(cmp); -} - -#include "clinic/_sre.c.h" - -static PyMethodDef pattern_methods[] = { - _SRE_SRE_PATTERN_MATCH_METHODDEF - _SRE_SRE_PATTERN_FULLMATCH_METHODDEF - _SRE_SRE_PATTERN_SEARCH_METHODDEF - _SRE_SRE_PATTERN_SUB_METHODDEF - _SRE_SRE_PATTERN_SUBN_METHODDEF - _SRE_SRE_PATTERN_FINDALL_METHODDEF - _SRE_SRE_PATTERN_SPLIT_METHODDEF - _SRE_SRE_PATTERN_FINDITER_METHODDEF - _SRE_SRE_PATTERN_SCANNER_METHODDEF - _SRE_SRE_PATTERN___COPY___METHODDEF - _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF - {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, - PyDoc_STR("See PEP 585")}, - {NULL, NULL} -}; - -static PyGetSetDef pattern_getset[] = { - {"groupindex", (getter)pattern_groupindex, (setter)NULL, - "A dictionary mapping group names to group numbers."}, - {NULL} /* Sentinel */ -}; - -#define PAT_OFF(x) offsetof(PatternObject, x) -static PyMemberDef pattern_members[] = { - {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY, - "The pattern string from which the RE object was compiled."}, - {"flags", T_INT, PAT_OFF(flags), READONLY, - "The regex matching flags."}, - {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY, - "The number of capturing groups in the pattern."}, - {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY}, - {NULL} /* Sentinel */ -}; - -static PyType_Slot pattern_slots[] = { - {Py_tp_dealloc, (destructor)pattern_dealloc}, - {Py_tp_repr, (reprfunc)pattern_repr}, - {Py_tp_hash, (hashfunc)pattern_hash}, - {Py_tp_doc, (void *)pattern_doc}, - {Py_tp_richcompare, pattern_richcompare}, - {Py_tp_methods, pattern_methods}, - {Py_tp_members, pattern_members}, - {Py_tp_getset, pattern_getset}, - {Py_tp_traverse, pattern_traverse}, - {Py_tp_clear, pattern_clear}, 
- {0, NULL}, -}; - -static PyType_Spec pattern_spec = { - .name = "re.Pattern", - .basicsize = sizeof(PatternObject), - .itemsize = sizeof(SRE_CODE), - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | - Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), - .slots = pattern_slots, -}; - -static PyMethodDef match_methods[] = { - {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, - _SRE_SRE_MATCH_START_METHODDEF - _SRE_SRE_MATCH_END_METHODDEF - _SRE_SRE_MATCH_SPAN_METHODDEF - _SRE_SRE_MATCH_GROUPS_METHODDEF - _SRE_SRE_MATCH_GROUPDICT_METHODDEF - _SRE_SRE_MATCH_EXPAND_METHODDEF - _SRE_SRE_MATCH___COPY___METHODDEF - _SRE_SRE_MATCH___DEEPCOPY___METHODDEF - {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, - PyDoc_STR("See PEP 585")}, - {NULL, NULL} -}; - -static PyGetSetDef match_getset[] = { - {"lastindex", (getter)match_lastindex_get, (setter)NULL, - "The integer index of the last matched capturing group."}, - {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, - "The name of the last matched capturing group."}, - {"regs", (getter)match_regs_get, (setter)NULL}, - {NULL} -}; - -#define MATCH_OFF(x) offsetof(MatchObject, x) -static PyMemberDef match_members[] = { - {"string", T_OBJECT, MATCH_OFF(string), READONLY, - "The string passed to match() or search()."}, - {"re", T_OBJECT, MATCH_OFF(pattern), READONLY, - "The regular expression object."}, - {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY, - "The index into the string at which the RE engine started looking for a match."}, - {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY, - "The index into the string beyond which the RE engine will not go."}, - {NULL} -}; - -/* FIXME: implement setattr("string", None) as a special case (to - detach the associated string, if any */ -static PyType_Slot match_slots[] = { - {Py_tp_dealloc, match_dealloc}, - {Py_tp_repr, match_repr}, - {Py_tp_doc, (void *)match_doc}, - {Py_tp_methods, match_methods}, - {Py_tp_members, match_members}, - 
{Py_tp_getset, match_getset}, - {Py_tp_traverse, match_traverse}, - {Py_tp_clear, match_clear}, - - /* As mapping. - * - * Match objects do not support length or assignment, but do support - * __getitem__. - */ - {Py_mp_subscript, match_getitem}, - - {0, NULL}, -}; - -static PyType_Spec match_spec = { - .name = "re.Match", - .basicsize = sizeof(MatchObject), - .itemsize = sizeof(Py_ssize_t), - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | - Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), - .slots = match_slots, -}; - -static PyMethodDef scanner_methods[] = { - _SRE_SRE_SCANNER_MATCH_METHODDEF - _SRE_SRE_SCANNER_SEARCH_METHODDEF - {NULL, NULL} -}; - -#define SCAN_OFF(x) offsetof(ScannerObject, x) -static PyMemberDef scanner_members[] = { - {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, - {NULL} /* Sentinel */ -}; - -static PyType_Slot scanner_slots[] = { - {Py_tp_dealloc, scanner_dealloc}, - {Py_tp_methods, scanner_methods}, - {Py_tp_members, scanner_members}, - {Py_tp_traverse, scanner_traverse}, - {Py_tp_clear, scanner_clear}, - {0, NULL}, -}; - -static PyType_Spec scanner_spec = { - .name = "_" SRE_MODULE ".SRE_Scanner", - .basicsize = sizeof(ScannerObject), - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | - Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), - .slots = scanner_slots, -}; - -static PyMethodDef _functions[] = { - _SRE_COMPILE_METHODDEF - _SRE_GETCODESIZE_METHODDEF - _SRE_ASCII_ISCASED_METHODDEF - _SRE_UNICODE_ISCASED_METHODDEF - _SRE_ASCII_TOLOWER_METHODDEF - _SRE_UNICODE_TOLOWER_METHODDEF - {NULL, NULL} -}; - -static int -sre_traverse(PyObject *module, visitproc visit, void *arg) -{ - _sremodulestate *state = get_sre_module_state(module); - - Py_VISIT(state->Pattern_Type); - Py_VISIT(state->Match_Type); - Py_VISIT(state->Scanner_Type); - - return 0; -} - -static int -sre_clear(PyObject *module) -{ - _sremodulestate *state = get_sre_module_state(module); - - Py_CLEAR(state->Pattern_Type); - 
Py_CLEAR(state->Match_Type); - Py_CLEAR(state->Scanner_Type); - - return 0; -} - -static void -sre_free(void *module) -{ - sre_clear((PyObject *)module); -} - -#define CREATE_TYPE(m, type, spec) \ -do { \ - type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \ - if (type == NULL) { \ - goto error; \ - } \ -} while (0) - -#define ADD_ULONG_CONSTANT(module, name, value) \ - do { \ - PyObject *o = PyLong_FromUnsignedLong(value); \ - if (!o) \ - goto error; \ - int res = PyModule_AddObjectRef(module, name, o); \ - Py_DECREF(o); \ - if (res < 0) { \ - goto error; \ - } \ -} while (0) - -static int -sre_exec(PyObject *m) -{ - _sremodulestate *state; - - /* Create heap types */ - state = get_sre_module_state(m); - CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); - CREATE_TYPE(m, state->Match_Type, &match_spec); - CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); - - if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) { - goto error; - } - - if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) { - goto error; - } - - ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT); - ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS); - - if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) { - goto error; - } - - return 0; - -error: - return -1; -} - -static PyModuleDef_Slot sre_slots[] = { - {Py_mod_exec, sre_exec}, - {0, NULL}, -}; - -static struct PyModuleDef sremodule = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "_" SRE_MODULE, - .m_size = sizeof(_sremodulestate), - .m_methods = _functions, - .m_slots = sre_slots, - .m_traverse = sre_traverse, - .m_free = sre_free, - .m_clear = sre_clear, -}; - -PyMODINIT_FUNC -PyInit__sre(void) -{ - return PyModuleDef_Init(&sremodule); -} - -/* vim:ts=4:sw=4:et -*/ diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h new file mode 100644 index 0000000..34cbe21 --- /dev/null +++ b/Modules/_sre/clinic/sre.c.h @@ -0,0 +1,926 @@ +/*[clinic input] +preserve +[clinic start generated 
code]*/ + +PyDoc_STRVAR(_sre_getcodesize__doc__, +"getcodesize($module, /)\n" +"--\n" +"\n"); + +#define _SRE_GETCODESIZE_METHODDEF \ + {"getcodesize", (PyCFunction)_sre_getcodesize, METH_NOARGS, _sre_getcodesize__doc__}, + +static int +_sre_getcodesize_impl(PyObject *module); + +static PyObject * +_sre_getcodesize(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + int _return_value; + + _return_value = _sre_getcodesize_impl(module); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_ascii_iscased__doc__, +"ascii_iscased($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_ASCII_ISCASED_METHODDEF \ + {"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__}, + +static int +_sre_ascii_iscased_impl(PyObject *module, int character); + +static PyObject * +_sre_ascii_iscased(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int character; + int _return_value; + + character = _PyLong_AsInt(arg); + if (character == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = _sre_ascii_iscased_impl(module, character); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_unicode_iscased__doc__, +"unicode_iscased($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_UNICODE_ISCASED_METHODDEF \ + {"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__}, + +static int +_sre_unicode_iscased_impl(PyObject *module, int character); + +static PyObject * +_sre_unicode_iscased(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int character; + int _return_value; + + character = _PyLong_AsInt(arg); + if (character == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = 
_sre_unicode_iscased_impl(module, character); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_ascii_tolower__doc__, +"ascii_tolower($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_ASCII_TOLOWER_METHODDEF \ + {"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__}, + +static int +_sre_ascii_tolower_impl(PyObject *module, int character); + +static PyObject * +_sre_ascii_tolower(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int character; + int _return_value; + + character = _PyLong_AsInt(arg); + if (character == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = _sre_ascii_tolower_impl(module, character); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_unicode_tolower__doc__, +"unicode_tolower($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_UNICODE_TOLOWER_METHODDEF \ + {"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__}, + +static int +_sre_unicode_tolower_impl(PyObject *module, int character); + +static PyObject * +_sre_unicode_tolower(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int character; + int _return_value; + + character = _PyLong_AsInt(arg); + if (character == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = _sre_unicode_tolower_impl(module, character); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, +"match($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Matches zero or more characters at the beginning of the string."); + +#define _SRE_SRE_PATTERN_MATCH_METHODDEF \ + 
{"match", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_match, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, + +static PyObject * +_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_match(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:match", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = _sre_SRE_Pattern_match_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__, +"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Matches against all of the string."); + +#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \ + {"fullmatch", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_fullmatch, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__}, + +static PyObject * +_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_fullmatch(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:fullmatch", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = 
_sre_SRE_Pattern_fullmatch_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_search__doc__, +"search($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Scan through string looking for a match, and return a corresponding match object instance.\n" +"\n" +"Return None if no position in the string matches."); + +#define _SRE_SRE_PATTERN_SEARCH_METHODDEF \ + {"search", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_search, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_search__doc__}, + +static PyObject * +_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_search(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:search", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = _sre_SRE_Pattern_search_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_findall__doc__, +"findall($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Return a list of all non-overlapping matches of pattern in string."); + +#define _SRE_SRE_PATTERN_FINDALL_METHODDEF \ + {"findall", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_findall, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_findall__doc__}, + +static PyObject * +_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, + Py_ssize_t pos, Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_findall(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject 
*return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "findall", 0}; + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); + if (!args) { + goto exit; + } + string = args[0]; + if (!noptargs) { + goto skip_optional_pos; + } + if (args[1]) { + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + pos = ival; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + endpos = ival; + } +skip_optional_pos: + return_value = _sre_SRE_Pattern_findall_impl(self, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_finditer__doc__, +"finditer($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Return an iterator over all non-overlapping matches for the RE pattern in string.\n" +"\n" +"For each match, the iterator returns a match object."); + +#define _SRE_SRE_PATTERN_FINDITER_METHODDEF \ + {"finditer", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_finditer, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_finditer__doc__}, + +static PyObject * +_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_finditer(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; 
+ static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:finditer", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = _sre_SRE_Pattern_finditer_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_scanner__doc__, +"scanner($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN_SCANNER_METHODDEF \ + {"scanner", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_scanner, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_scanner__doc__}, + +static PyObject * +_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_scanner(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; + static _PyArg_Parser _parser = {"O|nn:scanner", _keywords, 0}; + PyObject *string; + Py_ssize_t pos = 0; + Py_ssize_t endpos = PY_SSIZE_T_MAX; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &string, &pos, &endpos)) { + goto exit; + } + return_value = _sre_SRE_Pattern_scanner_impl(self, cls, string, pos, endpos); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_split__doc__, +"split($self, /, string, maxsplit=0)\n" +"--\n" +"\n" +"Split string by the occurrences of pattern."); + +#define _SRE_SRE_PATTERN_SPLIT_METHODDEF \ + {"split", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_split, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_split__doc__}, + +static PyObject * +_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, + Py_ssize_t maxsplit); + +static 
PyObject * +_sre_SRE_Pattern_split(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"string", "maxsplit", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "split", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + PyObject *string; + Py_ssize_t maxsplit = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + string = args[0]; + if (!noptargs) { + goto skip_optional_pos; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[1]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + maxsplit = ival; + } +skip_optional_pos: + return_value = _sre_SRE_Pattern_split_impl(self, string, maxsplit); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_sub__doc__, +"sub($self, /, repl, string, count=0)\n" +"--\n" +"\n" +"Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl."); + +#define _SRE_SRE_PATTERN_SUB_METHODDEF \ + {"sub", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_sub, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_sub__doc__}, + +static PyObject * +_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, + PyObject *repl, PyObject *string, Py_ssize_t count); + +static PyObject * +_sre_SRE_Pattern_sub(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"repl", "string", "count", NULL}; + static _PyArg_Parser _parser = {"OO|n:sub", _keywords, 0}; + PyObject *repl; + PyObject *string; + Py_ssize_t count = 0; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &repl, 
&string, &count)) { + goto exit; + } + return_value = _sre_SRE_Pattern_sub_impl(self, cls, repl, string, count); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_subn__doc__, +"subn($self, /, repl, string, count=0)\n" +"--\n" +"\n" +"Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl."); + +#define _SRE_SRE_PATTERN_SUBN_METHODDEF \ + {"subn", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_subn, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_subn__doc__}, + +static PyObject * +_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, + PyObject *repl, PyObject *string, + Py_ssize_t count); + +static PyObject * +_sre_SRE_Pattern_subn(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"repl", "string", "count", NULL}; + static _PyArg_Parser _parser = {"OO|n:subn", _keywords, 0}; + PyObject *repl; + PyObject *string; + Py_ssize_t count = 0; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &repl, &string, &count)) { + goto exit; + } + return_value = _sre_SRE_Pattern_subn_impl(self, cls, repl, string, count); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN___COPY___METHODDEF \ + {"__copy__", (PyCFunction)_sre_SRE_Pattern___copy__, METH_NOARGS, _sre_SRE_Pattern___copy____doc__}, + +static PyObject * +_sre_SRE_Pattern___copy___impl(PatternObject *self); + +static PyObject * +_sre_SRE_Pattern___copy__(PatternObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _sre_SRE_Pattern___copy___impl(self); +} + +PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__, +"__deepcopy__($self, memo, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \ + {"__deepcopy__", 
(PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__}, + +PyDoc_STRVAR(_sre_compile__doc__, +"compile($module, /, pattern, flags, code, groups, groupindex,\n" +" indexgroup, repeat_count)\n" +"--\n" +"\n"); + +#define _SRE_COMPILE_METHODDEF \ + {"compile", (PyCFunction)(void(*)(void))_sre_compile, METH_FASTCALL|METH_KEYWORDS, _sre_compile__doc__}, + +static PyObject * +_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, + PyObject *code, Py_ssize_t groups, PyObject *groupindex, + PyObject *indexgroup, Py_ssize_t repeat_count); + +static PyObject * +_sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", "repeat_count", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "compile", 0}; + PyObject *argsbuf[7]; + PyObject *pattern; + int flags; + PyObject *code; + Py_ssize_t groups; + PyObject *groupindex; + PyObject *indexgroup; + Py_ssize_t repeat_count; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 7, 7, 0, argsbuf); + if (!args) { + goto exit; + } + pattern = args[0]; + flags = _PyLong_AsInt(args[1]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } + if (!PyList_Check(args[2])) { + _PyArg_BadArgument("compile", "argument 'code'", "list", args[2]); + goto exit; + } + code = args[2]; + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[3]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + groups = ival; + } + if (!PyDict_Check(args[4])) { + _PyArg_BadArgument("compile", "argument 'groupindex'", "dict", args[4]); + goto exit; + } + groupindex = args[4]; + if (!PyTuple_Check(args[5])) { + _PyArg_BadArgument("compile", "argument 'indexgroup'", "tuple", args[5]); + goto exit; + } + indexgroup = args[5]; 
+ { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[6]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + repeat_count = ival; + } + return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup, repeat_count); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_expand__doc__, +"expand($self, /, template)\n" +"--\n" +"\n" +"Return the string obtained by doing backslash substitution on the string template, as done by the sub() method."); + +#define _SRE_SRE_MATCH_EXPAND_METHODDEF \ + {"expand", (PyCFunction)(void(*)(void))_sre_SRE_Match_expand, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_expand__doc__}, + +static PyObject * +_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template); + +static PyObject * +_sre_SRE_Match_expand(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"template", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "expand", 0}; + PyObject *argsbuf[1]; + PyObject *template; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + template = args[0]; + return_value = _sre_SRE_Match_expand_impl(self, template); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_groups__doc__, +"groups($self, /, default=None)\n" +"--\n" +"\n" +"Return a tuple containing all the subgroups of the match, from 1.\n" +"\n" +" default\n" +" Is used for groups that did not participate in the match."); + +#define _SRE_SRE_MATCH_GROUPS_METHODDEF \ + {"groups", (PyCFunction)(void(*)(void))_sre_SRE_Match_groups, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groups__doc__}, + +static PyObject * +_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value); + +static PyObject * +_sre_SRE_Match_groups(MatchObject *self, 
PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"default", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "groups", 0}; + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *default_value = Py_None; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + default_value = args[0]; +skip_optional_pos: + return_value = _sre_SRE_Match_groups_impl(self, default_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_groupdict__doc__, +"groupdict($self, /, default=None)\n" +"--\n" +"\n" +"Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.\n" +"\n" +" default\n" +" Is used for groups that did not participate in the match."); + +#define _SRE_SRE_MATCH_GROUPDICT_METHODDEF \ + {"groupdict", (PyCFunction)(void(*)(void))_sre_SRE_Match_groupdict, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groupdict__doc__}, + +static PyObject * +_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value); + +static PyObject * +_sre_SRE_Match_groupdict(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"default", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "groupdict", 0}; + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; + PyObject *default_value = Py_None; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + default_value = args[0]; +skip_optional_pos: + return_value = _sre_SRE_Match_groupdict_impl(self, default_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_start__doc__, +"start($self, group=0, /)\n" +"--\n" +"\n" +"Return index of the start of the substring matched by group."); + +#define _SRE_SRE_MATCH_START_METHODDEF \ + {"start", (PyCFunction)(void(*)(void))_sre_SRE_Match_start, METH_FASTCALL, _sre_SRE_Match_start__doc__}, + +static Py_ssize_t +_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_start(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *group = NULL; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("start", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + group = args[0]; +skip_optional: + _return_value = _sre_SRE_Match_start_impl(self, group); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_end__doc__, +"end($self, group=0, /)\n" +"--\n" +"\n" +"Return index of the end of the substring matched by group."); + +#define _SRE_SRE_MATCH_END_METHODDEF \ + {"end", (PyCFunction)(void(*)(void))_sre_SRE_Match_end, METH_FASTCALL, _sre_SRE_Match_end__doc__}, + +static Py_ssize_t +_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_end(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *group = NULL; + Py_ssize_t _return_value; + + if (!_PyArg_CheckPositional("end", nargs, 0, 1)) { + goto exit; + } + if (nargs < 
1) { + goto skip_optional; + } + group = args[0]; +skip_optional: + _return_value = _sre_SRE_Match_end_impl(self, group); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromSsize_t(_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_span__doc__, +"span($self, group=0, /)\n" +"--\n" +"\n" +"For match object m, return the 2-tuple (m.start(group), m.end(group))."); + +#define _SRE_SRE_MATCH_SPAN_METHODDEF \ + {"span", (PyCFunction)(void(*)(void))_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__}, + +static PyObject * +_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_span(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *group = NULL; + + if (!_PyArg_CheckPositional("span", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + group = args[0]; +skip_optional: + return_value = _sre_SRE_Match_span_impl(self, group); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_MATCH___COPY___METHODDEF \ + {"__copy__", (PyCFunction)_sre_SRE_Match___copy__, METH_NOARGS, _sre_SRE_Match___copy____doc__}, + +static PyObject * +_sre_SRE_Match___copy___impl(MatchObject *self); + +static PyObject * +_sre_SRE_Match___copy__(MatchObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _sre_SRE_Match___copy___impl(self); +} + +PyDoc_STRVAR(_sre_SRE_Match___deepcopy____doc__, +"__deepcopy__($self, memo, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_MATCH___DEEPCOPY___METHODDEF \ + {"__deepcopy__", (PyCFunction)_sre_SRE_Match___deepcopy__, METH_O, _sre_SRE_Match___deepcopy____doc__}, + +PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, +"match($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_SCANNER_MATCH_METHODDEF \ + {"match", (PyCFunction)(void(*)(void))_sre_SRE_Scanner_match, 
METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, + +static PyObject * +_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); + +static PyObject * +_sre_SRE_Scanner_match(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = { NULL}; + static _PyArg_Parser _parser = {":match", _keywords, 0}; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser + )) { + goto exit; + } + return_value = _sre_SRE_Scanner_match_impl(self, cls); + +exit: + return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, +"search($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_SCANNER_SEARCH_METHODDEF \ + {"search", (PyCFunction)(void(*)(void))_sre_SRE_Scanner_search, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_search__doc__}, + +static PyObject * +_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls); + +static PyObject * +_sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = { NULL}; + static _PyArg_Parser _parser = {":search", _keywords, 0}; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser + )) { + goto exit; + } + return_value = _sre_SRE_Scanner_search_impl(self, cls); + +exit: + return return_value; +} +/*[clinic end generated code: output=9d7510a57a157a38 input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c new file mode 100644 index 0000000..491734f --- /dev/null +++ b/Modules/_sre/sre.c @@ -0,0 +1,3063 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * partial history: + * 1999-10-24 fl created (based on existing template matcher code) + * 2000-03-06 fl first alpha, sort of + * 2000-08-01 fl fixes for 1.6b1 + * 2000-08-07 fl use 
PyOS_CheckStack() if available + * 2000-09-20 fl added expand method + * 2001-03-20 fl lots of fixes for 2.1b2 + * 2001-04-15 fl export copyright as Python attribute, not global + * 2001-04-28 fl added __copy__ methods (work in progress) + * 2001-05-14 fl fixes for 1.5.2 compatibility + * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl added sub/subn primitive + * 2001-10-24 fl added finditer primitive (for 2.2 only) + * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl fixed empty sub/subn return type + * 2003-04-18 mvl fully support 4-byte codes + * 2003-10-17 gn implemented non recursive scheme + * 2013-02-04 mrab added fullmatch primitive + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * This version of the SRE library can be redistributed under CNRI's + * Python 1.6 license. For any other use, please contact Secret Labs + * AB (info@pythonware.com). + * + * Portions of this engine have been developed in cooperation with + * CNRI. Hewlett-Packard provided funding for 1.6 integration and + * other compatibility work. 
+ */ + +static const char copyright[] = + " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_moduleobject.h" // _PyModule_GetState() +#include "structmember.h" // PyMemberDef + +#include "sre.h" + +#define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) + +#include <ctype.h> + +/* name of this module, minus the leading underscore */ +#if !defined(SRE_MODULE) +#define SRE_MODULE "sre" +#endif + +#define SRE_PY_MODULE "re" + +/* defining this one enables tracing */ +#undef VERBOSE + +/* -------------------------------------------------------------------- */ + +#if defined(_MSC_VER) +#pragma optimize("agtw", on) /* doesn't seem to make much difference... */ +#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ +/* fastest possible local call under MSVC */ +#define LOCAL(type) static __inline type __fastcall +#else +#define LOCAL(type) static inline type +#endif + +/* error codes */ +#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ +#define SRE_ERROR_STATE -2 /* illegal state */ +#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ +#define SRE_ERROR_MEMORY -9 /* out of memory */ +#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ + +#if defined(VERBOSE) +#define TRACE(v) printf v +#else +#define TRACE(v) +#endif + +/* -------------------------------------------------------------------- */ +/* search engine state */ + +#define SRE_IS_DIGIT(ch)\ + ((ch) <= '9' && Py_ISDIGIT(ch)) +#define SRE_IS_SPACE(ch)\ + ((ch) <= ' ' && Py_ISSPACE(ch)) +#define SRE_IS_LINEBREAK(ch)\ + ((ch) == '\n') +#define SRE_IS_WORD(ch)\ + ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) + +static unsigned int sre_lower_ascii(unsigned int ch) +{ + return ((ch) < 128 ? 
Py_TOLOWER(ch) : ch); +} + +/* locale-specific character predicates */ +/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids + * warnings when c's type supports only numbers < N+1 */ +#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) +#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') + +static unsigned int sre_lower_locale(unsigned int ch) +{ + return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); +} + +static unsigned int sre_upper_locale(unsigned int ch) +{ + return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); +} + +/* unicode-specific character predicates */ + +#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) +#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) +#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) +#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) +#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') + +static unsigned int sre_lower_unicode(unsigned int ch) +{ + return (unsigned int) Py_UNICODE_TOLOWER(ch); +} + +static unsigned int sre_upper_unicode(unsigned int ch) +{ + return (unsigned int) Py_UNICODE_TOUPPER(ch); +} + +LOCAL(int) +sre_category(SRE_CODE category, unsigned int ch) +{ + switch (category) { + + case SRE_CATEGORY_DIGIT: + return SRE_IS_DIGIT(ch); + case SRE_CATEGORY_NOT_DIGIT: + return !SRE_IS_DIGIT(ch); + case SRE_CATEGORY_SPACE: + return SRE_IS_SPACE(ch); + case SRE_CATEGORY_NOT_SPACE: + return !SRE_IS_SPACE(ch); + case SRE_CATEGORY_WORD: + return SRE_IS_WORD(ch); + case SRE_CATEGORY_NOT_WORD: + return !SRE_IS_WORD(ch); + case SRE_CATEGORY_LINEBREAK: + return SRE_IS_LINEBREAK(ch); + case SRE_CATEGORY_NOT_LINEBREAK: + return !SRE_IS_LINEBREAK(ch); + + case SRE_CATEGORY_LOC_WORD: + return SRE_LOC_IS_WORD(ch); + case SRE_CATEGORY_LOC_NOT_WORD: + return !SRE_LOC_IS_WORD(ch); + + case SRE_CATEGORY_UNI_DIGIT: + return SRE_UNI_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_NOT_DIGIT: + return !SRE_UNI_IS_DIGIT(ch); + case SRE_CATEGORY_UNI_SPACE: + return 
SRE_UNI_IS_SPACE(ch); + case SRE_CATEGORY_UNI_NOT_SPACE: + return !SRE_UNI_IS_SPACE(ch); + case SRE_CATEGORY_UNI_WORD: + return SRE_UNI_IS_WORD(ch); + case SRE_CATEGORY_UNI_NOT_WORD: + return !SRE_UNI_IS_WORD(ch); + case SRE_CATEGORY_UNI_LINEBREAK: + return SRE_UNI_IS_LINEBREAK(ch); + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + return !SRE_UNI_IS_LINEBREAK(ch); + } + return 0; +} + +LOCAL(int) +char_loc_ignore(SRE_CODE pattern, SRE_CODE ch) +{ + return ch == pattern + || (SRE_CODE) sre_lower_locale(ch) == pattern + || (SRE_CODE) sre_upper_locale(ch) == pattern; +} + + +/* helpers */ + +static void +data_stack_dealloc(SRE_STATE* state) +{ + if (state->data_stack) { + PyMem_Free(state->data_stack); + state->data_stack = NULL; + } + state->data_stack_size = state->data_stack_base = 0; +} + +static int +data_stack_grow(SRE_STATE* state, Py_ssize_t size) +{ + Py_ssize_t minsize, cursize; + minsize = state->data_stack_base+size; + cursize = state->data_stack_size; + if (cursize < minsize) { + void* stack; + cursize = minsize+minsize/4+1024; + TRACE(("allocate/grow stack %zd\n", cursize)); + stack = PyMem_Realloc(state->data_stack, cursize); + if (!stack) { + data_stack_dealloc(state); + return SRE_ERROR_MEMORY; + } + state->data_stack = (char *)stack; + state->data_stack_size = cursize; + } + return 0; +} + +/* generate 8-bit version */ + +#define SRE_CHAR Py_UCS1 +#define SIZEOF_SRE_CHAR 1 +#define SRE(F) sre_ucs1_##F +#include "sre_lib.h" + +/* generate 16-bit unicode version */ + +#define SRE_CHAR Py_UCS2 +#define SIZEOF_SRE_CHAR 2 +#define SRE(F) sre_ucs2_##F +#include "sre_lib.h" + +/* generate 32-bit unicode version */ + +#define SRE_CHAR Py_UCS4 +#define SIZEOF_SRE_CHAR 4 +#define SRE(F) sre_ucs4_##F +#include "sre_lib.h" + +/* -------------------------------------------------------------------- */ +/* factories and destructors */ + +/* module state */ +typedef struct { + PyTypeObject *Pattern_Type; + PyTypeObject *Match_Type; + PyTypeObject *Scanner_Type; +} 
_sremodulestate; + +static _sremodulestate * +get_sre_module_state(PyObject *m) +{ + _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m); + assert(state); + return state; +} + +static struct PyModuleDef sremodule; +#define get_sre_module_state_by_class(cls) \ + (get_sre_module_state(PyType_GetModule(cls))) + +/* see sre.h for object declarations */ +static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); +static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); + +/*[clinic input] +module _sre +class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" +class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type" +class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/ + +/*[clinic input] +_sre.getcodesize -> int +[clinic start generated code]*/ + +static int +_sre_getcodesize_impl(PyObject *module) +/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ +{ + return sizeof(SRE_CODE); +} + +/*[clinic input] +_sre.ascii_iscased -> bool + + character: int + / + +[clinic start generated code]*/ + +static int +_sre_ascii_iscased_impl(PyObject *module, int character) +/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/ +{ + unsigned int ch = (unsigned int)character; + return ch < 128 && Py_ISALPHA(ch); +} + +/*[clinic input] +_sre.unicode_iscased -> bool + + character: int + / + +[clinic start generated code]*/ + +static int +_sre_unicode_iscased_impl(PyObject *module, int character) +/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/ +{ + unsigned int ch = (unsigned int)character; + return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch); +} + +/*[clinic input] 
+_sre.ascii_tolower -> int + + character: int + / + +[clinic start generated code]*/ + +static int +_sre_ascii_tolower_impl(PyObject *module, int character) +/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ +{ + return sre_lower_ascii(character); +} + +/*[clinic input] +_sre.unicode_tolower -> int + + character: int + / + +[clinic start generated code]*/ + +static int +_sre_unicode_tolower_impl(PyObject *module, int character) +/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ +{ + return sre_lower_unicode(character); +} + +LOCAL(void) +state_reset(SRE_STATE* state) +{ + /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ + /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ + + state->lastmark = -1; + state->lastindex = -1; + + state->repeat = NULL; + + data_stack_dealloc(state); +} + +static const void* +getstring(PyObject* string, Py_ssize_t* p_length, + int* p_isbytes, int* p_charsize, + Py_buffer *view) +{ + /* given a python object, return a data pointer, a length (in + characters), and a character size. return NULL if the object + is not a string (or not compatible) */ + + /* Unicode objects do not support the buffer API. So, get the data + directly instead. 
*/ + if (PyUnicode_Check(string)) { + if (PyUnicode_READY(string) == -1) + return NULL; + *p_length = PyUnicode_GET_LENGTH(string); + *p_charsize = PyUnicode_KIND(string); + *p_isbytes = 0; + return PyUnicode_DATA(string); + } + + /* get pointer to byte string buffer */ + if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, "expected string or bytes-like " + "object, got '%.200s'", Py_TYPE(string)->tp_name); + return NULL; + } + + *p_length = view->len; + *p_charsize = 1; + *p_isbytes = 1; + + if (view->buf == NULL) { + PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); + PyBuffer_Release(view); + view->buf = NULL; + return NULL; + } + return view->buf; +} + +LOCAL(PyObject*) +state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, + Py_ssize_t start, Py_ssize_t end) +{ + /* prepare state object */ + + Py_ssize_t length; + int isbytes, charsize; + const void* ptr; + + memset(state, 0, sizeof(SRE_STATE)); + + state->mark = PyMem_New(const void *, pattern->groups * 2); + if (!state->mark) { + PyErr_NoMemory(); + goto err; + } + state->lastmark = -1; + state->lastindex = -1; + + state->repeats_array = PyMem_New(SRE_REPEAT, pattern->repeat_count); + if (!state->repeats_array) { + PyErr_NoMemory(); + goto err; + } + + state->buffer.buf = NULL; + ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); + if (!ptr) + goto err; + + if (isbytes && pattern->isbytes == 0) { + PyErr_SetString(PyExc_TypeError, + "cannot use a string pattern on a bytes-like object"); + goto err; + } + if (!isbytes && pattern->isbytes > 0) { + PyErr_SetString(PyExc_TypeError, + "cannot use a bytes pattern on a string-like object"); + goto err; + } + + /* adjust boundaries */ + if (start < 0) + start = 0; + else if (start > length) + start = length; + + if (end < 0) + end = 0; + else if (end > length) + end = length; + + state->isbytes = isbytes; + state->charsize = charsize; + state->match_all = 0; + state->must_advance = 0; + + 
state->beginning = ptr; + + state->start = (void*) ((char*) ptr + start * state->charsize); + state->end = (void*) ((char*) ptr + end * state->charsize); + + Py_INCREF(string); + state->string = string; + state->pos = start; + state->endpos = end; + + return string; + err: + /* We add an explicit cast here because MSVC has a bug when + compiling C code where it believes that `const void**` cannot be + safely casted to `void*`, see bpo-39943 for details. */ + PyMem_Free((void*) state->mark); + state->mark = NULL; + PyMem_Free(state->repeats_array); + state->repeats_array = NULL; + + if (state->buffer.buf) + PyBuffer_Release(&state->buffer); + return NULL; +} + +LOCAL(void) +state_fini(SRE_STATE* state) +{ + if (state->buffer.buf) + PyBuffer_Release(&state->buffer); + Py_XDECREF(state->string); + data_stack_dealloc(state); + /* See above PyMem_Del for why we explicitly cast here. */ + PyMem_Free((void*) state->mark); + state->mark = NULL; + PyMem_Free(state->repeats_array); + state->repeats_array = NULL; +} + +/* calculate offset from start of string */ +#define STATE_OFFSET(state, member)\ + (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) + +LOCAL(PyObject*) +getslice(int isbytes, const void *ptr, + PyObject* string, Py_ssize_t start, Py_ssize_t end) +{ + if (isbytes) { + if (PyBytes_CheckExact(string) && + start == 0 && end == PyBytes_GET_SIZE(string)) { + Py_INCREF(string); + return string; + } + return PyBytes_FromStringAndSize( + (const char *)ptr + start, end - start); + } + else { + return PyUnicode_Substring(string, start, end); + } +} + +LOCAL(PyObject*) +state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) +{ + Py_ssize_t i, j; + + index = (index - 1) * 2; + + if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { + if (empty) + /* want empty string */ + i = j = 0; + else { + Py_RETURN_NONE; + } + } else { + i = STATE_OFFSET(state, state->mark[index]); + j = 
STATE_OFFSET(state, state->mark[index+1]); + + /* check wrong span */ + if (i > j) { + PyErr_SetString(PyExc_SystemError, + "The span of capturing group is wrong," + " please report a bug for the re module."); + return NULL; + } + } + + return getslice(state->isbytes, state->beginning, string, i, j); +} + +static void +pattern_error(Py_ssize_t status) +{ + switch (status) { + case SRE_ERROR_RECURSION_LIMIT: + /* This error code seems to be unused. */ + PyErr_SetString( + PyExc_RecursionError, + "maximum recursion limit exceeded" + ); + break; + case SRE_ERROR_MEMORY: + PyErr_NoMemory(); + break; + case SRE_ERROR_INTERRUPTED: + /* An exception has already been raised, so let it fly */ + break; + default: + /* other error codes indicate compiler/engine bugs */ + PyErr_SetString( + PyExc_RuntimeError, + "internal error in regular expression engine" + ); + } +} + +static int +pattern_traverse(PatternObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->groupindex); + Py_VISIT(self->indexgroup); + Py_VISIT(self->pattern); + return 0; +} + +static int +pattern_clear(PatternObject *self) +{ + Py_CLEAR(self->groupindex); + Py_CLEAR(self->indexgroup); + Py_CLEAR(self->pattern); + return 0; +} + +static void +pattern_dealloc(PatternObject* self) +{ + PyTypeObject *tp = Py_TYPE(self); + + PyObject_GC_UnTrack(self); + if (self->weakreflist != NULL) { + PyObject_ClearWeakRefs((PyObject *) self); + } + (void)pattern_clear(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +LOCAL(Py_ssize_t) +sre_match(SRE_STATE* state, SRE_CODE* pattern) +{ + if (state->charsize == 1) + return sre_ucs1_match(state, pattern, 1); + if (state->charsize == 2) + return sre_ucs2_match(state, pattern, 1); + assert(state->charsize == 4); + return sre_ucs4_match(state, pattern, 1); +} + +LOCAL(Py_ssize_t) +sre_search(SRE_STATE* state, SRE_CODE* pattern) +{ + if (state->charsize == 1) + return sre_ucs1_search(state, pattern); + if (state->charsize == 2) + return 
sre_ucs2_search(state, pattern); + assert(state->charsize == 4); + return sre_ucs4_search(state, pattern); +} + +/*[clinic input] +_sre.SRE_Pattern.match + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Matches zero or more characters at the beginning of the string. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + SRE_STATE state; + Py_ssize_t status; + PyObject *match; + + if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) + return NULL; + + state.ptr = state.start; + + TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); + + status = sre_match(&state, PatternObject_GetCode(self)); + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + if (PyErr_Occurred()) { + state_fini(&state); + return NULL; + } + + match = pattern_new_match(module_state, self, &state, status); + state_fini(&state); + return match; +} + +/*[clinic input] +_sre.SRE_Pattern.fullmatch + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Matches against all of the string. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + SRE_STATE state; + Py_ssize_t status; + PyObject *match; + + if (!state_init(&state, self, string, pos, endpos)) + return NULL; + + state.ptr = state.start; + + TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); + + state.match_all = 1; + status = sre_match(&state, PatternObject_GetCode(self)); + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + if (PyErr_Occurred()) { + state_fini(&state); + return NULL; + } + + match = pattern_new_match(module_state, self, &state, status); + state_fini(&state); + return match; +} + +/*[clinic input] +_sre.SRE_Pattern.search + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Scan through string looking for a match, and return a corresponding match object instance. + +Return None if no position in the string matches. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + SRE_STATE state; + Py_ssize_t status; + PyObject *match; + + if (!state_init(&state, self, string, pos, endpos)) + return NULL; + + TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); + + status = sre_search(&state, PatternObject_GetCode(self)); + + TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + + if (PyErr_Occurred()) { + state_fini(&state); + return NULL; + } + + match = pattern_new_match(module_state, self, &state, status); + state_fini(&state); + return match; +} + +static PyObject* +call(const char* module, const char* function, PyObject* args) +{ + PyObject* name; + PyObject* mod; + PyObject* func; + PyObject* result; + + if (!args) + return NULL; + name = PyUnicode_FromString(module); + if (!name) + return NULL; + mod = PyImport_Import(name); + Py_DECREF(name); + if (!mod) + return NULL; + func = PyObject_GetAttrString(mod, function); + Py_DECREF(mod); + if (!func) + return NULL; + result = PyObject_CallObject(func, args); + Py_DECREF(func); + Py_DECREF(args); + return result; +} + +/*[clinic input] +_sre.SRE_Pattern.findall + + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Return a list of all non-overlapping matches of pattern in string. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, + Py_ssize_t pos, Py_ssize_t endpos) +/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/ +{ + SRE_STATE state; + PyObject* list; + Py_ssize_t status; + Py_ssize_t i, b, e; + + if (!state_init(&state, self, string, pos, endpos)) + return NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + while (state.start <= state.end) { + + PyObject* item; + + state_reset(&state); + + state.ptr = state.start; + + status = sre_search(&state, PatternObject_GetCode(self)); + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + /* don't bother to build a match object */ + switch (self->groups) { + case 0: + b = STATE_OFFSET(&state, state.start); + e = STATE_OFFSET(&state, state.ptr); + item = getslice(state.isbytes, state.beginning, + string, b, e); + if (!item) + goto error; + break; + case 1: + item = state_getslice(&state, 1, string, 1); + if (!item) + goto error; + break; + default: + item = PyTuple_New(self->groups); + if (!item) + goto error; + for (i = 0; i < self->groups; i++) { + PyObject* o = state_getslice(&state, i+1, string, 1); + if (!o) { + Py_DECREF(item); + goto error; + } + PyTuple_SET_ITEM(item, i, o); + } + break; + } + + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + state.must_advance = (state.ptr == state.start); + state.start = state.ptr; + } + + state_fini(&state); + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; + +} + +/*[clinic input] +_sre.SRE_Pattern.finditer + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Return an iterator over all non-overlapping matches for the RE pattern in string. 
+ +For each match, the iterator returns a match object. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + PyObject* scanner; + PyObject* search; + PyObject* iterator; + + scanner = pattern_scanner(module_state, self, string, pos, endpos); + if (!scanner) + return NULL; + + search = PyObject_GetAttrString(scanner, "search"); + Py_DECREF(scanner); + if (!search) + return NULL; + + iterator = PyCallIter_New(search, Py_None); + Py_DECREF(search); + + return iterator; +} + +/*[clinic input] +_sre.SRE_Pattern.scanner + + cls: defining_class + / + string: object + pos: Py_ssize_t = 0 + endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + + return pattern_scanner(module_state, self, string, pos, endpos); +} + +/*[clinic input] +_sre.SRE_Pattern.split + + string: object + maxsplit: Py_ssize_t = 0 + +Split string by the occurrences of pattern. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, + Py_ssize_t maxsplit) +/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/ +{ + SRE_STATE state; + PyObject* list; + PyObject* item; + Py_ssize_t status; + Py_ssize_t n; + Py_ssize_t i; + const void* last; + + assert(self->codesize != 0); + + if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) + return NULL; + + list = PyList_New(0); + if (!list) { + state_fini(&state); + return NULL; + } + + n = 0; + last = state.start; + + while (!maxsplit || n < maxsplit) { + + state_reset(&state); + + state.ptr = state.start; + + status = sre_search(&state, PatternObject_GetCode(self)); + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + /* get segment before this match */ + item = getslice(state.isbytes, state.beginning, + string, STATE_OFFSET(&state, last), + STATE_OFFSET(&state, state.start) + ); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + /* add groups (if any) */ + for (i = 0; i < self->groups; i++) { + item = state_getslice(&state, i+1, string, 0); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + n = n + 1; + state.must_advance = (state.ptr == state.start); + last = state.start = state.ptr; + + } + + /* get segment following last match (even if empty) */ + item = getslice(state.isbytes, state.beginning, + string, STATE_OFFSET(&state, last), state.endpos + ); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + + state_fini(&state); + return list; + +error: + Py_DECREF(list); + state_fini(&state); + return NULL; + +} + +static PyObject* +pattern_subx(_sremodulestate* module_state, + PatternObject* self, + PyObject* 
ptemplate, + PyObject* string, + Py_ssize_t count, + Py_ssize_t subn) +{ + SRE_STATE state; + PyObject* list; + PyObject* joiner; + PyObject* item; + PyObject* filter; + PyObject* match; + const void* ptr; + Py_ssize_t status; + Py_ssize_t n; + Py_ssize_t i, b, e; + int isbytes, charsize; + int filter_is_callable; + Py_buffer view; + + if (PyCallable_Check(ptemplate)) { + /* sub/subn takes either a function or a template */ + filter = ptemplate; + Py_INCREF(filter); + filter_is_callable = 1; + } else { + /* if not callable, check if it's a literal string */ + int literal; + view.buf = NULL; + ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view); + if (ptr) { + if (charsize == 1) + literal = memchr(ptr, '\\', n) == NULL; + else + literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; + } else { + PyErr_Clear(); + literal = 0; + } + if (view.buf) + PyBuffer_Release(&view); + if (literal) { + filter = ptemplate; + Py_INCREF(filter); + filter_is_callable = 0; + } else { + /* not a literal; hand it over to the template compiler */ + filter = call( + SRE_PY_MODULE, "_subx", + PyTuple_Pack(2, self, ptemplate) + ); + if (!filter) + return NULL; + filter_is_callable = PyCallable_Check(filter); + } + } + + if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { + Py_DECREF(filter); + return NULL; + } + + list = PyList_New(0); + if (!list) { + Py_DECREF(filter); + state_fini(&state); + return NULL; + } + + n = i = 0; + + while (!count || n < count) { + + state_reset(&state); + + state.ptr = state.start; + + status = sre_search(&state, PatternObject_GetCode(self)); + if (PyErr_Occurred()) + goto error; + + if (status <= 0) { + if (status == 0) + break; + pattern_error(status); + goto error; + } + + b = STATE_OFFSET(&state, state.start); + e = STATE_OFFSET(&state, state.ptr); + + if (i < b) { + /* get segment before this match */ + item = getslice(state.isbytes, state.beginning, + string, i, b); + if (!item) + goto error; + status = PyList_Append(list, item); + 
Py_DECREF(item); + if (status < 0) + goto error; + + } + + if (filter_is_callable) { + /* pass match object through filter */ + match = pattern_new_match(module_state, self, &state, 1); + if (!match) + goto error; + item = PyObject_CallOneArg(filter, match); + Py_DECREF(match); + if (!item) + goto error; + } else { + /* filter is literal string */ + item = filter; + Py_INCREF(item); + } + + /* add to list */ + if (item != Py_None) { + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + i = e; + n = n + 1; + state.must_advance = (state.ptr == state.start); + state.start = state.ptr; + } + + /* get segment following last match */ + if (i < state.endpos) { + item = getslice(state.isbytes, state.beginning, + string, i, state.endpos); + if (!item) + goto error; + status = PyList_Append(list, item); + Py_DECREF(item); + if (status < 0) + goto error; + } + + state_fini(&state); + + Py_DECREF(filter); + + /* convert list to single string (also removes list) */ + joiner = getslice(state.isbytes, state.beginning, string, 0, 0); + if (!joiner) { + Py_DECREF(list); + return NULL; + } + if (PyList_GET_SIZE(list) == 0) { + Py_DECREF(list); + item = joiner; + } + else { + if (state.isbytes) + item = _PyBytes_Join(joiner, list); + else + item = PyUnicode_Join(joiner, list); + Py_DECREF(joiner); + Py_DECREF(list); + if (!item) + return NULL; + } + + if (subn) + return Py_BuildValue("Nn", item, n); + + return item; + +error: + Py_DECREF(list); + state_fini(&state); + Py_DECREF(filter); + return NULL; + +} + +/*[clinic input] +_sre.SRE_Pattern.sub + + cls: defining_class + / + repl: object + string: object + count: Py_ssize_t = 0 + +Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, + PyObject *repl, PyObject *string, Py_ssize_t count) +/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + + return pattern_subx(module_state, self, repl, string, count, 0); +} + +/*[clinic input] +_sre.SRE_Pattern.subn + + cls: defining_class + / + repl: object + string: object + count: Py_ssize_t = 0 + +Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, + PyObject *repl, PyObject *string, + Py_ssize_t count) +/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + + return pattern_subx(module_state, self, repl, string, count, 1); +} + +/*[clinic input] +_sre.SRE_Pattern.__copy__ + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern___copy___impl(PatternObject *self) +/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ +{ + Py_INCREF(self); + return (PyObject *)self; +} + +/*[clinic input] +_sre.SRE_Pattern.__deepcopy__ + + memo: object + / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) +/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/ +{ + Py_INCREF(self); + return (PyObject *)self; +} + +static PyObject * +pattern_repr(PatternObject *obj) +{ + static const struct { + const char *name; + int value; + } flag_names[] = { + {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, + {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, + {"re.LOCALE", SRE_FLAG_LOCALE}, + {"re.MULTILINE", SRE_FLAG_MULTILINE}, + 
{"re.DOTALL", SRE_FLAG_DOTALL}, + {"re.UNICODE", SRE_FLAG_UNICODE}, + {"re.VERBOSE", SRE_FLAG_VERBOSE}, + {"re.DEBUG", SRE_FLAG_DEBUG}, + {"re.ASCII", SRE_FLAG_ASCII}, + }; + PyObject *result = NULL; + PyObject *flag_items; + size_t i; + int flags = obj->flags; + + /* Omit re.UNICODE for valid string patterns. */ + if (obj->isbytes == 0 && + (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == + SRE_FLAG_UNICODE) + flags &= ~SRE_FLAG_UNICODE; + + flag_items = PyList_New(0); + if (!flag_items) + return NULL; + + for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { + if (flags & flag_names[i].value) { + PyObject *item = PyUnicode_FromString(flag_names[i].name); + if (!item) + goto done; + + if (PyList_Append(flag_items, item) < 0) { + Py_DECREF(item); + goto done; + } + Py_DECREF(item); + flags &= ~flag_names[i].value; + } + } + if (flags) { + PyObject *item = PyUnicode_FromFormat("0x%x", flags); + if (!item) + goto done; + + if (PyList_Append(flag_items, item) < 0) { + Py_DECREF(item); + goto done; + } + Py_DECREF(item); + } + + if (PyList_Size(flag_items) > 0) { + PyObject *flags_result; + PyObject *sep = PyUnicode_FromString("|"); + if (!sep) + goto done; + flags_result = PyUnicode_Join(sep, flag_items); + Py_DECREF(sep); + if (!flags_result) + goto done; + result = PyUnicode_FromFormat("re.compile(%.200R, %S)", + obj->pattern, flags_result); + Py_DECREF(flags_result); + } + else { + result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); + } + +done: + Py_DECREF(flag_items); + return result; +} + +PyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); + +/* PatternObject's 'groupindex' method. 
*/ +static PyObject * +pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) +{ + if (self->groupindex == NULL) + return PyDict_New(); + return PyDictProxy_New(self->groupindex); +} + +static int _validate(PatternObject *self); /* Forward */ + +/*[clinic input] +_sre.compile + + pattern: object + flags: int + code: object(subclass_of='&PyList_Type') + groups: Py_ssize_t + groupindex: object(subclass_of='&PyDict_Type') + indexgroup: object(subclass_of='&PyTuple_Type') + repeat_count: Py_ssize_t + +[clinic start generated code]*/ + +static PyObject * +_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, + PyObject *code, Py_ssize_t groups, PyObject *groupindex, + PyObject *indexgroup, Py_ssize_t repeat_count) +/*[clinic end generated code: output=922af562d51b1657 input=77e39c322501ec2a]*/ +{ + /* "compile" pattern descriptor to pattern object */ + + _sremodulestate *module_state = get_sre_module_state(module); + PatternObject* self; + Py_ssize_t i, n; + + n = PyList_GET_SIZE(code); + /* coverity[ampersand_in_size] */ + self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); + if (!self) + return NULL; + self->weakreflist = NULL; + self->pattern = NULL; + self->groupindex = NULL; + self->indexgroup = NULL; + + self->codesize = n; + + for (i = 0; i < n; i++) { + PyObject *o = PyList_GET_ITEM(code, i); + unsigned long value = PyLong_AsUnsignedLong(o); + self->code[i] = (SRE_CODE) value; + if ((unsigned long) self->code[i] != value) { + PyErr_SetString(PyExc_OverflowError, + "regular expression code size limit exceeded"); + break; + } + } + PyObject_GC_Track(self); + + if (PyErr_Occurred()) { + Py_DECREF(self); + return NULL; + } + + if (pattern == Py_None) { + self->isbytes = -1; + } + else { + Py_ssize_t p_length; + int charsize; + Py_buffer view; + view.buf = NULL; + if (!getstring(pattern, &p_length, &self->isbytes, + &charsize, &view)) { + Py_DECREF(self); + return NULL; + } + if (view.buf) + PyBuffer_Release(&view); + } + + 
Py_INCREF(pattern); + self->pattern = pattern; + + self->flags = flags; + self->groups = groups; + self->repeat_count = repeat_count; + + if (PyDict_GET_SIZE(groupindex) > 0) { + Py_INCREF(groupindex); + self->groupindex = groupindex; + if (PyTuple_GET_SIZE(indexgroup) > 0) { + Py_INCREF(indexgroup); + self->indexgroup = indexgroup; + } + } + + if (!_validate(self)) { + Py_DECREF(self); + return NULL; + } + + return (PyObject*) self; +} + +/* -------------------------------------------------------------------- */ +/* Code validation */ + +/* To learn more about this code, have a look at the _compile() function in + Lib/sre_compile.py. The validation functions below checks the code array + for conformance with the code patterns generated there. + + The nice thing about the generated code is that it is position-independent: + all jumps are relative jumps forward. Also, jumps don't cross each other: + the target of a later jump is always earlier than the target of an earlier + jump. IOW, this is okay: + + J---------J-------T--------T + \ \_____/ / + \______________________/ + + but this is not: + + J---------J-------T--------T + \_________\_____/ / + \____________/ + + It also helps that SRE_CODE is always an unsigned type. 
+*/ + +/* Defining this one enables tracing of the validator */ +#undef VVERBOSE + +/* Trace macro for the validator */ +#if defined(VVERBOSE) +#define VTRACE(v) printf v +#else +#define VTRACE(v) do {} while(0) /* do nothing */ +#endif + +/* Report failure */ +#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0) + +/* Extract opcode, argument, or skip count from code array */ +#define GET_OP \ + do { \ + VTRACE(("%p: ", code)); \ + if (code >= end) FAIL; \ + op = *code++; \ + VTRACE(("%lu (op)\n", (unsigned long)op)); \ + } while (0) +#define GET_ARG \ + do { \ + VTRACE(("%p= ", code)); \ + if (code >= end) FAIL; \ + arg = *code++; \ + VTRACE(("%lu (arg)\n", (unsigned long)arg)); \ + } while (0) +#define GET_SKIP_ADJ(adj) \ + do { \ + VTRACE(("%p= ", code)); \ + if (code >= end) FAIL; \ + skip = *code; \ + VTRACE(("%lu (skip to %p)\n", \ + (unsigned long)skip, code+skip)); \ + if (skip-adj > (uintptr_t)(end - code)) \ + FAIL; \ + code++; \ + } while (0) +#define GET_SKIP GET_SKIP_ADJ(0) + +static int +_validate_charset(SRE_CODE *code, SRE_CODE *end) +{ + /* Some variables are manipulated by the macros above */ + SRE_CODE op; + SRE_CODE arg; + SRE_CODE offset; + int i; + + while (code < end) { + GET_OP; + switch (op) { + + case SRE_OP_NEGATE: + break; + + case SRE_OP_LITERAL: + GET_ARG; + break; + + case SRE_OP_RANGE: + case SRE_OP_RANGE_UNI_IGNORE: + GET_ARG; + GET_ARG; + break; + + case SRE_OP_CHARSET: + offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ + if (offset > (uintptr_t)(end - code)) + FAIL; + code += offset; + break; + + case SRE_OP_BIGCHARSET: + GET_ARG; /* Number of blocks */ + offset = 256/sizeof(SRE_CODE); /* 256-byte table */ + if (offset > (uintptr_t)(end - code)) + FAIL; + /* Make sure that each byte points to a valid block */ + for (i = 0; i < 256; i++) { + if (((unsigned char *)code)[i] >= arg) + FAIL; + } + code += offset; + offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ + if (offset > (uintptr_t)(end - 
code)) + FAIL; + code += offset; + break; + + case SRE_OP_CATEGORY: + GET_ARG; + switch (arg) { + case SRE_CATEGORY_DIGIT: + case SRE_CATEGORY_NOT_DIGIT: + case SRE_CATEGORY_SPACE: + case SRE_CATEGORY_NOT_SPACE: + case SRE_CATEGORY_WORD: + case SRE_CATEGORY_NOT_WORD: + case SRE_CATEGORY_LINEBREAK: + case SRE_CATEGORY_NOT_LINEBREAK: + case SRE_CATEGORY_LOC_WORD: + case SRE_CATEGORY_LOC_NOT_WORD: + case SRE_CATEGORY_UNI_DIGIT: + case SRE_CATEGORY_UNI_NOT_DIGIT: + case SRE_CATEGORY_UNI_SPACE: + case SRE_CATEGORY_UNI_NOT_SPACE: + case SRE_CATEGORY_UNI_WORD: + case SRE_CATEGORY_UNI_NOT_WORD: + case SRE_CATEGORY_UNI_LINEBREAK: + case SRE_CATEGORY_UNI_NOT_LINEBREAK: + break; + default: + FAIL; + } + break; + + default: + FAIL; + + } + } + + return 1; +} + +static int +_validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self) +{ + /* Some variables are manipulated by the macros above */ + SRE_CODE op; + SRE_CODE arg; + SRE_CODE skip; + + VTRACE(("code=%p, end=%p\n", code, end)); + + if (code > end) + FAIL; + + while (code < end) { + GET_OP; + switch (op) { + + case SRE_OP_MARK: + /* We don't check whether marks are properly nested; the + sre_match() code is robust even if they don't, and the worst + you can get is nonsensical match results. 
*/ + GET_ARG; + if (arg > 2 * (size_t)self->groups + 1) { + VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)self->groups)); + FAIL; + } + break; + + case SRE_OP_LITERAL: + case SRE_OP_NOT_LITERAL: + case SRE_OP_LITERAL_IGNORE: + case SRE_OP_NOT_LITERAL_IGNORE: + case SRE_OP_LITERAL_UNI_IGNORE: + case SRE_OP_NOT_LITERAL_UNI_IGNORE: + case SRE_OP_LITERAL_LOC_IGNORE: + case SRE_OP_NOT_LITERAL_LOC_IGNORE: + GET_ARG; + /* The arg is just a character, nothing to check */ + break; + + case SRE_OP_SUCCESS: + case SRE_OP_FAILURE: + /* Nothing to check; these normally end the matching process */ + break; + + case SRE_OP_AT: + GET_ARG; + switch (arg) { + case SRE_AT_BEGINNING: + case SRE_AT_BEGINNING_STRING: + case SRE_AT_BEGINNING_LINE: + case SRE_AT_END: + case SRE_AT_END_LINE: + case SRE_AT_END_STRING: + case SRE_AT_BOUNDARY: + case SRE_AT_NON_BOUNDARY: + case SRE_AT_LOC_BOUNDARY: + case SRE_AT_LOC_NON_BOUNDARY: + case SRE_AT_UNI_BOUNDARY: + case SRE_AT_UNI_NON_BOUNDARY: + break; + default: + FAIL; + } + break; + + case SRE_OP_ANY: + case SRE_OP_ANY_ALL: + /* These have no operands */ + break; + + case SRE_OP_IN: + case SRE_OP_IN_IGNORE: + case SRE_OP_IN_UNI_IGNORE: + case SRE_OP_IN_LOC_IGNORE: + GET_SKIP; + /* Stop 1 before the end; we check the FAILURE below */ + if (!_validate_charset(code, code+skip-2)) + FAIL; + if (code[skip-2] != SRE_OP_FAILURE) + FAIL; + code += skip-1; + break; + + case SRE_OP_INFO: + { + /* A minimal info field is + <1=skip> <2=flags> <3=min> <4=max>; + If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, + more follows. 
*/ + SRE_CODE flags, i; + SRE_CODE *newcode; + GET_SKIP; + newcode = code+skip-1; + GET_ARG; flags = arg; + GET_ARG; + GET_ARG; + /* Check that only valid flags are present */ + if ((flags & ~(SRE_INFO_PREFIX | + SRE_INFO_LITERAL | + SRE_INFO_CHARSET)) != 0) + FAIL; + /* PREFIX and CHARSET are mutually exclusive */ + if ((flags & SRE_INFO_PREFIX) && + (flags & SRE_INFO_CHARSET)) + FAIL; + /* LITERAL implies PREFIX */ + if ((flags & SRE_INFO_LITERAL) && + !(flags & SRE_INFO_PREFIX)) + FAIL; + /* Validate the prefix */ + if (flags & SRE_INFO_PREFIX) { + SRE_CODE prefix_len; + GET_ARG; prefix_len = arg; + GET_ARG; + /* Here comes the prefix string */ + if (prefix_len > (uintptr_t)(newcode - code)) + FAIL; + code += prefix_len; + /* And here comes the overlap table */ + if (prefix_len > (uintptr_t)(newcode - code)) + FAIL; + /* Each overlap value should be < prefix_len */ + for (i = 0; i < prefix_len; i++) { + if (code[i] >= prefix_len) + FAIL; + } + code += prefix_len; + } + /* Validate the charset */ + if (flags & SRE_INFO_CHARSET) { + if (!_validate_charset(code, newcode-1)) + FAIL; + if (newcode[-1] != SRE_OP_FAILURE) + FAIL; + code = newcode; + } + else if (code != newcode) { + VTRACE(("code=%p, newcode=%p\n", code, newcode)); + FAIL; + } + } + break; + + case SRE_OP_BRANCH: + { + SRE_CODE *target = NULL; + for (;;) { + GET_SKIP; + if (skip == 0) + break; + /* Stop 2 before the end; we check the JUMP below */ + if (!_validate_inner(code, code+skip-3, self)) + FAIL; + code += skip-3; + /* Check that it ends with a JUMP, and that each JUMP + has the same target */ + GET_OP; + if (op != SRE_OP_JUMP) + FAIL; + GET_SKIP; + if (target == NULL) + target = code+skip-1; + else if (code+skip-1 != target) + FAIL; + } + } + break; + + case SRE_OP_REPEAT_ONE: + case SRE_OP_MIN_REPEAT_ONE: + case SRE_OP_POSSESSIVE_REPEAT_ONE: + { + SRE_CODE min, max; + GET_SKIP; + GET_ARG; min = arg; + GET_ARG; max = arg; + if (min > max) + FAIL; + if (max > SRE_MAXREPEAT) + FAIL; + if 
(!_validate_inner(code, code+skip-4, self)) + FAIL; + code += skip-4; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + } + break; + + case SRE_OP_REPEAT: + case SRE_OP_POSSESSIVE_REPEAT: + { + SRE_CODE op1 = op, min, max, repeat_index; + GET_SKIP; + GET_ARG; min = arg; + GET_ARG; max = arg; + if (min > max) + FAIL; + if (max > SRE_MAXREPEAT) + FAIL; + if (op1 == SRE_OP_REPEAT) { + GET_ARG; repeat_index = arg; + if (repeat_index >= (size_t)self->repeat_count) + FAIL; + skip -= 4; + } else { + skip -= 3; + } + if (!_validate_inner(code, code+skip, self)) + FAIL; + code += skip; + GET_OP; + if (op1 == SRE_OP_POSSESSIVE_REPEAT) { + if (op != SRE_OP_SUCCESS) + FAIL; + } + else { + if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) + FAIL; + } + } + break; + + case SRE_OP_ATOMIC_GROUP: + { + GET_SKIP; + if (!_validate_inner(code, code+skip-2, self)) + FAIL; + code += skip-2; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + } + break; + + case SRE_OP_GROUPREF: + case SRE_OP_GROUPREF_IGNORE: + case SRE_OP_GROUPREF_UNI_IGNORE: + case SRE_OP_GROUPREF_LOC_IGNORE: + GET_ARG; + if (arg >= (size_t)self->groups) + FAIL; + break; + + case SRE_OP_GROUPREF_EXISTS: + /* The regex syntax for this is: '(?(group)then|else)', where + 'group' is either an integer group number or a group name, + 'then' and 'else' are sub-regexes, and 'else' is optional. */ + GET_ARG; + if (arg >= (size_t)self->groups) + FAIL; + GET_SKIP_ADJ(1); + code--; /* The skip is relative to the first arg! */ + /* There are two possibilities here: if there is both a 'then' + part and an 'else' part, the generated code looks like: + + GROUPREF_EXISTS + + + ...then part... + JUMP + + ( jumps here) + ...else part... + ( jumps here) + + If there is only a 'then' part, it looks like: + + GROUPREF_EXISTS + + + ...then part... + ( jumps here) + + There is no direct way to decide which it is, and we don't want + to allow arbitrary jumps anywhere in the code; so we just look + for a JUMP opcode preceding our skip target. 
+ */ + if (skip >= 3 && skip-3 < (uintptr_t)(end - code) && + code[skip-3] == SRE_OP_JUMP) + { + VTRACE(("both then and else parts present\n")); + if (!_validate_inner(code+1, code+skip-3, self)) + FAIL; + code += skip-2; /* Position after JUMP, at */ + GET_SKIP; + if (!_validate_inner(code, code+skip-1, self)) + FAIL; + code += skip-1; + } + else { + VTRACE(("only a then part present\n")); + if (!_validate_inner(code+1, code+skip-1, self)) + FAIL; + code += skip-1; + } + break; + + case SRE_OP_ASSERT: + case SRE_OP_ASSERT_NOT: + GET_SKIP; + GET_ARG; /* 0 for lookahead, width for lookbehind */ + code--; /* Back up over arg to simplify math below */ + if (arg & 0x80000000) + FAIL; /* Width too large */ + /* Stop 1 before the end; we check the SUCCESS below */ + if (!_validate_inner(code+1, code+skip-2, self)) + FAIL; + code += skip-2; + GET_OP; + if (op != SRE_OP_SUCCESS) + FAIL; + break; + + default: + FAIL; + + } + } + + VTRACE(("okay\n")); + return 1; +} + +static int +_validate_outer(SRE_CODE *code, SRE_CODE *end, PatternObject *self) +{ + if (self->groups < 0 || (size_t)self->groups > SRE_MAXGROUPS || + self->repeat_count < 0 || + code >= end || end[-1] != SRE_OP_SUCCESS) + FAIL; + return _validate_inner(code, end-1, self); +} + +static int +_validate(PatternObject *self) +{ + if (!_validate_outer(self->code, self->code+self->codesize, self)) + { + PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); + return 0; + } + else + VTRACE(("Success!\n")); + return 1; +} + +/* -------------------------------------------------------------------- */ +/* match methods */ + +static int +match_traverse(MatchObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->string); + Py_VISIT(self->regs); + Py_VISIT(self->pattern); + return 0; +} + +static int +match_clear(MatchObject *self) +{ + Py_CLEAR(self->string); + Py_CLEAR(self->regs); + Py_CLEAR(self->pattern); + return 0; +} + +static void +match_dealloc(MatchObject* self) +{ + 
PyTypeObject *tp = Py_TYPE(self); + + PyObject_GC_UnTrack(self); + (void)match_clear(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +static PyObject* +match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) +{ + Py_ssize_t length; + int isbytes, charsize; + Py_buffer view; + PyObject *result; + const void* ptr; + Py_ssize_t i, j; + + assert(0 <= index && index < self->groups); + index *= 2; + + if (self->string == Py_None || self->mark[index] < 0) { + /* return default value if the string or group is undefined */ + Py_INCREF(def); + return def; + } + + ptr = getstring(self->string, &length, &isbytes, &charsize, &view); + if (ptr == NULL) + return NULL; + + i = self->mark[index]; + j = self->mark[index+1]; + i = Py_MIN(i, length); + j = Py_MIN(j, length); + result = getslice(isbytes, ptr, self->string, i, j); + if (isbytes && view.buf != NULL) + PyBuffer_Release(&view); + return result; +} + +static Py_ssize_t +match_getindex(MatchObject* self, PyObject* index) +{ + Py_ssize_t i; + + if (index == NULL) + /* Default value */ + return 0; + + if (PyIndex_Check(index)) { + i = PyNumber_AsSsize_t(index, NULL); + } + else { + i = -1; + + if (self->pattern->groupindex) { + index = PyDict_GetItemWithError(self->pattern->groupindex, index); + if (index && PyLong_Check(index)) { + i = PyLong_AsSsize_t(index); + } + } + } + if (i < 0 || i >= self->groups) { + /* raise IndexError if we were given a bad group number */ + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_IndexError, "no such group"); + } + return -1; + } + + return i; +} + +static PyObject* +match_getslice(MatchObject* self, PyObject* index, PyObject* def) +{ + Py_ssize_t i = match_getindex(self, index); + + if (i < 0) { + return NULL; + } + + return match_getslice_by_index(self, i, def); +} + +/*[clinic input] +_sre.SRE_Match.expand + + template: object + +Return the string obtained by doing backslash substitution on the string template, as done by the sub() method. 
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template)
+/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/
+{
+    /* the expansion itself is done by Python code */
+    PyObject *call_args = PyTuple_Pack(3, self->pattern, self, template);
+
+    /* call() accepts a NULL tuple and releases a non-NULL one itself */
+    return call(SRE_PY_MODULE, "_expand", call_args);
+}
+
+static PyObject*
+match_group(MatchObject* self, PyObject* args)
+{
+    Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+    Py_ssize_t pos;
+    PyObject *groups;
+
+    /* no argument: same as group(0) */
+    if (nargs == 0) {
+        return match_getslice(self, _PyLong_GetZero(), Py_None);
+    }
+
+    /* one argument: return that group directly, not wrapped in a tuple */
+    if (nargs == 1) {
+        return match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
+    }
+
+    /* several arguments: one tuple slot per requested group */
+    groups = PyTuple_New(nargs);
+    if (groups == NULL) {
+        return NULL;
+    }
+    for (pos = 0; pos < nargs; pos++) {
+        PyObject *slice = match_getslice(
+            self, PyTuple_GET_ITEM(args, pos), Py_None
+            );
+        if (slice == NULL) {
+            Py_DECREF(groups);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(groups, pos, slice);
+    }
+    return groups;
+}
+
+static PyObject*
+match_getitem(MatchObject* self, PyObject* name)
+{
+    /* same lookup as a single-argument group() call */
+    return match_getslice(self, name, Py_None);
+}
+
+/*[clinic input]
+_sre.SRE_Match.groups
+
+    default: object = None
+        Is used for groups that did not participate in the match.
+
+Return a tuple containing all the subgroups of the match, from 1.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value)
+/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/
+{
+    /* self->groups also counts group 0, which is left out of the tuple */
+    Py_ssize_t count = self->groups - 1;
+    Py_ssize_t slot;
+    PyObject *tuple = PyTuple_New(count);
+
+    if (tuple == NULL) {
+        return NULL;
+    }
+
+    for (slot = 0; slot < count; slot++) {
+        /* tuple slot N holds group N+1 */
+        PyObject *slice = match_getslice_by_index(self, slot + 1,
+                                                  default_value);
+        if (slice == NULL) {
+            Py_DECREF(tuple);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(tuple, slot, slice);
+    }
+
+    return tuple;
+}
+
+/*[clinic input]
+_sre.SRE_Match.groupdict
+
+    default: object = None
+        Is used for groups that did not participate in the match.
+
+Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.
+[clinic start generated code]*/
+
+static PyObject *
+_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value)
+/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/
+{
+    PyObject *names = self->pattern->groupindex;
+    PyObject *result = PyDict_New();
+    PyObject *name;
+    PyObject *number;       /* borrowed from the dict; not used here */
+    Py_ssize_t pos = 0;
+    Py_hash_t hash;
+
+    /* allocation failure (NULL) or no named groups (empty dict) */
+    if (result == NULL || names == NULL) {
+        return result;
+    }
+
+    while (_PyDict_Next(names, &pos, &name, &number, &hash)) {
+        PyObject *slice;
+        int rc;
+
+        /* hold the borrowed key while it is in use */
+        Py_INCREF(name);
+        slice = match_getslice(self, name, default_value);
+        if (slice == NULL) {
+            Py_DECREF(name);
+            Py_DECREF(result);
+            return NULL;
+        }
+        /* reuse the hash that the iteration already produced */
+        rc = _PyDict_SetItem_KnownHash(result, name, slice, hash);
+        Py_DECREF(slice);
+        Py_DECREF(name);
+        if (rc < 0) {
+            Py_DECREF(result);
+            return NULL;
+        }
+    }
+
+    return result;
+}
+
+/*[clinic input]
+_sre.SRE_Match.start -> Py_ssize_t
+
+    group: object(c_default="NULL") = 0
+    /
+
+Return index of the start of the substring matched by group.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group)
+/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/
+{
+    Py_ssize_t slot = match_getindex(self, group);
+
+    /* match_getindex() has already set the exception */
+    if (slot < 0) {
+        return -1;
+    }
+
+    /* even mark entries are start offsets; -1 means the group is undefined */
+    return self->mark[slot*2];
+}
+
+/*[clinic input]
+_sre.SRE_Match.end -> Py_ssize_t
+
+    group: object(c_default="NULL") = 0
+    /
+
+Return index of the end of the substring matched by group.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group)
+/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/
+{
+    Py_ssize_t slot = match_getindex(self, group);
+
+    /* match_getindex() has already set the exception */
+    if (slot < 0) {
+        return -1;
+    }
+
+    /* odd mark entries are end offsets; -1 means the group is undefined */
+    return self->mark[slot*2+1];
+}
+
+/* Build the tuple (i1, i2); returns NULL with an exception set on failure. */
+LOCAL(PyObject*)
+_pair(Py_ssize_t i1, Py_ssize_t i2)
+{
+    PyObject *number;
+    PyObject *tuple = PyTuple_New(2);
+
+    if (tuple == NULL) {
+        return NULL;
+    }
+
+    number = PyLong_FromSsize_t(i1);
+    if (number == NULL) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 0, number);
+
+    number = PyLong_FromSsize_t(i2);
+    if (number == NULL) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 1, number);
+
+    return tuple;
+}
+
+/*[clinic input]
+_sre.SRE_Match.span
+
+    group: object(c_default="NULL") = 0
+    /
+
+For match object m, return the 2-tuple (m.start(group), m.end(group)).
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) +/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ +{ + Py_ssize_t index = match_getindex(self, group); + + if (index < 0) { + return NULL; + } + + /* marks are -1 if group is undefined */ + return _pair(self->mark[index*2], self->mark[index*2+1]); +} + +static PyObject* +match_regs(MatchObject* self) +{ + PyObject* regs; + PyObject* item; + Py_ssize_t index; + + regs = PyTuple_New(self->groups); + if (!regs) + return NULL; + + for (index = 0; index < self->groups; index++) { + item = _pair(self->mark[index*2], self->mark[index*2+1]); + if (!item) { + Py_DECREF(regs); + return NULL; + } + PyTuple_SET_ITEM(regs, index, item); + } + + Py_INCREF(regs); + self->regs = regs; + + return regs; +} + +/*[clinic input] +_sre.SRE_Match.__copy__ + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match___copy___impl(MatchObject *self) +/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ +{ + Py_INCREF(self); + return (PyObject *)self; +} + +/*[clinic input] +_sre.SRE_Match.__deepcopy__ + + memo: object + / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) +/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/ +{ + Py_INCREF(self); + return (PyObject *)self; +} + +PyDoc_STRVAR(match_doc, +"The result of re.match() and re.search().\n\ +Match objects always have a boolean value of True."); + +PyDoc_STRVAR(match_group_doc, +"group([group1, ...]) -> str or tuple.\n\ + Return subgroup(s) of the match by indices or names.\n\ + For 0 returns the entire match."); + +static PyObject * +match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) +{ + if (self->lastindex >= 0) + return PyLong_FromSsize_t(self->lastindex); + Py_RETURN_NONE; +} + +static PyObject * +match_lastgroup_get(MatchObject *self, 
void *Py_UNUSED(ignored)) +{ + if (self->pattern->indexgroup && + self->lastindex >= 0 && + self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) + { + PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, + self->lastindex); + Py_INCREF(result); + return result; + } + Py_RETURN_NONE; +} + +static PyObject * +match_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) +{ + if (self->regs) { + Py_INCREF(self->regs); + return self->regs; + } else + return match_regs(self); +} + +static PyObject * +match_repr(MatchObject *self) +{ + PyObject *result; + PyObject *group0 = match_getslice_by_index(self, 0, Py_None); + if (group0 == NULL) + return NULL; + result = PyUnicode_FromFormat( + "<%s object; span=(%zd, %zd), match=%.50R>", + Py_TYPE(self)->tp_name, + self->mark[0], self->mark[1], group0); + Py_DECREF(group0); + return result; +} + + +static PyObject* +pattern_new_match(_sremodulestate* module_state, + PatternObject* pattern, + SRE_STATE* state, + Py_ssize_t status) +{ + /* create match object (from state object) */ + + MatchObject* match; + Py_ssize_t i, j; + char* base; + int n; + + if (status > 0) { + + /* create match object (with room for extra group marks) */ + /* coverity[ampersand_in_size] */ + match = PyObject_GC_NewVar(MatchObject, + module_state->Match_Type, + 2*(pattern->groups+1)); + if (!match) + return NULL; + + Py_INCREF(pattern); + match->pattern = pattern; + + Py_INCREF(state->string); + match->string = state->string; + + match->regs = NULL; + match->groups = pattern->groups+1; + + /* fill in group slices */ + + base = (char*) state->beginning; + n = state->charsize; + + match->mark[0] = ((char*) state->start - base) / n; + match->mark[1] = ((char*) state->ptr - base) / n; + + for (i = j = 0; i < pattern->groups; i++, j+=2) + if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { + match->mark[j+2] = ((char*) state->mark[j] - base) / n; + match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; + + /* check wrong 
span */ + if (match->mark[j+2] > match->mark[j+3]) { + PyErr_SetString(PyExc_SystemError, + "The span of capturing group is wrong," + " please report a bug for the re module."); + Py_DECREF(match); + return NULL; + } + } else + match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ + + match->pos = state->pos; + match->endpos = state->endpos; + + match->lastindex = state->lastindex; + + PyObject_GC_Track(match); + return (PyObject*) match; + + } else if (status == 0) { + + /* no match */ + Py_RETURN_NONE; + + } + + /* internal error */ + pattern_error(status); + return NULL; +} + + +/* -------------------------------------------------------------------- */ +/* scanner methods (experimental) */ + +static int +scanner_traverse(ScannerObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->pattern); + return 0; +} + +static int +scanner_clear(ScannerObject *self) +{ + Py_CLEAR(self->pattern); + return 0; +} + +static void +scanner_dealloc(ScannerObject* self) +{ + PyTypeObject *tp = Py_TYPE(self); + + PyObject_GC_UnTrack(self); + state_fini(&self->state); + (void)scanner_clear(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +static int +scanner_begin(ScannerObject* self) +{ + if (self->executing) { + PyErr_SetString(PyExc_ValueError, + "regular expression scanner already executing"); + return 0; + } + self->executing = 1; + return 1; +} + +static void +scanner_end(ScannerObject* self) +{ + assert(self->executing); + self->executing = 0; +} + +/*[clinic input] +_sre.SRE_Scanner.match + + cls: defining_class + / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + SRE_STATE* state = &self->state; + PyObject* match; + Py_ssize_t status; + + if (!scanner_begin(self)) { + return NULL; + } + if (state->start == 
NULL) { + scanner_end(self); + Py_RETURN_NONE; + } + + state_reset(state); + + state->ptr = state->start; + + status = sre_match(state, PatternObject_GetCode(self->pattern)); + if (PyErr_Occurred()) { + scanner_end(self); + return NULL; + } + + match = pattern_new_match(module_state, (PatternObject*) self->pattern, + state, status); + + if (status == 0) + state->start = NULL; + else { + state->must_advance = (state->ptr == state->start); + state->start = state->ptr; + } + + scanner_end(self); + return match; +} + + +/*[clinic input] +_sre.SRE_Scanner.search + + cls: defining_class + / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/ +{ + _sremodulestate *module_state = get_sre_module_state_by_class(cls); + SRE_STATE* state = &self->state; + PyObject* match; + Py_ssize_t status; + + if (!scanner_begin(self)) { + return NULL; + } + if (state->start == NULL) { + scanner_end(self); + Py_RETURN_NONE; + } + + state_reset(state); + + state->ptr = state->start; + + status = sre_search(state, PatternObject_GetCode(self->pattern)); + if (PyErr_Occurred()) { + scanner_end(self); + return NULL; + } + + match = pattern_new_match(module_state, (PatternObject*) self->pattern, + state, status); + + if (status == 0) + state->start = NULL; + else { + state->must_advance = (state->ptr == state->start); + state->start = state->ptr; + } + + scanner_end(self); + return match; +} + +static PyObject * +pattern_scanner(_sremodulestate *module_state, + PatternObject *self, + PyObject *string, + Py_ssize_t pos, + Py_ssize_t endpos) +{ + ScannerObject* scanner; + + /* create scanner object */ + scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); + if (!scanner) + return NULL; + scanner->pattern = NULL; + scanner->executing = 0; + + /* create search state object */ + if (!state_init(&scanner->state, self, string, pos, 
endpos)) { + Py_DECREF(scanner); + return NULL; + } + + Py_INCREF(self); + scanner->pattern = (PyObject*) self; + + PyObject_GC_Track(scanner); + return (PyObject*) scanner; +} + +static Py_hash_t +pattern_hash(PatternObject *self) +{ + Py_hash_t hash, hash2; + + hash = PyObject_Hash(self->pattern); + if (hash == -1) { + return -1; + } + + hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); + hash ^= hash2; + + hash ^= self->flags; + hash ^= self->isbytes; + hash ^= self->codesize; + + if (hash == -1) { + hash = -2; + } + return hash; +} + +static PyObject* +pattern_richcompare(PyObject *lefto, PyObject *righto, int op) +{ + PyTypeObject *tp = Py_TYPE(lefto); + _sremodulestate *module_state = get_sre_module_state_by_class(tp); + PatternObject *left, *right; + int cmp; + + if (op != Py_EQ && op != Py_NE) { + Py_RETURN_NOTIMPLEMENTED; + } + + if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) + { + Py_RETURN_NOTIMPLEMENTED; + } + + if (lefto == righto) { + /* a pattern is equal to itself */ + return PyBool_FromLong(op == Py_EQ); + } + + left = (PatternObject *)lefto; + right = (PatternObject *)righto; + + cmp = (left->flags == right->flags + && left->isbytes == right->isbytes + && left->codesize == right->codesize); + if (cmp) { + /* Compare the code and the pattern because the same pattern can + produce different codes depending on the locale used to compile the + pattern when the re.LOCALE flag is used. Don't compare groups, + indexgroup nor groupindex: they are derivated from the pattern. 
*/ + cmp = (memcmp(left->code, right->code, + sizeof(left->code[0]) * left->codesize) == 0); + } + if (cmp) { + cmp = PyObject_RichCompareBool(left->pattern, right->pattern, + Py_EQ); + if (cmp < 0) { + return NULL; + } + } + if (op == Py_NE) { + cmp = !cmp; + } + return PyBool_FromLong(cmp); +} + +#include "clinic/sre.c.h" + +static PyMethodDef pattern_methods[] = { + _SRE_SRE_PATTERN_MATCH_METHODDEF + _SRE_SRE_PATTERN_FULLMATCH_METHODDEF + _SRE_SRE_PATTERN_SEARCH_METHODDEF + _SRE_SRE_PATTERN_SUB_METHODDEF + _SRE_SRE_PATTERN_SUBN_METHODDEF + _SRE_SRE_PATTERN_FINDALL_METHODDEF + _SRE_SRE_PATTERN_SPLIT_METHODDEF + _SRE_SRE_PATTERN_FINDITER_METHODDEF + _SRE_SRE_PATTERN_SCANNER_METHODDEF + _SRE_SRE_PATTERN___COPY___METHODDEF + _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF + {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, + PyDoc_STR("See PEP 585")}, + {NULL, NULL} +}; + +static PyGetSetDef pattern_getset[] = { + {"groupindex", (getter)pattern_groupindex, (setter)NULL, + "A dictionary mapping group names to group numbers."}, + {NULL} /* Sentinel */ +}; + +#define PAT_OFF(x) offsetof(PatternObject, x) +static PyMemberDef pattern_members[] = { + {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY, + "The pattern string from which the RE object was compiled."}, + {"flags", T_INT, PAT_OFF(flags), READONLY, + "The regex matching flags."}, + {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY, + "The number of capturing groups in the pattern."}, + {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY}, + {NULL} /* Sentinel */ +}; + +static PyType_Slot pattern_slots[] = { + {Py_tp_dealloc, (destructor)pattern_dealloc}, + {Py_tp_repr, (reprfunc)pattern_repr}, + {Py_tp_hash, (hashfunc)pattern_hash}, + {Py_tp_doc, (void *)pattern_doc}, + {Py_tp_richcompare, pattern_richcompare}, + {Py_tp_methods, pattern_methods}, + {Py_tp_members, pattern_members}, + {Py_tp_getset, pattern_getset}, + {Py_tp_traverse, pattern_traverse}, + {Py_tp_clear, pattern_clear}, + 
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */

/* Slot table for the match type; referenced by match_spec.slots. */
static PyType_Slot match_slots[] = {
    {Py_tp_dealloc, match_dealloc},
    {Py_tp_repr, match_repr},
    {Py_tp_doc, (void *)match_doc},
    {Py_tp_methods, match_methods},
    {Py_tp_members, match_members},
    {Py_tp_getset, match_getset},
    {Py_tp_traverse, match_traverse},
    {Py_tp_clear, match_clear},

    /* As mapping.
     *
     * Match objects do not support length or assignment, but do support
     * __getitem__.
     */
    {Py_mp_subscript, match_getitem},

    {0, NULL},  /* sentinel */
};
Py_CLEAR(state->Match_Type); + Py_CLEAR(state->Scanner_Type); + + return 0; +} + +static void +sre_free(void *module) +{ + sre_clear((PyObject *)module); +} + +#define CREATE_TYPE(m, type, spec) \ +do { \ + type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \ + if (type == NULL) { \ + goto error; \ + } \ +} while (0) + +#define ADD_ULONG_CONSTANT(module, name, value) \ + do { \ + PyObject *o = PyLong_FromUnsignedLong(value); \ + if (!o) \ + goto error; \ + int res = PyModule_AddObjectRef(module, name, o); \ + Py_DECREF(o); \ + if (res < 0) { \ + goto error; \ + } \ +} while (0) + +static int +sre_exec(PyObject *m) +{ + _sremodulestate *state; + + /* Create heap types */ + state = get_sre_module_state(m); + CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); + CREATE_TYPE(m, state->Match_Type, &match_spec); + CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); + + if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) { + goto error; + } + + if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) { + goto error; + } + + ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT); + ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS); + + if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) { + goto error; + } + + return 0; + +error: + return -1; +} + +static PyModuleDef_Slot sre_slots[] = { + {Py_mod_exec, sre_exec}, + {0, NULL}, +}; + +static struct PyModuleDef sremodule = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_" SRE_MODULE, + .m_size = sizeof(_sremodulestate), + .m_methods = _functions, + .m_slots = sre_slots, + .m_traverse = sre_traverse, + .m_free = sre_free, + .m_clear = sre_clear, +}; + +PyMODINIT_FUNC +PyInit__sre(void) +{ + return PyModuleDef_Init(&sremodule); +} + +/* vim:ts=4:sw=4:et +*/ diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h new file mode 100644 index 0000000..129f559 --- /dev/null +++ b/Modules/_sre/sre.h @@ -0,0 +1,99 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching 
/* Compiled pattern object.
 *
 * Variable-size object: the compiled code words live in the trailing
 * `code` array and `codesize` gives their number (the pattern type spec
 * uses sizeof(SRE_CODE) as its item size).  `pattern`, `flags` and
 * `groups` are exposed as read-only attributes of the Python object.
 */
typedef struct {
    PyObject_VAR_HEAD
    Py_ssize_t groups; /* must be first! */
    PyObject* groupindex; /* dict */
    PyObject* indexgroup; /* tuple */
    /* the number of REPEATs */
    Py_ssize_t repeat_count;
    /* compatibility */
    PyObject* pattern; /* pattern source (or None) */
    int flags; /* flags used when compiling pattern source */
    PyObject *weakreflist; /* List of weak references */
    int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
    /* pattern code */
    Py_ssize_t codesize; /* number of SRE_CODE words in code[] */
    SRE_CODE code[1]; /* first element of the variable-length code array */
} PatternObject;
/* Scanner object: a pattern plus the matching state used to walk through
   one target string with repeated match()/search() calls. */
typedef struct {
    PyObject_HEAD
    PyObject* pattern;  /* the pattern object being scanned with
                           (strong reference, NULL until set up) */
    SRE_STATE state;    /* matching state, set up by state_init() and
                           released by state_fini() */
    int executing;      /* non-zero while a match()/search() call is running;
                           a nested call then fails with ValueError */
} ScannerObject;
+ */ + +#define SRE_MAGIC 20220402 +#define SRE_OP_FAILURE 0 +#define SRE_OP_SUCCESS 1 +#define SRE_OP_ANY 2 +#define SRE_OP_ANY_ALL 3 +#define SRE_OP_ASSERT 4 +#define SRE_OP_ASSERT_NOT 5 +#define SRE_OP_AT 6 +#define SRE_OP_BRANCH 7 +#define SRE_OP_CALL 8 +#define SRE_OP_CATEGORY 9 +#define SRE_OP_CHARSET 10 +#define SRE_OP_BIGCHARSET 11 +#define SRE_OP_GROUPREF 12 +#define SRE_OP_GROUPREF_EXISTS 13 +#define SRE_OP_IN 14 +#define SRE_OP_INFO 15 +#define SRE_OP_JUMP 16 +#define SRE_OP_LITERAL 17 +#define SRE_OP_MARK 18 +#define SRE_OP_MAX_UNTIL 19 +#define SRE_OP_MIN_UNTIL 20 +#define SRE_OP_NOT_LITERAL 21 +#define SRE_OP_NEGATE 22 +#define SRE_OP_RANGE 23 +#define SRE_OP_REPEAT 24 +#define SRE_OP_REPEAT_ONE 25 +#define SRE_OP_SUBPATTERN 26 +#define SRE_OP_MIN_REPEAT_ONE 27 +#define SRE_OP_ATOMIC_GROUP 28 +#define SRE_OP_POSSESSIVE_REPEAT 29 +#define SRE_OP_POSSESSIVE_REPEAT_ONE 30 +#define SRE_OP_GROUPREF_IGNORE 31 +#define SRE_OP_IN_IGNORE 32 +#define SRE_OP_LITERAL_IGNORE 33 +#define SRE_OP_NOT_LITERAL_IGNORE 34 +#define SRE_OP_GROUPREF_LOC_IGNORE 35 +#define SRE_OP_IN_LOC_IGNORE 36 +#define SRE_OP_LITERAL_LOC_IGNORE 37 +#define SRE_OP_NOT_LITERAL_LOC_IGNORE 38 +#define SRE_OP_GROUPREF_UNI_IGNORE 39 +#define SRE_OP_IN_UNI_IGNORE 40 +#define SRE_OP_LITERAL_UNI_IGNORE 41 +#define SRE_OP_NOT_LITERAL_UNI_IGNORE 42 +#define SRE_OP_RANGE_UNI_IGNORE 43 +#define SRE_AT_BEGINNING 0 +#define SRE_AT_BEGINNING_LINE 1 +#define SRE_AT_BEGINNING_STRING 2 +#define SRE_AT_BOUNDARY 3 +#define SRE_AT_NON_BOUNDARY 4 +#define SRE_AT_END 5 +#define SRE_AT_END_LINE 6 +#define SRE_AT_END_STRING 7 +#define SRE_AT_LOC_BOUNDARY 8 +#define SRE_AT_LOC_NON_BOUNDARY 9 +#define SRE_AT_UNI_BOUNDARY 10 +#define SRE_AT_UNI_NON_BOUNDARY 11 +#define SRE_CATEGORY_DIGIT 0 +#define SRE_CATEGORY_NOT_DIGIT 1 +#define SRE_CATEGORY_SPACE 2 +#define SRE_CATEGORY_NOT_SPACE 3 +#define SRE_CATEGORY_WORD 4 +#define SRE_CATEGORY_NOT_WORD 5 +#define SRE_CATEGORY_LINEBREAK 6 +#define SRE_CATEGORY_NOT_LINEBREAK 7 
+#define SRE_CATEGORY_LOC_WORD 8 +#define SRE_CATEGORY_LOC_NOT_WORD 9 +#define SRE_CATEGORY_UNI_DIGIT 10 +#define SRE_CATEGORY_UNI_NOT_DIGIT 11 +#define SRE_CATEGORY_UNI_SPACE 12 +#define SRE_CATEGORY_UNI_NOT_SPACE 13 +#define SRE_CATEGORY_UNI_WORD 14 +#define SRE_CATEGORY_UNI_NOT_WORD 15 +#define SRE_CATEGORY_UNI_LINEBREAK 16 +#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17 +#define SRE_FLAG_TEMPLATE 1 +#define SRE_FLAG_IGNORECASE 2 +#define SRE_FLAG_LOCALE 4 +#define SRE_FLAG_MULTILINE 8 +#define SRE_FLAG_DOTALL 16 +#define SRE_FLAG_UNICODE 32 +#define SRE_FLAG_VERBOSE 64 +#define SRE_FLAG_DEBUG 128 +#define SRE_FLAG_ASCII 256 +#define SRE_INFO_PREFIX 1 +#define SRE_INFO_LITERAL 2 +#define SRE_INFO_CHARSET 4 diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h new file mode 100644 index 0000000..34cd055 --- /dev/null +++ b/Modules/_sre/sre_lib.h @@ -0,0 +1,1759 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. + * + * See the sre.c file for information on usage and redistribution. 
+ */ + +/* String matching engine */ + +/* This file is included three times, with different character settings */ + +LOCAL(int) +SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) +{ + /* check if pointer is at given position */ + + Py_ssize_t thisp, thatp; + + switch (at) { + + case SRE_AT_BEGINNING: + case SRE_AT_BEGINNING_STRING: + return ((void*) ptr == state->beginning); + + case SRE_AT_BEGINNING_LINE: + return ((void*) ptr == state->beginning || + SRE_IS_LINEBREAK((int) ptr[-1])); + + case SRE_AT_END: + return (((SRE_CHAR *)state->end - ptr == 1 && + SRE_IS_LINEBREAK((int) ptr[0])) || + ((void*) ptr == state->end)); + + case SRE_AT_END_LINE: + return ((void*) ptr == state->end || + SRE_IS_LINEBREAK((int) ptr[0])); + + case SRE_AT_END_STRING: + return ((void*) ptr == state->end); + + case SRE_AT_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + + case SRE_AT_LOC_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_LOC_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_LOC_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_LOC_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + + case SRE_AT_UNI_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? 
+ SRE_UNI_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_IS_WORD((int) ptr[0]) : 0; + return thisp != thatp; + + case SRE_AT_UNI_NON_BOUNDARY: + if (state->beginning == state->end) + return 0; + thatp = ((void*) ptr > state->beginning) ? + SRE_UNI_IS_WORD((int) ptr[-1]) : 0; + thisp = ((void*) ptr < state->end) ? + SRE_UNI_IS_WORD((int) ptr[0]) : 0; + return thisp == thatp; + + } + + return 0; +} + +LOCAL(int) +SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) +{ + /* check if character is a member of the given set */ + + int ok = 1; + + for (;;) { + switch (*set++) { + + case SRE_OP_FAILURE: + return !ok; + + case SRE_OP_LITERAL: + /* */ + if (ch == set[0]) + return ok; + set++; + break; + + case SRE_OP_CATEGORY: + /* */ + if (sre_category(set[0], (int) ch)) + return ok; + set++; + break; + + case SRE_OP_CHARSET: + /* */ + if (ch < 256 && + (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) + return ok; + set += 256/SRE_CODE_BITS; + break; + + case SRE_OP_RANGE: + /* */ + if (set[0] <= ch && ch <= set[1]) + return ok; + set += 2; + break; + + case SRE_OP_RANGE_UNI_IGNORE: + /* */ + { + SRE_CODE uch; + /* ch is already lower cased */ + if (set[0] <= ch && ch <= set[1]) + return ok; + uch = sre_upper_unicode(ch); + if (set[0] <= uch && uch <= set[1]) + return ok; + set += 2; + break; + } + + case SRE_OP_NEGATE: + ok = !ok; + break; + + case SRE_OP_BIGCHARSET: + /* <256 blockindices> */ + { + Py_ssize_t count, block; + count = *(set++); + + if (ch < 0x10000u) + block = ((unsigned char*)set)[ch >> 8]; + else + block = -1; + set += 256/sizeof(SRE_CODE); + if (block >=0 && + (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & + (1u << (ch & (SRE_CODE_BITS-1))))) + return ok; + set += count * (256/SRE_CODE_BITS); + break; + } + + default: + /* internal error -- there's not much we can do about it + here, so let's just pretend it didn't match... 
*/ + return 0; + } + } +} + +LOCAL(int) +SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) +{ + SRE_CODE lo, up; + lo = sre_lower_locale(ch); + if (SRE(charset)(state, set, lo)) + return 1; + + up = sre_upper_locale(ch); + return up != lo && SRE(charset)(state, set, up); +} + +LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); + +LOCAL(Py_ssize_t) +SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) +{ + SRE_CODE chr; + SRE_CHAR c; + const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; + const SRE_CHAR* end = (const SRE_CHAR *)state->end; + Py_ssize_t i; + + /* adjust end */ + if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) + end = ptr + maxcount; + + switch (pattern[0]) { + + case SRE_OP_IN: + /* repeated set */ + TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); + while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) + ptr++; + break; + + case SRE_OP_ANY: + /* repeated dot wildcard. */ + TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); + while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) + ptr++; + break; + + case SRE_OP_ANY_ALL: + /* repeated dot wildcard. 
skip to the end of the target + string, and backtrack from there */ + TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); + ptr = end; + break; + + case SRE_OP_LITERAL: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); + c = (SRE_CHAR) chr; +#if SIZEOF_SRE_CHAR < 4 + if ((SRE_CODE) c != chr) + ; /* literal can't match: doesn't fit in char width */ + else +#endif + while (ptr < end && *ptr == c) + ptr++; + break; + + case SRE_OP_LITERAL_IGNORE: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) + ptr++; + break; + + case SRE_OP_LITERAL_UNI_IGNORE: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) + ptr++; + break; + + case SRE_OP_LITERAL_LOC_IGNORE: + /* repeated literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && char_loc_ignore(chr, *ptr)) + ptr++; + break; + + case SRE_OP_NOT_LITERAL: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); + c = (SRE_CHAR) chr; +#if SIZEOF_SRE_CHAR < 4 + if ((SRE_CODE) c != chr) + ptr = end; /* literal can't match: doesn't fit in char width */ + else +#endif + while (ptr < end && *ptr != c) + ptr++; + break; + + case SRE_OP_NOT_LITERAL_IGNORE: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) + ptr++; + break; + + case SRE_OP_NOT_LITERAL_UNI_IGNORE: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) + ptr++; + break; + + case 
SRE_OP_NOT_LITERAL_LOC_IGNORE: + /* repeated non-literal */ + chr = pattern[1]; + TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); + while (ptr < end && !char_loc_ignore(chr, *ptr)) + ptr++; + break; + + default: + /* repeated single character pattern */ + TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); + while ((SRE_CHAR*) state->ptr < end) { + i = SRE(match)(state, pattern, 0); + if (i < 0) + return i; + if (!i) + break; + } + TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, + (SRE_CHAR*) state->ptr - ptr)); + return (SRE_CHAR*) state->ptr - ptr; + } + + TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, + ptr - (SRE_CHAR*) state->ptr)); + return ptr - (SRE_CHAR*) state->ptr; +} + +/* The macros below should be used to protect recursive SRE(match)() + * calls that *failed* and do *not* return immediately (IOW, those + * that will backtrack). Explaining: + * + * - Recursive SRE(match)() returned true: that's usually a success + * (besides atypical cases like ASSERT_NOT), therefore there's no + * reason to restore lastmark; + * + * - Recursive SRE(match)() returned false but the current SRE(match)() + * is returning to the caller: If the current SRE(match)() is the + * top function of the recursion, returning false will be a matching + * failure, and it doesn't matter where lastmark is pointing to. 
/* Exit helpers for the matching function.
 *
 * RETURN_ERROR(i) returns i from the enclosing function immediately.
 * RETURN_FAILURE / RETURN_SUCCESS store 0 / 1 in the local `ret` and jump
 * to the local `exit` label; both must exist at the point of use.
 *
 * RETURN_ON_ERROR(i)    returns i when it is negative.
 * RETURN_ON_SUCCESS(i)  as RETURN_ON_ERROR, then succeeds when i > 0.
 * RETURN_ON_FAILURE(i)  as RETURN_ON_ERROR, then fails when i == 0.
 *
 * The argument is parenthesized so that compound expressions are compared
 * as a whole, but it is still evaluated more than once: pass an
 * expression without side effects.
 */
#define RETURN_ERROR(i) do { return (i); } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
while (0) + +#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ +do { \ + TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \ + ptr = (type*)(state->data_stack+pos); \ +} while (0) + +#define DATA_STACK_PUSH(state, data, size) \ +do { \ + TRACE(("copy data in %p to %zd (%zd)\n", \ + data, state->data_stack_base, size)); \ + if (size > state->data_stack_size - state->data_stack_base) { \ + int j = data_stack_grow(state, size); \ + if (j < 0) return j; \ + if (ctx_pos != -1) \ + DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ + } \ + memcpy(state->data_stack+state->data_stack_base, data, size); \ + state->data_stack_base += size; \ +} while (0) + +/* We add an explicit cast to memcpy here because MSVC has a bug when + compiling C code where it believes that `const void**` cannot be + safely casted to `void*`, see bpo-39943 for details. */ +#define DATA_STACK_POP(state, data, size, discard) \ +do { \ + TRACE(("copy data to %p from %zd (%zd)\n", \ + data, state->data_stack_base-size, size)); \ + memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \ + if (discard) \ + state->data_stack_base -= size; \ +} while (0) + +#define DATA_STACK_POP_DISCARD(state, size) \ +do { \ + TRACE(("discard data from %zd (%zd)\n", \ + state->data_stack_base-size, size)); \ + state->data_stack_base -= size; \ +} while(0) + +#define DATA_PUSH(x) \ + DATA_STACK_PUSH(state, (x), sizeof(*(x))) +#define DATA_POP(x) \ + DATA_STACK_POP(state, (x), sizeof(*(x)), 1) +#define DATA_POP_DISCARD(x) \ + DATA_STACK_POP_DISCARD(state, sizeof(*(x))) +#define DATA_ALLOC(t,p) \ + DATA_STACK_ALLOC(state, t, p) +#define DATA_LOOKUP_AT(t,p,pos) \ + DATA_STACK_LOOKUP_AT(state,t,p,pos) + +#define MARK_PUSH(lastmark) \ + do if (lastmark >= 0) { \ + i = lastmark; /* ctx->lastmark may change if reallocated */ \ + DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ + } while (0) +#define MARK_POP(lastmark) \ + do if (lastmark >= 0) { \ + 
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ + } while (0) +#define MARK_POP_KEEP(lastmark) \ + do if (lastmark >= 0) { \ + DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ + } while (0) +#define MARK_POP_DISCARD(lastmark) \ + do if (lastmark >= 0) { \ + DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ + } while (0) + +#define JUMP_NONE 0 +#define JUMP_MAX_UNTIL_1 1 +#define JUMP_MAX_UNTIL_2 2 +#define JUMP_MAX_UNTIL_3 3 +#define JUMP_MIN_UNTIL_1 4 +#define JUMP_MIN_UNTIL_2 5 +#define JUMP_MIN_UNTIL_3 6 +#define JUMP_REPEAT 7 +#define JUMP_REPEAT_ONE_1 8 +#define JUMP_REPEAT_ONE_2 9 +#define JUMP_MIN_REPEAT_ONE 10 +#define JUMP_BRANCH 11 +#define JUMP_ASSERT 12 +#define JUMP_ASSERT_NOT 13 +#define JUMP_POSS_REPEAT_1 14 +#define JUMP_POSS_REPEAT_2 15 +#define JUMP_ATOMIC_GROUP 16 + +#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \ + DATA_ALLOC(SRE(match_context), nextctx); \ + nextctx->last_ctx_pos = ctx_pos; \ + nextctx->jump = jumpvalue; \ + nextctx->pattern = nextpattern; \ + nextctx->toplevel = toplevel_; \ + ctx_pos = alloc_pos; \ + ctx = nextctx; \ + goto entrance; \ + jumplabel: \ + while (0) /* gcc doesn't like labels at end of scopes */ \ + +#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ + DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel) + +#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ + DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) + +typedef struct { + Py_ssize_t last_ctx_pos; + Py_ssize_t jump; + const SRE_CHAR* ptr; + const SRE_CODE* pattern; + Py_ssize_t count; + Py_ssize_t lastmark; + Py_ssize_t lastindex; + union { + SRE_CODE chr; + SRE_REPEAT* rep; + } u; + int toplevel; +} SRE(match_context); + +/* check if string matches the given pattern. 
returns <0 for + error, 0 for failure, and 1 for success */ +LOCAL(Py_ssize_t) +SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) +{ + const SRE_CHAR* end = (const SRE_CHAR *)state->end; + Py_ssize_t alloc_pos, ctx_pos = -1; + Py_ssize_t i, ret = 0; + Py_ssize_t jump; + unsigned int sigcount=0; + + SRE(match_context)* ctx; + SRE(match_context)* nextctx; + + TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); + + DATA_ALLOC(SRE(match_context), ctx); + ctx->last_ctx_pos = -1; + ctx->jump = JUMP_NONE; + ctx->pattern = pattern; + ctx->toplevel = toplevel; + ctx_pos = alloc_pos; + +entrance: + + ctx->ptr = (SRE_CHAR *)state->ptr; + + if (ctx->pattern[0] == SRE_OP_INFO) { + /* optimization info block */ + /* <1=skip> <2=flags> <3=min> ... */ + if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { + TRACE(("reject (got %zd chars, need %zd)\n", + end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); + RETURN_FAILURE; + } + ctx->pattern += ctx->pattern[1] + 1; + } + + for (;;) { + ++sigcount; + if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) + RETURN_ERROR(SRE_ERROR_INTERRUPTED); + + switch (*ctx->pattern++) { + + case SRE_OP_MARK: + /* set mark */ + /* */ + TRACE(("|%p|%p|MARK %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + if (i & 1) + state->lastindex = i/2 + 1; + if (i > state->lastmark) { + /* state->lastmark is the highest valid index in the + state->mark array. If it is increased by more than 1, + the intervening marks must be set to NULL to signal + that these marks have not been encountered. 
*/ + Py_ssize_t j = state->lastmark + 1; + while (j < i) + state->mark[j++] = NULL; + state->lastmark = i; + } + state->mark[i] = ctx->ptr; + ctx->pattern++; + break; + + case SRE_OP_LITERAL: + /* match literal string */ + /* */ + TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL: + /* match anything that is not literal character */ + /* */ + TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_SUCCESS: + /* end of pattern */ + TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); + if (ctx->toplevel && + ((state->match_all && ctx->ptr != state->end) || + (state->must_advance && ctx->ptr == state->start))) + { + RETURN_FAILURE; + } + state->ptr = ctx->ptr; + RETURN_SUCCESS; + + case SRE_OP_AT: + /* match at given position */ + /* */ + TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); + if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) + RETURN_FAILURE; + ctx->pattern++; + break; + + case SRE_OP_CATEGORY: + /* match at given category */ + /* */ + TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, + ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_ANY: + /* match anything (except a newline) */ + /* */ + TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) + RETURN_FAILURE; + ctx->ptr++; + break; + + case SRE_OP_ANY_ALL: + /* match anything */ + /* */ + TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end) + RETURN_FAILURE; + ctx->ptr++; + break; + + case SRE_OP_IN: + /* match set member (or non_member) */ + /* */ + 
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end || + !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; + + case SRE_OP_LITERAL_IGNORE: + TRACE(("|%p|%p|LITERAL_IGNORE %d\n", + ctx->pattern, ctx->ptr, ctx->pattern[0])); + if (ctx->ptr >= end || + sre_lower_ascii(*ctx->ptr) != *ctx->pattern) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_LITERAL_UNI_IGNORE: + TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", + ctx->pattern, ctx->ptr, ctx->pattern[0])); + if (ctx->ptr >= end || + sre_lower_unicode(*ctx->ptr) != *ctx->pattern) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_LITERAL_LOC_IGNORE: + TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", + ctx->pattern, ctx->ptr, ctx->pattern[0])); + if (ctx->ptr >= end + || !char_loc_ignore(*ctx->pattern, *ctx->ptr)) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL_IGNORE: + TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", + ctx->pattern, ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || + sre_lower_ascii(*ctx->ptr) == *ctx->pattern) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL_UNI_IGNORE: + TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", + ctx->pattern, ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end || + sre_lower_unicode(*ctx->ptr) == *ctx->pattern) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_NOT_LITERAL_LOC_IGNORE: + TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", + ctx->pattern, ctx->ptr, *ctx->pattern)); + if (ctx->ptr >= end + || char_loc_ignore(*ctx->pattern, *ctx->ptr)) + RETURN_FAILURE; + ctx->pattern++; + ctx->ptr++; + break; + + case SRE_OP_IN_IGNORE: + TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end + || !SRE(charset)(state, ctx->pattern+1, + (SRE_CODE)sre_lower_ascii(*ctx->ptr))) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; 
+ + case SRE_OP_IN_UNI_IGNORE: + TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end + || !SRE(charset)(state, ctx->pattern+1, + (SRE_CODE)sre_lower_unicode(*ctx->ptr))) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; + + case SRE_OP_IN_LOC_IGNORE: + TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr)); + if (ctx->ptr >= end + || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr)) + RETURN_FAILURE; + ctx->pattern += ctx->pattern[0]; + ctx->ptr++; + break; + + case SRE_OP_JUMP: + case SRE_OP_INFO: + /* jump forward */ + /* */ + TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_BRANCH: + /* alternation */ + /* <0=skip> code ... */ + TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); + LASTMARK_SAVE(); + if (state->repeat) + MARK_PUSH(ctx->lastmark); + for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { + if (ctx->pattern[1] == SRE_OP_LITERAL && + (ctx->ptr >= end || + (SRE_CODE) *ctx->ptr != ctx->pattern[2])) + continue; + if (ctx->pattern[1] == SRE_OP_IN && + (ctx->ptr >= end || + !SRE(charset)(state, ctx->pattern + 3, + (SRE_CODE) *ctx->ptr))) + continue; + state->ptr = ctx->ptr; + DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); + if (ret) { + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (state->repeat) + MARK_POP_KEEP(ctx->lastmark); + LASTMARK_RESTORE(); + } + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_FAILURE; + + case SRE_OP_REPEAT_ONE: + /* match repeated sequence (maximizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. 
for other cases, + use the MAX_REPEAT operator */ + + /* <1=min> <2=max> item tail */ + + TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2])); + + if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) + RETURN_FAILURE; /* cannot match */ + + state->ptr = ctx->ptr; + + ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + ctx->count = ret; + ctx->ptr += ctx->count; + + /* when we arrive here, count contains the number of + matches, and ctx->ptr points to the tail of the target + string. check if the rest of the pattern matches, + and backtrack if not. */ + + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) + RETURN_FAILURE; + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && + ctx->ptr == state->end && + !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) + { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + } + + LASTMARK_SAVE(); + if (state->repeat) + MARK_PUSH(ctx->lastmark); + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { + /* tail starts with a literal. 
skip positions where + the rest of the pattern cannot possibly match */ + ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; + for (;;) { + while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && + (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { + ctx->ptr--; + ctx->count--; + } + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) + break; + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, + ctx->pattern+ctx->pattern[0]); + if (ret) { + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (state->repeat) + MARK_POP_KEEP(ctx->lastmark); + LASTMARK_RESTORE(); + + ctx->ptr--; + ctx->count--; + } + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + } else { + /* general case */ + while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, + ctx->pattern+ctx->pattern[0]); + if (ret) { + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (state->repeat) + MARK_POP_KEEP(ctx->lastmark); + LASTMARK_RESTORE(); + + ctx->ptr--; + ctx->count--; + } + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + } + RETURN_FAILURE; + + case SRE_OP_MIN_REPEAT_ONE: + /* match repeated sequence (minimizing regexp) */ + + /* this operator only works if the repeated item is + exactly one character wide, and we're not already + collecting backtracking points. 
for other cases, + use the MIN_REPEAT operator */ + + /* <1=min> <2=max> item tail */ + + TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2])); + + if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) + RETURN_FAILURE; /* cannot match */ + + state->ptr = ctx->ptr; + + if (ctx->pattern[1] == 0) + ctx->count = 0; + else { + /* count using pattern min as the maximum */ + ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + if (ret < (Py_ssize_t) ctx->pattern[1]) + /* didn't match minimum number of times */ + RETURN_FAILURE; + /* advance past minimum matches of repeat */ + ctx->count = ret; + ctx->ptr += ctx->count; + } + + if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && + !(ctx->toplevel && + ((state->match_all && ctx->ptr != state->end) || + (state->must_advance && ctx->ptr == state->start)))) + { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + + } else { + /* general case */ + LASTMARK_SAVE(); + if (state->repeat) + MARK_PUSH(ctx->lastmark); + + while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT + || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { + state->ptr = ctx->ptr; + DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, + ctx->pattern+ctx->pattern[0]); + if (ret) { + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (state->repeat) + MARK_POP_KEEP(ctx->lastmark); + LASTMARK_RESTORE(); + + state->ptr = ctx->ptr; + ret = SRE(count)(state, ctx->pattern+3, 1); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + if (ret == 0) + break; + assert(ret == 1); + ctx->ptr++; + ctx->count++; + } + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + } + RETURN_FAILURE; + + case SRE_OP_POSSESSIVE_REPEAT_ONE: + /* match repeated sequence (maximizing regexp) without + backtracking */ + + /* this operator only works if the repeated 
item is + exactly one character wide, and we're not already + collecting backtracking points. for other cases, + use the MAX_REPEAT operator */ + + /* <1=min> <2=max> item + tail */ + + TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1], ctx->pattern[2])); + + if (ctx->ptr + ctx->pattern[1] > end) { + RETURN_FAILURE; /* cannot match */ + } + + state->ptr = ctx->ptr; + + ret = SRE(count)(state, ctx->pattern + 3, ctx->pattern[2]); + RETURN_ON_ERROR(ret); + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + ctx->count = ret; + ctx->ptr += ctx->count; + + /* when we arrive here, count contains the number of + matches, and ctx->ptr points to the tail of the target + string. check if the rest of the pattern matches, + and fail if not. */ + + /* Test for not enough repetitions in match */ + if (ctx->count < (Py_ssize_t) ctx->pattern[1]) { + RETURN_FAILURE; + } + + /* Update the pattern to point to the next op code */ + ctx->pattern += ctx->pattern[0]; + + /* Let the tail be evaluated separately and consider this + match successful. */ + if (*ctx->pattern == SRE_OP_SUCCESS && + ctx->ptr == state->end && + !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) + { + /* tail is empty. we're finished */ + state->ptr = ctx->ptr; + RETURN_SUCCESS; + } + + /* Attempt to match the rest of the string */ + break; + + case SRE_OP_REPEAT: + /* create repeat context. 
all the hard work is done + by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ + /* <1=min> <2=max> + <3=repeat_index> item tail */ + TRACE(("|%p|%p|REPEAT %d %d %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[1], ctx->pattern[2], ctx->pattern[3])); + + /* install repeat context */ + ctx->u.rep = &state->repeats_array[ctx->pattern[3]]; + + ctx->u.rep->count = -1; + ctx->u.rep->pattern = ctx->pattern; + ctx->u.rep->prev = state->repeat; + ctx->u.rep->last_ptr = NULL; + state->repeat = ctx->u.rep; + + state->ptr = ctx->ptr; + DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); + state->repeat = ctx->u.rep->prev; + + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + RETURN_FAILURE; + + case SRE_OP_MAX_UNTIL: + /* maximizing repeat */ + /* <1=min> <2=max> + <3=repeat_index> item tail */ + + /* FIXME: we probably need to deal with zero-width + matches in here... */ + + ctx->u.rep = state->repeat; + if (!ctx->u.rep) + RETURN_ERROR(SRE_ERROR_STATE); + + state->ptr = ctx->ptr; + + ctx->count = ctx->u.rep->count+1; + + TRACE(("|%p|%p|MAX_UNTIL %zd\n", ctx->pattern, + ctx->ptr, ctx->count)); + + if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { + /* not enough matches */ + ctx->u.rep->count = ctx->count; + DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, + ctx->u.rep->pattern+4); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || + ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && + state->ptr != ctx->u.rep->last_ptr) { + /* we may have enough matches, but if we can + match another item, do so */ + ctx->u.rep->count = ctx->count; + LASTMARK_SAVE(); + MARK_PUSH(ctx->lastmark); + /* zero-width match protection */ + DATA_PUSH(&ctx->u.rep->last_ptr); + ctx->u.rep->last_ptr = state->ptr; + DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, + ctx->u.rep->pattern+4); + DATA_POP(&ctx->u.rep->last_ptr); + if (ret) { + 
MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + } + + /* cannot match more repeated items here. make sure the + tail matches */ + state->repeat = ctx->u.rep->prev; + DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); + state->repeat = ctx->u.rep; // restore repeat before return + + RETURN_ON_SUCCESS(ret); + state->ptr = ctx->ptr; + RETURN_FAILURE; + + case SRE_OP_MIN_UNTIL: + /* minimizing repeat */ + /* <1=min> <2=max> + <3=repeat_index> item tail */ + + ctx->u.rep = state->repeat; + if (!ctx->u.rep) + RETURN_ERROR(SRE_ERROR_STATE); + + state->ptr = ctx->ptr; + + ctx->count = ctx->u.rep->count+1; + + TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", ctx->pattern, + ctx->ptr, ctx->count, ctx->u.rep->pattern)); + + if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { + /* not enough matches */ + ctx->u.rep->count = ctx->count; + DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, + ctx->u.rep->pattern+4); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + /* see if the tail matches */ + state->repeat = ctx->u.rep->prev; + + LASTMARK_SAVE(); + if (state->repeat) + MARK_PUSH(ctx->lastmark); + + DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); + SRE_REPEAT *repeat_of_tail = state->repeat; + state->repeat = ctx->u.rep; // restore repeat before return + + if (ret) { + if (repeat_of_tail) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + if (repeat_of_tail) + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + + state->ptr = ctx->ptr; + + if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] + && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || + state->ptr == ctx->u.rep->last_ptr) + RETURN_FAILURE; + + ctx->u.rep->count = ctx->count; + /* zero-width match protection */ + DATA_PUSH(&ctx->u.rep->last_ptr); + 
ctx->u.rep->last_ptr = state->ptr; + DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, + ctx->u.rep->pattern+4); + DATA_POP(&ctx->u.rep->last_ptr); + if (ret) { + RETURN_ON_ERROR(ret); + RETURN_SUCCESS; + } + ctx->u.rep->count = ctx->count-1; + state->ptr = ctx->ptr; + RETURN_FAILURE; + + case SRE_OP_POSSESSIVE_REPEAT: + /* create possessive repeat contexts. */ + /* <1=min> <2=max> pattern + tail */ + TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1], ctx->pattern[2])); + + /* Set the global Input pointer to this context's Input + pointer */ + state->ptr = ctx->ptr; + + /* Initialize Count to 0 */ + ctx->count = 0; + + /* Check for minimum required matches. */ + while (ctx->count < (Py_ssize_t)ctx->pattern[1]) { + /* not enough matches */ + DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, + &ctx->pattern[3]); + if (ret) { + RETURN_ON_ERROR(ret); + ctx->count++; + } + else { + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + } + + /* Clear the context's Input stream pointer so that it + doesn't match the global state so that the while loop can + be entered. */ + ctx->ptr = NULL; + + /* Keep trying to parse the sub-pattern until the + end is reached, creating a new context each time. */ + while ((ctx->count < (Py_ssize_t)ctx->pattern[2] || + (Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT) && + state->ptr != ctx->ptr) { + /* Save the Capture Group Marker state into the current + Context and back up the current highest number + Capture Group marker. */ + LASTMARK_SAVE(); + MARK_PUSH(ctx->lastmark); + + /* zero-width match protection */ + /* Set the context's Input Stream pointer to be the + current Input Stream pointer from the global + state. When the loop reaches the next iteration, + the context will then store the last known good + position with the global state holding the Input + Input Stream position that has been updated with + the most recent match. 
Thus, if state's Input + stream remains the same as the one stored in the + current Context, we know we have successfully + matched an empty string and that all subsequent + matches will also be the empty string until the + maximum number of matches are counted, and because + of this, we could immediately stop at that point and + consider this match successful. */ + ctx->ptr = state->ptr; + + /* We have not reached the maximin matches, so try to + match once more. */ + DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, + &ctx->pattern[3]); + + /* Check to see if the last attempted match + succeeded. */ + if (ret) { + /* Drop the saved highest number Capture Group + marker saved above and use the newly updated + value. */ + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + + /* Success, increment the count. */ + ctx->count++; + } + /* Last attempted match failed. */ + else { + /* Restore the previously saved highest number + Capture Group marker since the last iteration + did not match, then restore that to the global + state. */ + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + + /* We have sufficient matches, so exit loop. */ + break; + } + } + + /* Evaluate Tail */ + /* Jump to end of pattern indicated by skip, and then skip + the SUCCESS op code that follows it. */ + ctx->pattern += ctx->pattern[0] + 1; + ctx->ptr = state->ptr; + break; + + case SRE_OP_ATOMIC_GROUP: + /* Atomic Group Sub Pattern */ + /* pattern tail */ + TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); + + /* Set the global Input pointer to this context's Input + pointer */ + state->ptr = ctx->ptr; + + /* Evaluate the Atomic Group in a new context, terminating + when the end of the group, represented by a SUCCESS op + code, is reached. */ + /* Group Pattern begins at an offset of 1 code. */ + DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group, + &ctx->pattern[1]); + + /* Test Exit Condition */ + RETURN_ON_ERROR(ret); + + if (ret == 0) { + /* Atomic Group failed to Match. 
*/ + state->ptr = ctx->ptr; + RETURN_FAILURE; + } + + /* Evaluate Tail */ + /* Jump to end of pattern indicated by skip, and then skip + the SUCCESS op code that follows it. */ + ctx->pattern += ctx->pattern[0]; + ctx->ptr = state->ptr; + break; + + case SRE_OP_GROUPREF: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || *ctx->ptr != *p) + RETURN_FAILURE; + p++; + ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case SRE_OP_GROUPREF_IGNORE: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || + sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p)) + RETURN_FAILURE; + p++; + ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case SRE_OP_GROUPREF_UNI_IGNORE: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || + sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p)) + RETURN_FAILURE; + p++; + ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case 
SRE_OP_GROUPREF_LOC_IGNORE: + /* match backreference */ + TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + RETURN_FAILURE; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) + RETURN_FAILURE; + while (p < e) { + if (ctx->ptr >= end || + sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p)) + RETURN_FAILURE; + p++; + ctx->ptr++; + } + } + } + ctx->pattern++; + break; + + case SRE_OP_GROUPREF_EXISTS: + TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[0])); + /* codeyes codeno ... */ + i = ctx->pattern[0]; + { + Py_ssize_t groupref = i+i; + if (groupref >= state->lastmark) { + ctx->pattern += ctx->pattern[1]; + break; + } else { + SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; + if (!p || !e || e < p) { + ctx->pattern += ctx->pattern[1]; + break; + } + } + } + ctx->pattern += 2; + break; + + case SRE_OP_ASSERT: + /* assert subpattern */ + /* */ + TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1])); + if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) + RETURN_FAILURE; + state->ptr = ctx->ptr - ctx->pattern[1]; + DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); + RETURN_ON_FAILURE(ret); + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_ASSERT_NOT: + /* assert not subpattern */ + /* */ + TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, + ctx->ptr, ctx->pattern[1])); + if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { + state->ptr = ctx->ptr - ctx->pattern[1]; + LASTMARK_SAVE(); + if (state->repeat) + MARK_PUSH(ctx->lastmark); + + DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); + if (ret) { + if (state->repeat) + MARK_POP_DISCARD(ctx->lastmark); + RETURN_ON_ERROR(ret); + 
RETURN_FAILURE; + } + if (state->repeat) + MARK_POP(ctx->lastmark); + LASTMARK_RESTORE(); + } + ctx->pattern += ctx->pattern[0]; + break; + + case SRE_OP_FAILURE: + /* immediate failure */ + TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); + RETURN_FAILURE; + + default: + TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, + ctx->pattern[-1])); + RETURN_ERROR(SRE_ERROR_ILLEGAL); + } + } + +exit: + ctx_pos = ctx->last_ctx_pos; + jump = ctx->jump; + DATA_POP_DISCARD(ctx); + if (ctx_pos == -1) + return ret; + DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + + switch (jump) { + case JUMP_MAX_UNTIL_2: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_2; + case JUMP_MAX_UNTIL_3: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_3; + case JUMP_MIN_UNTIL_2: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_2; + case JUMP_MIN_UNTIL_3: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_3; + case JUMP_BRANCH: + TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); + goto jump_branch; + case JUMP_MAX_UNTIL_1: + TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); + goto jump_max_until_1; + case JUMP_MIN_UNTIL_1: + TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); + goto jump_min_until_1; + case JUMP_POSS_REPEAT_1: + TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr)); + goto jump_poss_repeat_1; + case JUMP_POSS_REPEAT_2: + TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr)); + goto jump_poss_repeat_2; + case JUMP_REPEAT: + TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); + goto jump_repeat; + case JUMP_REPEAT_ONE_1: + TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); + goto jump_repeat_one_1; + case JUMP_REPEAT_ONE_2: + TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); + goto jump_repeat_one_2; + case JUMP_MIN_REPEAT_ONE: + 
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); + goto jump_min_repeat_one; + case JUMP_ATOMIC_GROUP: + TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); + goto jump_atomic_group; + case JUMP_ASSERT: + TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); + goto jump_assert; + case JUMP_ASSERT_NOT: + TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); + goto jump_assert_not; + case JUMP_NONE: + TRACE(("|%p|%p|RETURN %zd\n", ctx->pattern, + ctx->ptr, ret)); + break; + } + + return ret; /* should never get here */ +} + +/* need to reset capturing groups between two SRE(match) callings in loops */ +#define RESET_CAPTURE_GROUP() \ + do { state->lastmark = state->lastindex = -1; } while (0) + +LOCAL(Py_ssize_t) +SRE(search)(SRE_STATE* state, SRE_CODE* pattern) +{ + SRE_CHAR* ptr = (SRE_CHAR *)state->start; + SRE_CHAR* end = (SRE_CHAR *)state->end; + Py_ssize_t status = 0; + Py_ssize_t prefix_len = 0; + Py_ssize_t prefix_skip = 0; + SRE_CODE* prefix = NULL; + SRE_CODE* charset = NULL; + SRE_CODE* overlap = NULL; + int flags = 0; + + if (ptr > end) + return 0; + + if (pattern[0] == SRE_OP_INFO) { + /* optimization info block */ + /* <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ + + flags = pattern[2]; + + if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { + TRACE(("reject (got %u chars, need %u)\n", + (unsigned int)(end - ptr), pattern[3])); + return 0; + } + if (pattern[3] > 1) { + /* adjust end point (but make sure we leave at least one + character in there, so literal search will work) */ + end -= pattern[3] - 1; + if (end <= ptr) + end = ptr; + } + + if (flags & SRE_INFO_PREFIX) { + /* pattern starts with a known prefix */ + /* */ + prefix_len = pattern[5]; + prefix_skip = pattern[6]; + prefix = pattern + 7; + overlap = prefix + prefix_len - 1; + } else if (flags & SRE_INFO_CHARSET) + /* pattern starts with a character from a known set */ + /* */ + charset = pattern + 5; + + pattern += 1 + pattern[1]; + } + + 
TRACE(("prefix = %p %zd %zd\n", + prefix, prefix_len, prefix_skip)); + TRACE(("charset = %p\n", charset)); + + if (prefix_len == 1) { + /* pattern starts with a literal character */ + SRE_CHAR c = (SRE_CHAR) prefix[0]; +#if SIZEOF_SRE_CHAR < 4 + if ((SRE_CODE) c != prefix[0]) + return 0; /* literal can't match: doesn't fit in char width */ +#endif + end = (SRE_CHAR *)state->end; + state->must_advance = 0; + while (ptr < end) { + while (*ptr != c) { + if (++ptr >= end) + return 0; + } + TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); + state->start = ptr; + state->ptr = ptr + prefix_skip; + if (flags & SRE_INFO_LITERAL) + return 1; /* we got all of it */ + status = SRE(match)(state, pattern + 2*prefix_skip, 0); + if (status != 0) + return status; + ++ptr; + RESET_CAPTURE_GROUP(); + } + return 0; + } + + if (prefix_len > 1) { + /* pattern starts with a known prefix. use the overlap + table to skip forward as fast as we possibly can */ + Py_ssize_t i = 0; + + end = (SRE_CHAR *)state->end; + if (prefix_len > end - ptr) + return 0; +#if SIZEOF_SRE_CHAR < 4 + for (i = 0; i < prefix_len; i++) + if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) + return 0; /* literal can't match: doesn't fit in char width */ +#endif + while (ptr < end) { + SRE_CHAR c = (SRE_CHAR) prefix[0]; + while (*ptr++ != c) { + if (ptr >= end) + return 0; + } + if (ptr >= end) + return 0; + + i = 1; + state->must_advance = 0; + do { + if (*ptr == (SRE_CHAR) prefix[i]) { + if (++i != prefix_len) { + if (++ptr >= end) + return 0; + continue; + } + /* found a potential match */ + TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); + state->start = ptr - (prefix_len - 1); + state->ptr = ptr - (prefix_len - prefix_skip - 1); + if (flags & SRE_INFO_LITERAL) + return 1; /* we got all of it */ + status = SRE(match)(state, pattern + 2*prefix_skip, 0); + if (status != 0) + return status; + /* close but no cigar -- try again */ + if (++ptr >= end) + return 0; + RESET_CAPTURE_GROUP(); + } + i = overlap[i]; + } while 
(i != 0); + } + return 0; + } + + if (charset) { + /* pattern starts with a character from a known set */ + end = (SRE_CHAR *)state->end; + state->must_advance = 0; + for (;;) { + while (ptr < end && !SRE(charset)(state, charset, *ptr)) + ptr++; + if (ptr >= end) + return 0; + TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); + state->start = ptr; + state->ptr = ptr; + status = SRE(match)(state, pattern, 0); + if (status != 0) + break; + ptr++; + RESET_CAPTURE_GROUP(); + } + } else { + /* general case */ + assert(ptr <= end); + TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 1); + state->must_advance = 0; + if (status == 0 && pattern[0] == SRE_OP_AT && + (pattern[1] == SRE_AT_BEGINNING || + pattern[1] == SRE_AT_BEGINNING_STRING)) + { + state->start = state->ptr = ptr = end; + return 0; + } + while (status == 0 && ptr < end) { + ptr++; + RESET_CAPTURE_GROUP(); + TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); + state->start = state->ptr = ptr; + status = SRE(match)(state, pattern, 0); + } + } + + return status; +} + +#undef SRE_CHAR +#undef SIZEOF_SRE_CHAR +#undef SRE + +/* vim:ts=4:sw=4:et +*/ diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h deleted file mode 100644 index 34cbe21..0000000 --- a/Modules/clinic/_sre.c.h +++ /dev/null @@ -1,926 +0,0 @@ -/*[clinic input] -preserve -[clinic start generated code]*/ - -PyDoc_STRVAR(_sre_getcodesize__doc__, -"getcodesize($module, /)\n" -"--\n" -"\n"); - -#define _SRE_GETCODESIZE_METHODDEF \ - {"getcodesize", (PyCFunction)_sre_getcodesize, METH_NOARGS, _sre_getcodesize__doc__}, - -static int -_sre_getcodesize_impl(PyObject *module); - -static PyObject * -_sre_getcodesize(PyObject *module, PyObject *Py_UNUSED(ignored)) -{ - PyObject *return_value = NULL; - int _return_value; - - _return_value = _sre_getcodesize_impl(module); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); - 
-exit: - return return_value; -} - -PyDoc_STRVAR(_sre_ascii_iscased__doc__, -"ascii_iscased($module, character, /)\n" -"--\n" -"\n"); - -#define _SRE_ASCII_ISCASED_METHODDEF \ - {"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__}, - -static int -_sre_ascii_iscased_impl(PyObject *module, int character); - -static PyObject * -_sre_ascii_iscased(PyObject *module, PyObject *arg) -{ - PyObject *return_value = NULL; - int character; - int _return_value; - - character = _PyLong_AsInt(arg); - if (character == -1 && PyErr_Occurred()) { - goto exit; - } - _return_value = _sre_ascii_iscased_impl(module, character); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyBool_FromLong((long)_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_unicode_iscased__doc__, -"unicode_iscased($module, character, /)\n" -"--\n" -"\n"); - -#define _SRE_UNICODE_ISCASED_METHODDEF \ - {"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__}, - -static int -_sre_unicode_iscased_impl(PyObject *module, int character); - -static PyObject * -_sre_unicode_iscased(PyObject *module, PyObject *arg) -{ - PyObject *return_value = NULL; - int character; - int _return_value; - - character = _PyLong_AsInt(arg); - if (character == -1 && PyErr_Occurred()) { - goto exit; - } - _return_value = _sre_unicode_iscased_impl(module, character); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyBool_FromLong((long)_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_ascii_tolower__doc__, -"ascii_tolower($module, character, /)\n" -"--\n" -"\n"); - -#define _SRE_ASCII_TOLOWER_METHODDEF \ - {"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__}, - -static int -_sre_ascii_tolower_impl(PyObject *module, int character); - -static PyObject * -_sre_ascii_tolower(PyObject *module, PyObject *arg) -{ - PyObject *return_value 
= NULL; - int character; - int _return_value; - - character = _PyLong_AsInt(arg); - if (character == -1 && PyErr_Occurred()) { - goto exit; - } - _return_value = _sre_ascii_tolower_impl(module, character); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_unicode_tolower__doc__, -"unicode_tolower($module, character, /)\n" -"--\n" -"\n"); - -#define _SRE_UNICODE_TOLOWER_METHODDEF \ - {"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__}, - -static int -_sre_unicode_tolower_impl(PyObject *module, int character); - -static PyObject * -_sre_unicode_tolower(PyObject *module, PyObject *arg) -{ - PyObject *return_value = NULL; - int character; - int _return_value; - - character = _PyLong_AsInt(arg); - if (character == -1 && PyErr_Occurred()) { - goto exit; - } - _return_value = _sre_unicode_tolower_impl(module, character); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, -"match($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Matches zero or more characters at the beginning of the string."); - -#define _SRE_SRE_PATTERN_MATCH_METHODDEF \ - {"match", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_match, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, - -static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_match(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {"O|nn:match", _keywords, 0}; - 
PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &string, &pos, &endpos)) { - goto exit; - } - return_value = _sre_SRE_Pattern_match_impl(self, cls, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__, -"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Matches against all of the string."); - -#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \ - {"fullmatch", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_fullmatch, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__}, - -static PyObject * -_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_fullmatch(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {"O|nn:fullmatch", _keywords, 0}; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &string, &pos, &endpos)) { - goto exit; - } - return_value = _sre_SRE_Pattern_fullmatch_impl(self, cls, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_search__doc__, -"search($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Scan through string looking for a match, and return a corresponding match object instance.\n" -"\n" -"Return None if no position in the string matches."); - -#define _SRE_SRE_PATTERN_SEARCH_METHODDEF \ - {"search", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_search, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_search__doc__}, - -static PyObject * -_sre_SRE_Pattern_search_impl(PatternObject *self, 
PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_search(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {"O|nn:search", _keywords, 0}; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &string, &pos, &endpos)) { - goto exit; - } - return_value = _sre_SRE_Pattern_search_impl(self, cls, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_findall__doc__, -"findall($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Return a list of all non-overlapping matches of pattern in string."); - -#define _SRE_SRE_PATTERN_FINDALL_METHODDEF \ - {"findall", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_findall, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_findall__doc__}, - -static PyObject * -_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, - Py_ssize_t pos, Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_findall(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "findall", 0}; - PyObject *argsbuf[3]; - Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); - if (!args) { - goto exit; - } - string = args[0]; - if (!noptargs) { - goto skip_optional_pos; - } - if (args[1]) { - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[1]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - pos = ival; - } - if (!--noptargs) { - goto skip_optional_pos; - } - } - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[2]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - endpos = ival; - } -skip_optional_pos: - return_value = _sre_SRE_Pattern_findall_impl(self, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_finditer__doc__, -"finditer($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n" -"Return an iterator over all non-overlapping matches for the RE pattern in string.\n" -"\n" -"For each match, the iterator returns a match object."); - -#define _SRE_SRE_PATTERN_FINDITER_METHODDEF \ - {"finditer", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_finditer, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_finditer__doc__}, - -static PyObject * -_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_finditer(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {"O|nn:finditer", _keywords, 0}; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if 
(!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &string, &pos, &endpos)) { - goto exit; - } - return_value = _sre_SRE_Pattern_finditer_impl(self, cls, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_scanner__doc__, -"scanner($self, /, string, pos=0, endpos=sys.maxsize)\n" -"--\n" -"\n"); - -#define _SRE_SRE_PATTERN_SCANNER_METHODDEF \ - {"scanner", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_scanner, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_scanner__doc__}, - -static PyObject * -_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); - -static PyObject * -_sre_SRE_Pattern_scanner(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; - static _PyArg_Parser _parser = {"O|nn:scanner", _keywords, 0}; - PyObject *string; - Py_ssize_t pos = 0; - Py_ssize_t endpos = PY_SSIZE_T_MAX; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &string, &pos, &endpos)) { - goto exit; - } - return_value = _sre_SRE_Pattern_scanner_impl(self, cls, string, pos, endpos); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_split__doc__, -"split($self, /, string, maxsplit=0)\n" -"--\n" -"\n" -"Split string by the occurrences of pattern."); - -#define _SRE_SRE_PATTERN_SPLIT_METHODDEF \ - {"split", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_split, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_split__doc__}, - -static PyObject * -_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, - Py_ssize_t maxsplit); - -static PyObject * -_sre_SRE_Pattern_split(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"string", "maxsplit", NULL}; - 
static _PyArg_Parser _parser = {NULL, _keywords, "split", 0}; - PyObject *argsbuf[2]; - Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *string; - Py_ssize_t maxsplit = 0; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); - if (!args) { - goto exit; - } - string = args[0]; - if (!noptargs) { - goto skip_optional_pos; - } - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[1]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - maxsplit = ival; - } -skip_optional_pos: - return_value = _sre_SRE_Pattern_split_impl(self, string, maxsplit); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_sub__doc__, -"sub($self, /, repl, string, count=0)\n" -"--\n" -"\n" -"Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl."); - -#define _SRE_SRE_PATTERN_SUB_METHODDEF \ - {"sub", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_sub, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_sub__doc__}, - -static PyObject * -_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, - PyObject *repl, PyObject *string, Py_ssize_t count); - -static PyObject * -_sre_SRE_Pattern_sub(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"repl", "string", "count", NULL}; - static _PyArg_Parser _parser = {"OO|n:sub", _keywords, 0}; - PyObject *repl; - PyObject *string; - Py_ssize_t count = 0; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &repl, &string, &count)) { - goto exit; - } - return_value = _sre_SRE_Pattern_sub_impl(self, cls, repl, string, count); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern_subn__doc__, -"subn($self, /, repl, string, 
count=0)\n" -"--\n" -"\n" -"Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl."); - -#define _SRE_SRE_PATTERN_SUBN_METHODDEF \ - {"subn", (PyCFunction)(void(*)(void))_sre_SRE_Pattern_subn, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_subn__doc__}, - -static PyObject * -_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, - PyObject *repl, PyObject *string, - Py_ssize_t count); - -static PyObject * -_sre_SRE_Pattern_subn(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"repl", "string", "count", NULL}; - static _PyArg_Parser _parser = {"OO|n:subn", _keywords, 0}; - PyObject *repl; - PyObject *string; - Py_ssize_t count = 0; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &repl, &string, &count)) { - goto exit; - } - return_value = _sre_SRE_Pattern_subn_impl(self, cls, repl, string, count); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Pattern___copy____doc__, -"__copy__($self, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_PATTERN___COPY___METHODDEF \ - {"__copy__", (PyCFunction)_sre_SRE_Pattern___copy__, METH_NOARGS, _sre_SRE_Pattern___copy____doc__}, - -static PyObject * -_sre_SRE_Pattern___copy___impl(PatternObject *self); - -static PyObject * -_sre_SRE_Pattern___copy__(PatternObject *self, PyObject *Py_UNUSED(ignored)) -{ - return _sre_SRE_Pattern___copy___impl(self); -} - -PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__, -"__deepcopy__($self, memo, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \ - {"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__}, - -PyDoc_STRVAR(_sre_compile__doc__, -"compile($module, /, pattern, flags, code, groups, groupindex,\n" -" indexgroup, repeat_count)\n" -"--\n" 
-"\n"); - -#define _SRE_COMPILE_METHODDEF \ - {"compile", (PyCFunction)(void(*)(void))_sre_compile, METH_FASTCALL|METH_KEYWORDS, _sre_compile__doc__}, - -static PyObject * -_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, - PyObject *code, Py_ssize_t groups, PyObject *groupindex, - PyObject *indexgroup, Py_ssize_t repeat_count); - -static PyObject * -_sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", "repeat_count", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "compile", 0}; - PyObject *argsbuf[7]; - PyObject *pattern; - int flags; - PyObject *code; - Py_ssize_t groups; - PyObject *groupindex; - PyObject *indexgroup; - Py_ssize_t repeat_count; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 7, 7, 0, argsbuf); - if (!args) { - goto exit; - } - pattern = args[0]; - flags = _PyLong_AsInt(args[1]); - if (flags == -1 && PyErr_Occurred()) { - goto exit; - } - if (!PyList_Check(args[2])) { - _PyArg_BadArgument("compile", "argument 'code'", "list", args[2]); - goto exit; - } - code = args[2]; - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[3]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - groups = ival; - } - if (!PyDict_Check(args[4])) { - _PyArg_BadArgument("compile", "argument 'groupindex'", "dict", args[4]); - goto exit; - } - groupindex = args[4]; - if (!PyTuple_Check(args[5])) { - _PyArg_BadArgument("compile", "argument 'indexgroup'", "tuple", args[5]); - goto exit; - } - indexgroup = args[5]; - { - Py_ssize_t ival = -1; - PyObject *iobj = _PyNumber_Index(args[6]); - if (iobj != NULL) { - ival = PyLong_AsSsize_t(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - repeat_count = ival; - } 
- return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup, repeat_count); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_expand__doc__, -"expand($self, /, template)\n" -"--\n" -"\n" -"Return the string obtained by doing backslash substitution on the string template, as done by the sub() method."); - -#define _SRE_SRE_MATCH_EXPAND_METHODDEF \ - {"expand", (PyCFunction)(void(*)(void))_sre_SRE_Match_expand, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_expand__doc__}, - -static PyObject * -_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template); - -static PyObject * -_sre_SRE_Match_expand(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"template", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "expand", 0}; - PyObject *argsbuf[1]; - PyObject *template; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); - if (!args) { - goto exit; - } - template = args[0]; - return_value = _sre_SRE_Match_expand_impl(self, template); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_groups__doc__, -"groups($self, /, default=None)\n" -"--\n" -"\n" -"Return a tuple containing all the subgroups of the match, from 1.\n" -"\n" -" default\n" -" Is used for groups that did not participate in the match."); - -#define _SRE_SRE_MATCH_GROUPS_METHODDEF \ - {"groups", (PyCFunction)(void(*)(void))_sre_SRE_Match_groups, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groups__doc__}, - -static PyObject * -_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value); - -static PyObject * -_sre_SRE_Match_groups(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"default", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "groups", 0}; - PyObject 
*argsbuf[1]; - Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *default_value = Py_None; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); - if (!args) { - goto exit; - } - if (!noptargs) { - goto skip_optional_pos; - } - default_value = args[0]; -skip_optional_pos: - return_value = _sre_SRE_Match_groups_impl(self, default_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_groupdict__doc__, -"groupdict($self, /, default=None)\n" -"--\n" -"\n" -"Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.\n" -"\n" -" default\n" -" Is used for groups that did not participate in the match."); - -#define _SRE_SRE_MATCH_GROUPDICT_METHODDEF \ - {"groupdict", (PyCFunction)(void(*)(void))_sre_SRE_Match_groupdict, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groupdict__doc__}, - -static PyObject * -_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value); - -static PyObject * -_sre_SRE_Match_groupdict(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = {"default", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "groupdict", 0}; - PyObject *argsbuf[1]; - Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *default_value = Py_None; - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); - if (!args) { - goto exit; - } - if (!noptargs) { - goto skip_optional_pos; - } - default_value = args[0]; -skip_optional_pos: - return_value = _sre_SRE_Match_groupdict_impl(self, default_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_start__doc__, -"start($self, group=0, /)\n" -"--\n" -"\n" -"Return index of the start of the substring matched by group."); - -#define _SRE_SRE_MATCH_START_METHODDEF \ - {"start", (PyCFunction)(void(*)(void))_sre_SRE_Match_start, METH_FASTCALL, _sre_SRE_Match_start__doc__}, - -static Py_ssize_t -_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group); - -static PyObject * -_sre_SRE_Match_start(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) -{ - PyObject *return_value = NULL; - PyObject *group = NULL; - Py_ssize_t _return_value; - - if (!_PyArg_CheckPositional("start", nargs, 0, 1)) { - goto exit; - } - if (nargs < 1) { - goto skip_optional; - } - group = args[0]; -skip_optional: - _return_value = _sre_SRE_Match_start_impl(self, group); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromSsize_t(_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_end__doc__, -"end($self, group=0, /)\n" -"--\n" -"\n" -"Return index of the end of the substring matched by group."); - -#define _SRE_SRE_MATCH_END_METHODDEF \ - {"end", (PyCFunction)(void(*)(void))_sre_SRE_Match_end, METH_FASTCALL, _sre_SRE_Match_end__doc__}, - -static Py_ssize_t -_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group); - -static PyObject * -_sre_SRE_Match_end(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) -{ - PyObject *return_value = NULL; - PyObject *group = NULL; - Py_ssize_t _return_value; - - if (!_PyArg_CheckPositional("end", nargs, 0, 1)) { - goto exit; - } - if (nargs < 
1) { - goto skip_optional; - } - group = args[0]; -skip_optional: - _return_value = _sre_SRE_Match_end_impl(self, group); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromSsize_t(_return_value); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match_span__doc__, -"span($self, group=0, /)\n" -"--\n" -"\n" -"For match object m, return the 2-tuple (m.start(group), m.end(group))."); - -#define _SRE_SRE_MATCH_SPAN_METHODDEF \ - {"span", (PyCFunction)(void(*)(void))_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__}, - -static PyObject * -_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group); - -static PyObject * -_sre_SRE_Match_span(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) -{ - PyObject *return_value = NULL; - PyObject *group = NULL; - - if (!_PyArg_CheckPositional("span", nargs, 0, 1)) { - goto exit; - } - if (nargs < 1) { - goto skip_optional; - } - group = args[0]; -skip_optional: - return_value = _sre_SRE_Match_span_impl(self, group); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Match___copy____doc__, -"__copy__($self, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_MATCH___COPY___METHODDEF \ - {"__copy__", (PyCFunction)_sre_SRE_Match___copy__, METH_NOARGS, _sre_SRE_Match___copy____doc__}, - -static PyObject * -_sre_SRE_Match___copy___impl(MatchObject *self); - -static PyObject * -_sre_SRE_Match___copy__(MatchObject *self, PyObject *Py_UNUSED(ignored)) -{ - return _sre_SRE_Match___copy___impl(self); -} - -PyDoc_STRVAR(_sre_SRE_Match___deepcopy____doc__, -"__deepcopy__($self, memo, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_MATCH___DEEPCOPY___METHODDEF \ - {"__deepcopy__", (PyCFunction)_sre_SRE_Match___deepcopy__, METH_O, _sre_SRE_Match___deepcopy____doc__}, - -PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, -"match($self, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_SCANNER_MATCH_METHODDEF \ - {"match", (PyCFunction)(void(*)(void))_sre_SRE_Scanner_match, 
METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, - -static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); - -static PyObject * -_sre_SRE_Scanner_match(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = { NULL}; - static _PyArg_Parser _parser = {":match", _keywords, 0}; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser - )) { - goto exit; - } - return_value = _sre_SRE_Scanner_match_impl(self, cls); - -exit: - return return_value; -} - -PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, -"search($self, /)\n" -"--\n" -"\n"); - -#define _SRE_SRE_SCANNER_SEARCH_METHODDEF \ - {"search", (PyCFunction)(void(*)(void))_sre_SRE_Scanner_search, METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_search__doc__}, - -static PyObject * -_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls); - -static PyObject * -_sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - static const char * const _keywords[] = { NULL}; - static _PyArg_Parser _parser = {":search", _keywords, 0}; - - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser - )) { - goto exit; - } - return_value = _sre_SRE_Scanner_search_impl(self, cls); - -exit: - return return_value; -} -/*[clinic end generated code: output=9d7510a57a157a38 input=a9049054013a1b77]*/ diff --git a/Modules/sre.h b/Modules/sre.h deleted file mode 100644 index e2c5277..0000000 --- a/Modules/sre.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * See the _sre.c file for information on usage and redistribution. 
- */
-
-#ifndef SRE_INCLUDED
-#define SRE_INCLUDED
-
-#include "sre_constants.h"
-
-/* size of a code word (must be unsigned short or larger, and
-   large enough to hold a UCS4 character) */
-#define SRE_CODE Py_UCS4
-#if SIZEOF_SIZE_T > 4 /* size_t wider than 4 bytes: limits come from the code word itself */
-# define SRE_MAXREPEAT (~(SRE_CODE)0) /* all-ones code word; sre_lib.h treats a repeat maximum equal to this as "no upper limit" */
-# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2) /* half of the all-ones code word */
-#else /* otherwise the limits are derived from PY_SSIZE_T_MAX */
-# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
-# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
-#endif
-/*
- * Compiled pattern object: group bookkeeping, the original pattern source
- * and flags, and the compiled program as an array of SRE_CODE words.
- */
-typedef struct {
-    PyObject_VAR_HEAD
-    Py_ssize_t groups; /* must be first! */
-    PyObject* groupindex; /* dict */
-    PyObject* indexgroup; /* tuple */
-    /* the number of REPEATs */
-    Py_ssize_t repeat_count;
-    /* compatibility */
-    PyObject* pattern; /* pattern source (or None) */
-    int flags; /* flags used when compiling pattern source */
-    PyObject *weakreflist; /* List of weak references */
-    int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
-    /* pattern code */
-    Py_ssize_t codesize;
-    SRE_CODE code[1]; /* declared with one element; NOTE(review): presumably
-                         over-allocated to hold `codesize` code words --
-                         confirm against the allocation in sre.c */
-} PatternObject;
-
-#define PatternObject_GetCode(o) (((PatternObject*)(o))->code) /* casts o to PatternObject* and yields its code array */
-/*
- * Match object: the target string, the owning pattern, the slice that was
- * searched and the recorded mark positions.
- */
-typedef struct {
-    PyObject_VAR_HEAD
-    PyObject* string; /* link to the target string (must be first) */
-    PyObject* regs; /* cached list of matching spans */
-    PatternObject* pattern; /* link to the regex (pattern) object */
-    Py_ssize_t pos, endpos; /* current target slice */
-    Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
-    Py_ssize_t groups; /* number of groups (start/end marks) */
-    Py_ssize_t mark[1]; /* declared with one element; NOTE(review): actual
-                           length is decided where the object is allocated,
-                           which is not visible in this header */
-} MatchObject;
-/*
- * One repeat context.  Contexts form a chain through `prev`; the REPEAT
- * operator in sre_lib.h installs one and the MAX_UNTIL / MIN_UNTIL
- * operators read and update it.
- */
-typedef struct SRE_REPEAT_T {
-    Py_ssize_t count; /* iterations so far; REPEAT installs the context with
-                         -1 and the UNTIL operators store the running count */
-    const SRE_CODE* pattern; /* points to REPEAT operator arguments */
-    const void* last_ptr; /* helper to check for infinite loops */
-    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
-} SRE_REPEAT;
-/*
- * Matching state shared by the engine functions in sre_lib.h.
- */
-typedef struct {
-    /* string pointers */
-    const void* ptr; /* current position (also end of current slice) */
-    const void* beginning; /* start of original string */
-    const void* start; /* start of current slice */
-    const void* end; /* end of original string */
-    /* attributes for the match object */
-    PyObject* string;
-    Py_buffer buffer;
-    Py_ssize_t pos, endpos;
-    int isbytes;
-    int charsize; /* character size */
-    /* registers */
-    Py_ssize_t lastindex; /* the MARK operator sets this to i/2 + 1 when it
-                             records an odd mark index i */
-    Py_ssize_t lastmark; /* highest valid index in mark[] */
-    const void** mark; /* mark[i] is the string position recorded by MARK i;
-                          entries skipped over are set to NULL */
-    int match_all; /* when set, a top-level SUCCESS is rejected unless the
-                      position equals `end` */
-    int must_advance; /* when set, a top-level SUCCESS is rejected if the
-                         position still equals `start` */
-    /* dynamically allocated stuff */
-    char* data_stack; /* byte stack used by the DATA_STACK_* macros */
-    size_t data_stack_size; /* capacity of data_stack, in bytes */
-    size_t data_stack_base; /* offset of the first free byte (the stack top) */
-    /* current repeat context */
-    SRE_REPEAT *repeat;
-    /* repeat contexts array */
-    SRE_REPEAT *repeats_array; /* indexed by the REPEAT operator's
-                                  repeat_index operand */
-} SRE_STATE;
-/*
- * Scanner object: a pattern reference plus an embedded SRE_STATE.
- * NOTE(review): `executing` looks like an "in use" flag; its exact role is
- * not visible in this header -- confirm in sre.c.
- */
-typedef struct {
-    PyObject_HEAD
-    PyObject* pattern;
-    SRE_STATE state;
-    int executing;
-} ScannerObject;
-
-#endif
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
deleted file mode 100644
index 8b24949..0000000
--- a/Modules/sre_constants.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Secret Labs' Regular Expression Engine
- *
- * regular expression matching engine
- *
- * NOTE: This file is generated by Lib/re/_constants.py. If you need
- * to change anything in here, edit Lib/re/_constants.py and run it.
- *
- * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
- *
- * See the _sre.c file for information on usage and redistribution.
- */ - -#define SRE_MAGIC 20220402 -#define SRE_OP_FAILURE 0 -#define SRE_OP_SUCCESS 1 -#define SRE_OP_ANY 2 -#define SRE_OP_ANY_ALL 3 -#define SRE_OP_ASSERT 4 -#define SRE_OP_ASSERT_NOT 5 -#define SRE_OP_AT 6 -#define SRE_OP_BRANCH 7 -#define SRE_OP_CALL 8 -#define SRE_OP_CATEGORY 9 -#define SRE_OP_CHARSET 10 -#define SRE_OP_BIGCHARSET 11 -#define SRE_OP_GROUPREF 12 -#define SRE_OP_GROUPREF_EXISTS 13 -#define SRE_OP_IN 14 -#define SRE_OP_INFO 15 -#define SRE_OP_JUMP 16 -#define SRE_OP_LITERAL 17 -#define SRE_OP_MARK 18 -#define SRE_OP_MAX_UNTIL 19 -#define SRE_OP_MIN_UNTIL 20 -#define SRE_OP_NOT_LITERAL 21 -#define SRE_OP_NEGATE 22 -#define SRE_OP_RANGE 23 -#define SRE_OP_REPEAT 24 -#define SRE_OP_REPEAT_ONE 25 -#define SRE_OP_SUBPATTERN 26 -#define SRE_OP_MIN_REPEAT_ONE 27 -#define SRE_OP_ATOMIC_GROUP 28 -#define SRE_OP_POSSESSIVE_REPEAT 29 -#define SRE_OP_POSSESSIVE_REPEAT_ONE 30 -#define SRE_OP_GROUPREF_IGNORE 31 -#define SRE_OP_IN_IGNORE 32 -#define SRE_OP_LITERAL_IGNORE 33 -#define SRE_OP_NOT_LITERAL_IGNORE 34 -#define SRE_OP_GROUPREF_LOC_IGNORE 35 -#define SRE_OP_IN_LOC_IGNORE 36 -#define SRE_OP_LITERAL_LOC_IGNORE 37 -#define SRE_OP_NOT_LITERAL_LOC_IGNORE 38 -#define SRE_OP_GROUPREF_UNI_IGNORE 39 -#define SRE_OP_IN_UNI_IGNORE 40 -#define SRE_OP_LITERAL_UNI_IGNORE 41 -#define SRE_OP_NOT_LITERAL_UNI_IGNORE 42 -#define SRE_OP_RANGE_UNI_IGNORE 43 -#define SRE_AT_BEGINNING 0 -#define SRE_AT_BEGINNING_LINE 1 -#define SRE_AT_BEGINNING_STRING 2 -#define SRE_AT_BOUNDARY 3 -#define SRE_AT_NON_BOUNDARY 4 -#define SRE_AT_END 5 -#define SRE_AT_END_LINE 6 -#define SRE_AT_END_STRING 7 -#define SRE_AT_LOC_BOUNDARY 8 -#define SRE_AT_LOC_NON_BOUNDARY 9 -#define SRE_AT_UNI_BOUNDARY 10 -#define SRE_AT_UNI_NON_BOUNDARY 11 -#define SRE_CATEGORY_DIGIT 0 -#define SRE_CATEGORY_NOT_DIGIT 1 -#define SRE_CATEGORY_SPACE 2 -#define SRE_CATEGORY_NOT_SPACE 3 -#define SRE_CATEGORY_WORD 4 -#define SRE_CATEGORY_NOT_WORD 5 -#define SRE_CATEGORY_LINEBREAK 6 -#define SRE_CATEGORY_NOT_LINEBREAK 7 
-#define SRE_CATEGORY_LOC_WORD 8 -#define SRE_CATEGORY_LOC_NOT_WORD 9 -#define SRE_CATEGORY_UNI_DIGIT 10 -#define SRE_CATEGORY_UNI_NOT_DIGIT 11 -#define SRE_CATEGORY_UNI_SPACE 12 -#define SRE_CATEGORY_UNI_NOT_SPACE 13 -#define SRE_CATEGORY_UNI_WORD 14 -#define SRE_CATEGORY_UNI_NOT_WORD 15 -#define SRE_CATEGORY_UNI_LINEBREAK 16 -#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17 -#define SRE_FLAG_TEMPLATE 1 -#define SRE_FLAG_IGNORECASE 2 -#define SRE_FLAG_LOCALE 4 -#define SRE_FLAG_MULTILINE 8 -#define SRE_FLAG_DOTALL 16 -#define SRE_FLAG_UNICODE 32 -#define SRE_FLAG_VERBOSE 64 -#define SRE_FLAG_DEBUG 128 -#define SRE_FLAG_ASCII 256 -#define SRE_INFO_PREFIX 1 -#define SRE_INFO_LITERAL 2 -#define SRE_INFO_CHARSET 4 diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h deleted file mode 100644 index 1cc926d..0000000 --- a/Modules/sre_lib.h +++ /dev/null @@ -1,1759 +0,0 @@ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * See the _sre.c file for information on usage and redistribution. 
- */
-
-/* String matching engine */
-
-/* This file is included three times, with different character settings */
-
-LOCAL(int)
-SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
-{
-    /* check if pointer is at given position
-     *
-     * `at` is an SRE_AT_* code.  Returns nonzero when `ptr` satisfies that
-     * zero-width assertion and 0 when it does not; a code that no case
-     * handles falls out of the switch and also returns 0.  The function
-     * only compares `ptr` with state->beginning / state->end and looks at
-     * the characters ptr[-1] and ptr[0]; it never moves the pointer.
-     */
-
-    Py_ssize_t thisp, thatp;
-
-    switch (at) {
-
-    case SRE_AT_BEGINNING:
-    case SRE_AT_BEGINNING_STRING:
-        return ((void*) ptr == state->beginning);
-
-    case SRE_AT_BEGINNING_LINE: /* at the beginning, or right after a linebreak */
-        return ((void*) ptr == state->beginning ||
-                SRE_IS_LINEBREAK((int) ptr[-1]));
-
-    case SRE_AT_END: /* at the end, or at a linebreak that is the last character */
-        return (((SRE_CHAR *)state->end - ptr == 1 &&
-                 SRE_IS_LINEBREAK((int) ptr[0])) ||
-                ((void*) ptr == state->end));
-
-    case SRE_AT_END_LINE: /* at the end, or at any linebreak */
-        return ((void*) ptr == state->end ||
-                SRE_IS_LINEBREAK((int) ptr[0]));
-
-    case SRE_AT_END_STRING:
-        return ((void*) ptr == state->end);
-
-    case SRE_AT_BOUNDARY: /* the word tests on the two sides of ptr differ */
-        if (state->beginning == state->end) /* empty target: report no match */
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ? /* previous char is a word char (0 at the beginning) */
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ? /* current char is a word char (0 at the end) */
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_NON_BOUNDARY: /* same tests as SRE_AT_BOUNDARY, but they must agree */
-        if (state->beginning == state->end) /* empty target: also reported as no match */
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-    case SRE_AT_LOC_BOUNDARY: /* as SRE_AT_BOUNDARY, using SRE_LOC_IS_WORD */
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_LOC_NON_BOUNDARY: /* as SRE_AT_NON_BOUNDARY, using SRE_LOC_IS_WORD */
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-    case SRE_AT_UNI_BOUNDARY: /* as SRE_AT_BOUNDARY, using SRE_UNI_IS_WORD */
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp != thatp;
-
-    case SRE_AT_UNI_NON_BOUNDARY: /* as SRE_AT_NON_BOUNDARY, using SRE_UNI_IS_WORD */
-        if (state->beginning == state->end)
-            return 0;
-        thatp = ((void*) ptr > state->beginning) ?
-            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
-        thisp = ((void*) ptr < state->end) ?
-            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
-        return thisp == thatp;
-
-    }
-
-    return 0;
-}
-
-LOCAL(int)
-SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
-{
-    /* check if character is a member of the given set
-     *
-     * `set` points at a sequence of set items, each an opcode followed by
-     * its operands, terminated by SRE_OP_FAILURE.  `ok` starts at 1 and is
-     * inverted by every SRE_OP_NEGATE item.  The first item that contains
-     * `ch` returns the current `ok`; reaching the terminator returns !ok.
-     * An unrecognised opcode returns 0.  `state` is not used here.
-     */
-
-    int ok = 1;
-
-    for (;;) {
-        switch (*set++) {
-
-        case SRE_OP_FAILURE: /* end of the set: nothing matched */
-            return !ok;
-
-        case SRE_OP_LITERAL:
-            /* <LITERAL> <code> */
-            if (ch == set[0])
-                return ok;
-            set++;
-            break;
-
-        case SRE_OP_CATEGORY:
-            /* <CATEGORY> <code> */
-            if (sre_category(set[0], (int) ch))
-                return ok;
-            set++;
-            break;
-
-        case SRE_OP_CHARSET:
-            /* <CHARSET> <bitmap>  -- 256 bits stored in 256/SRE_CODE_BITS
-               code words; only characters below 256 can be members */
-            if (ch < 256 &&
-                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
-                return ok;
-            set += 256/SRE_CODE_BITS;
-            break;
-
-        case SRE_OP_RANGE:
-            /* <RANGE> <lower> <upper>  -- both bounds inclusive */
-            if (set[0] <= ch && ch <= set[1])
-                return ok;
-            set += 2;
-            break;
-
-        case SRE_OP_RANGE_UNI_IGNORE:
-            /* <RANGE_UNI_IGNORE> <lower> <upper>  -- the range is tried
-               with ch as given and again with sre_upper_unicode(ch) */
-        {
-            SRE_CODE uch;
-            /* ch is already lower cased */
-            if (set[0] <= ch && ch <= set[1])
-                return ok;
-            uch = sre_upper_unicode(ch);
-            if (set[0] <= uch && uch <= set[1])
-                return ok;
-            set += 2;
-            break;
-        }
-
-        case SRE_OP_NEGATE: /* invert the result for the rest of the set */
-            ok = !ok;
-            break;
-
-        case SRE_OP_BIGCHARSET:
-            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
-               The block indices are read as 256 bytes, selected by ch >> 8;
-               each block is a 256-bit bitmap indexed by ch & 255.
-               Characters of 0x10000 and above are never members. */
-        {
-            Py_ssize_t count, block;
-            count = *(set++);
-
-            if (ch < 0x10000u)
-                block = ((unsigned char*)set)[ch >> 8];
-            else
-                block = -1;
-            set += 256/sizeof(SRE_CODE); /* step over the 256-byte index table */
-            if (block >=0 &&
-                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
-                 (1u << (ch & (SRE_CODE_BITS-1)))))
-                return ok;
-            set += count * (256/SRE_CODE_BITS); /* step over all `count` blocks */
-            break;
-        }
-
-        default:
-            /* internal error -- there's not much we can do about it
-               here, so let's just pretend it didn't match... */
-            return 0;
-        }
-    }
-}
-/*
- * Locale-aware, case-insensitive set test: returns 1 when the set contains
- * sre_lower_locale(ch); otherwise tries sre_upper_locale(ch), but only if
- * it differs from the lower-case form.
- */
-LOCAL(int)
-SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
-{
-    SRE_CODE lo, up;
-    lo = sre_lower_locale(ch);
-    if (SRE(charset)(state, set, lo))
-        return 1;
-
-    up = sre_upper_locale(ch);
-    return up != lo && SRE(charset)(state, set, up);
-}
-
-LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); /* forward declaration: SRE(count) below calls it */
-/*
- * Count how many consecutive characters, starting at state->ptr, match the
- * single item at `pattern`.
- *
- * At most `maxcount` characters are examined; maxcount == SRE_MAXREPEAT
- * means there is no cap other than state->end.
- *
- * Returns the number of matching characters, or the negative value returned
- * by SRE(match)() when the default case hits an error.  The specialised
- * cases scan with a local pointer and leave state->ptr untouched.  The
- * default case calls SRE(match)() repeatedly and reports how far state->ptr
- * moved, so state->ptr is left wherever SRE(match)() put it.
- */
-LOCAL(Py_ssize_t)
-SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
-{
-    SRE_CODE chr;
-    SRE_CHAR c;
-    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
-    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
-    Py_ssize_t i;
-
-    /* adjust end: never scan more than maxcount characters */
-    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
-        end = ptr + maxcount;
-
-    switch (pattern[0]) {
-
-    case SRE_OP_IN:
-        /* repeated set; the set items start at pattern + 2 */
-        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
-        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
-            ptr++;
-        break;
-
-    case SRE_OP_ANY:
-        /* repeated dot wildcard. */
-        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
-        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
-            ptr++;
-        break;
-
-    case SRE_OP_ANY_ALL:
-        /* repeated dot wildcard. skip to the end of the target
-           string, and backtrack from there */
-        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
-        ptr = end;
-        break;
-
-    case SRE_OP_LITERAL:
-        /* repeated literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
-        c = (SRE_CHAR) chr;
-#if SIZEOF_SRE_CHAR < 4
-        if ((SRE_CODE) c != chr)
-            ; /* literal can't match: doesn't fit in char width */
-        else
-#endif
-        while (ptr < end && *ptr == c)
-            ptr++;
-        break;
-
-    case SRE_OP_LITERAL_IGNORE:
-        /* repeated literal, compared after sre_lower_ascii() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
-            ptr++;
-        break;
-
-    case SRE_OP_LITERAL_UNI_IGNORE:
-        /* repeated literal, compared after sre_lower_unicode() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
-            ptr++;
-        break;
-
-    case SRE_OP_LITERAL_LOC_IGNORE:
-        /* repeated literal, compared with char_loc_ignore() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && char_loc_ignore(chr, *ptr))
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL:
-        /* repeated non-literal */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
-        c = (SRE_CHAR) chr;
-#if SIZEOF_SRE_CHAR < 4
-        if ((SRE_CODE) c != chr)
-            ptr = end; /* literal can't match: doesn't fit in char width,
-                          so every character up to end counts */
-        else
-#endif
-        while (ptr < end && *ptr != c)
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL_IGNORE:
-        /* repeated non-literal, compared after sre_lower_ascii() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
-        /* repeated non-literal, compared after sre_lower_unicode() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
-            ptr++;
-        break;
-
-    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
-        /* repeated non-literal, compared with char_loc_ignore() */
-        chr = pattern[1];
-        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && !char_loc_ignore(chr, *ptr))
-            ptr++;
-        break;
-
-    default:
-        /* repeated single character pattern: no fast path, so run the
-           general matcher once per character until it fails, errors out
-           or state->ptr reaches end */
-        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
-        while ((SRE_CHAR*) state->ptr < end) {
-            i = SRE(match)(state, pattern, 0);
-            if (i < 0)
-                return i;
-            if (!i)
-                break;
-        }
-        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
-               (SRE_CHAR*) state->ptr - ptr));
-        return (SRE_CHAR*) state->ptr - ptr;
-    }
-
-    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
-           ptr - (SRE_CHAR*) state->ptr));
-    return ptr - (SRE_CHAR*) state->ptr;
-}
-
-/* The macros below should be used to protect recursive SRE(match)()
- * calls that *failed* and do *not* return immediately (IOW, those
- * that will backtrack). Explaining:
- *
- * - Recursive SRE(match)() returned true: that's usually a success
- *   (besides atypical cases like ASSERT_NOT), therefore there's no
- *   reason to restore lastmark;
- *
- * - Recursive SRE(match)() returned false but the current SRE(match)()
- *   is returning to the caller: If the current SRE(match)() is the
- *   top function of the recursion, returning false will be a matching
- *   failure, and it doesn't matter where lastmark is pointing to.
- *   If it's *not* the top function, it will be a recursive SRE(match)()
- *   failure by itself, and the calling SRE(match)() will have to deal
- *   with the failure by the same rules explained here (it will restore
- *   lastmark by itself if necessary);
- *
- * - Recursive SRE(match)() returned false, and will continue the
- *   outside 'for' loop: must be protected when breaking, since the next
- *   OP could potentially depend on lastmark;
- *
- * - Recursive SRE(match)() returned false, and will be called again
- *   inside a local for/while loop: must be protected between each
- *   loop iteration, since the recursive SRE(match)() could do anything,
- *   and could potentially depend on lastmark.
- *
- * For more information, check the discussion at SF patch #712900.
- *
- * LASTMARK_SAVE() copies state->lastmark and state->lastindex into the
- * current context `ctx`; LASTMARK_RESTORE() copies them back.
- */
-#define LASTMARK_SAVE()     \
-    do { \
-        ctx->lastmark = state->lastmark; \
-        ctx->lastindex = state->lastindex; \
-    } while (0)
-#define LASTMARK_RESTORE()  \
-    do { \
-        state->lastmark = ctx->lastmark; \
-        state->lastindex = ctx->lastindex; \
-    } while (0)
-/*
- * Exit helpers.  RETURN_ERROR returns `i` from the enclosing function
- * immediately; RETURN_FAILURE / RETURN_SUCCESS set `ret` to 0 / 1 and jump
- * to the `exit` label of the enclosing function.
- */
-#define RETURN_ERROR(i) do { return i; } while(0)
-#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
-#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
-/*
- * Conditional forms: act only when `i` is negative (error), positive
- * (success) or zero (failure); the latter two check for an error first.
- */
-#define RETURN_ON_ERROR(i) \
-    do { if (i < 0) RETURN_ERROR(i); } while (0)
-#define RETURN_ON_SUCCESS(i) \
-    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
-#define RETURN_ON_FAILURE(i) \
-    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
-/*
- * Reserve sizeof(type) bytes on top of state->data_stack and point `ptr`
- * at them.  Expects `alloc_pos`, `ctx` and `ctx_pos` to be in scope and
- * leaves the offset of the new object in `alloc_pos`.  When there is not
- * enough room it calls data_stack_grow(); a negative result is returned
- * from the enclosing function.  After growing, `ctx` is recomputed from
- * `ctx_pos` (unless ctx_pos is -1) -- presumably because growing can move
- * the stack; data_stack_grow() is not visible here.
- */
-#define DATA_STACK_ALLOC(state, type, ptr) \
-do { \
-    alloc_pos = state->data_stack_base; \
-    TRACE(("allocating %s in %zd (%zd)\n", \
-           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
-    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
-        int j = data_stack_grow(state, sizeof(type)); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
-    } \
-    ptr = (type*)(state->data_stack+alloc_pos); \
-    state->data_stack_base += sizeof(type); \
-} while (0)
-/*
- * Point `ptr` at the object of type `type` stored at byte offset `pos` of
- * the data stack.
- */
-#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
-do { \
-    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
-    ptr = (type*)(state->data_stack+pos); \
-} while (0)
-/*
- * Copy `size` bytes from `data` onto the top of the data stack, growing it
- * first when needed (same growth handling as DATA_STACK_ALLOC).
- */
-#define DATA_STACK_PUSH(state, data, size) \
-do { \
-    TRACE(("copy data in %p to %zd (%zd)\n", \
-           data, state->data_stack_base, size)); \
-    if (size > state->data_stack_size - state->data_stack_base) { \
-        int j = data_stack_grow(state, size); \
-        if (j < 0) return j; \
-        if (ctx_pos != -1) \
-            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
-    } \
-    memcpy(state->data_stack+state->data_stack_base, data, size); \
-    state->data_stack_base += size; \
-} while (0)
-
-/* Copy the top `size` bytes of the data stack into `data`; the bytes are
-   removed from the stack only when `discard` is nonzero.
-
-   We add an explicit cast to memcpy here because MSVC has a bug when
-   compiling C code where it believes that `const void**` cannot be
-   safely cast to `void*`, see bpo-39943 for details. */
-#define DATA_STACK_POP(state, data, size, discard) \
-do { \
-    TRACE(("copy data to %p from %zd (%zd)\n", \
-           data, state->data_stack_base-size, size)); \
-    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
-    if (discard) \
-        state->data_stack_base -= size; \
-} while (0)
-/*
- * Drop the top `size` bytes of the data stack without copying them.
- */
-#define DATA_STACK_POP_DISCARD(state, size) \
-do { \
-    TRACE(("discard data from %zd (%zd)\n", \
-           state->data_stack_base-size, size)); \
-    state->data_stack_base -= size; \
-} while(0)
-/*
- * Shorthands that supply `state` and, for the push/pop forms, take the
- * size from the pointed-to object: sizeof(*(x)).
- */
-#define DATA_PUSH(x) \
-    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
-#define DATA_POP(x) \
-    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
-#define DATA_POP_DISCARD(x) \
-    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
-#define DATA_ALLOC(t,p) \
-    DATA_STACK_ALLOC(state, t, p)
-#define DATA_LOOKUP_AT(t,p,pos) \
-    DATA_STACK_LOOKUP_AT(state,t,p,pos)
-/*
- * Save / restore the first lastmark+1 entries of state->mark on the data
- * stack; all four macros do nothing when lastmark is negative.
- *   MARK_PUSH          copy the entries onto the stack (uses the local `i`)
- *   MARK_POP           copy them back and remove them from the stack
- *   MARK_POP_KEEP      copy them back but leave them on the stack
- *   MARK_POP_DISCARD   remove them from the stack without copying back
- */
-#define MARK_PUSH(lastmark) \
-    do if (lastmark >= 0) { \
-        i = lastmark; /* ctx->lastmark may change if reallocated */ \
-        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
-    } while (0)
-#define MARK_POP(lastmark) \
-    do if (lastmark >= 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
-    } while (0)
-#define MARK_POP_KEEP(lastmark) \
-    do if (lastmark >= 0) { \
-        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
-    } while (0)
-#define MARK_POP_DISCARD(lastmark) \
-    do if (lastmark >= 0) { \
-        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
-    } while (0)
-/*
- * Values stored in match_context.jump by DO_JUMPX, one per DO_JUMP call
- * site; JUMP_NONE marks the outermost context.  NOTE(review): presumably
- * the exit path of SRE(match)() switches on this value to resume at the
- * matching label -- that code is outside this part of the file.
- */
-#define JUMP_NONE            0
-#define JUMP_MAX_UNTIL_1     1
-#define JUMP_MAX_UNTIL_2     2
-#define JUMP_MAX_UNTIL_3     3
-#define JUMP_MIN_UNTIL_1     4
-#define JUMP_MIN_UNTIL_2     5
-#define JUMP_MIN_UNTIL_3     6
-#define JUMP_REPEAT          7
-#define JUMP_REPEAT_ONE_1    8
-#define JUMP_REPEAT_ONE_2    9
-#define JUMP_MIN_REPEAT_ONE  10
-#define JUMP_BRANCH          11
-#define JUMP_ASSERT          12
-#define JUMP_ASSERT_NOT      13
-#define JUMP_POSS_REPEAT_1   14
-#define JUMP_POSS_REPEAT_2   15
-#define JUMP_ATOMIC_GROUP    16
-/*
- * Start matching `nextpattern` in a fresh context without a C-level
- * recursive call: allocate a match_context on the data stack, link it to
- * the current one through last_ctx_pos, record the jump value, pattern
- * and toplevel flag, make it the current context and jump to `entrance`.
- * `jumplabel:` is defined right after the goto, so it marks the statement
- * that follows the macro invocation.
- *
- * DO_JUMP passes the current context's toplevel flag on; DO_JUMP0 forces
- * toplevel to 0.
- */
-#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
-    DATA_ALLOC(SRE(match_context), nextctx); \
-    nextctx->last_ctx_pos = ctx_pos; \
-    nextctx->jump = jumpvalue; \
-    nextctx->pattern = nextpattern; \
-    nextctx->toplevel = toplevel_; \
-    ctx_pos = alloc_pos; \
-    ctx = nextctx; \
-    goto entrance; \
-    jumplabel: \
-    while (0) /* gcc doesn't like labels at end of scopes */ \
-
-#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
-    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
-
-#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
-    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
-/*
- * One activation record of SRE(match)(), stored on the data stack.
- */
-typedef struct {
-    Py_ssize_t last_ctx_pos; /* data-stack offset of the context that started
-                                this one; -1 for the outermost context */
-    Py_ssize_t jump; /* JUMP_* value recorded by DO_JUMPX */
-    const SRE_CHAR* ptr; /* current position in the target string */
-    const SRE_CODE* pattern; /* current position in the pattern code */
-    Py_ssize_t count; /* repetition counter used by the repeat operators */
-    Py_ssize_t lastmark; /* state->lastmark as saved by LASTMARK_SAVE() */
-    Py_ssize_t lastindex; /* state->lastindex as saved by LASTMARK_SAVE() */
-    union {
-        SRE_CODE chr; /* literal that starts the tail (REPEAT_ONE) */
-        SRE_REPEAT* rep; /* repeat context (REPEAT, MAX_UNTIL, MIN_UNTIL) */
-    } u;
-    int toplevel; /* nonzero when this context belongs to the top-level match;
-                     enables the match_all / must_advance checks */
-} SRE(match_context);
-
-/* check if string matches the given pattern.
returns <0 for - error, 0 for failure, and 1 for success */ -LOCAL(Py_ssize_t) -SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) -{ - const SRE_CHAR* end = (const SRE_CHAR *)state->end; - Py_ssize_t alloc_pos, ctx_pos = -1; - Py_ssize_t i, ret = 0; - Py_ssize_t jump; - unsigned int sigcount=0; - - SRE(match_context)* ctx; - SRE(match_context)* nextctx; - - TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); - - DATA_ALLOC(SRE(match_context), ctx); - ctx->last_ctx_pos = -1; - ctx->jump = JUMP_NONE; - ctx->pattern = pattern; - ctx->toplevel = toplevel; - ctx_pos = alloc_pos; - -entrance: - - ctx->ptr = (SRE_CHAR *)state->ptr; - - if (ctx->pattern[0] == SRE_OP_INFO) { - /* optimization info block */ - /* <1=skip> <2=flags> <3=min> ... */ - if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { - TRACE(("reject (got %zd chars, need %zd)\n", - end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); - RETURN_FAILURE; - } - ctx->pattern += ctx->pattern[1] + 1; - } - - for (;;) { - ++sigcount; - if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) - RETURN_ERROR(SRE_ERROR_INTERRUPTED); - - switch (*ctx->pattern++) { - - case SRE_OP_MARK: - /* set mark */ - /* */ - TRACE(("|%p|%p|MARK %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - i = ctx->pattern[0]; - if (i & 1) - state->lastindex = i/2 + 1; - if (i > state->lastmark) { - /* state->lastmark is the highest valid index in the - state->mark array. If it is increased by more than 1, - the intervening marks must be set to NULL to signal - that these marks have not been encountered. 
*/ - Py_ssize_t j = state->lastmark + 1; - while (j < i) - state->mark[j++] = NULL; - state->lastmark = i; - } - state->mark[i] = ctx->ptr; - ctx->pattern++; - break; - - case SRE_OP_LITERAL: - /* match literal string */ - /* */ - TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, - ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL: - /* match anything that is not literal character */ - /* */ - TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, - ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_SUCCESS: - /* end of pattern */ - TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); - if (ctx->toplevel && - ((state->match_all && ctx->ptr != state->end) || - (state->must_advance && ctx->ptr == state->start))) - { - RETURN_FAILURE; - } - state->ptr = ctx->ptr; - RETURN_SUCCESS; - - case SRE_OP_AT: - /* match at given position */ - /* */ - TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); - if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) - RETURN_FAILURE; - ctx->pattern++; - break; - - case SRE_OP_CATEGORY: - /* match at given category */ - /* */ - TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, - ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_ANY: - /* match anything (except a newline) */ - /* */ - TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) - RETURN_FAILURE; - ctx->ptr++; - break; - - case SRE_OP_ANY_ALL: - /* match anything */ - /* */ - TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end) - RETURN_FAILURE; - ctx->ptr++; - break; - - case SRE_OP_IN: - /* match set member (or non_member) */ - /* */ - 
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end || - !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) - RETURN_FAILURE; - ctx->pattern += ctx->pattern[0]; - ctx->ptr++; - break; - - case SRE_OP_LITERAL_IGNORE: - TRACE(("|%p|%p|LITERAL_IGNORE %d\n", - ctx->pattern, ctx->ptr, ctx->pattern[0])); - if (ctx->ptr >= end || - sre_lower_ascii(*ctx->ptr) != *ctx->pattern) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_LITERAL_UNI_IGNORE: - TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", - ctx->pattern, ctx->ptr, ctx->pattern[0])); - if (ctx->ptr >= end || - sre_lower_unicode(*ctx->ptr) != *ctx->pattern) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_LITERAL_LOC_IGNORE: - TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", - ctx->pattern, ctx->ptr, ctx->pattern[0])); - if (ctx->ptr >= end - || !char_loc_ignore(*ctx->pattern, *ctx->ptr)) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL_IGNORE: - TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", - ctx->pattern, ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end || - sre_lower_ascii(*ctx->ptr) == *ctx->pattern) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL_UNI_IGNORE: - TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", - ctx->pattern, ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end || - sre_lower_unicode(*ctx->ptr) == *ctx->pattern) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_NOT_LITERAL_LOC_IGNORE: - TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", - ctx->pattern, ctx->ptr, *ctx->pattern)); - if (ctx->ptr >= end - || char_loc_ignore(*ctx->pattern, *ctx->ptr)) - RETURN_FAILURE; - ctx->pattern++; - ctx->ptr++; - break; - - case SRE_OP_IN_IGNORE: - TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end - || !SRE(charset)(state, ctx->pattern+1, - (SRE_CODE)sre_lower_ascii(*ctx->ptr))) - RETURN_FAILURE; - ctx->pattern += ctx->pattern[0]; - ctx->ptr++; - break; 
- - case SRE_OP_IN_UNI_IGNORE: - TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end - || !SRE(charset)(state, ctx->pattern+1, - (SRE_CODE)sre_lower_unicode(*ctx->ptr))) - RETURN_FAILURE; - ctx->pattern += ctx->pattern[0]; - ctx->ptr++; - break; - - case SRE_OP_IN_LOC_IGNORE: - TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr)); - if (ctx->ptr >= end - || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr)) - RETURN_FAILURE; - ctx->pattern += ctx->pattern[0]; - ctx->ptr++; - break; - - case SRE_OP_JUMP: - case SRE_OP_INFO: - /* jump forward */ - /* */ - TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_BRANCH: - /* alternation */ - /* <0=skip> code ... */ - TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); - LASTMARK_SAVE(); - if (state->repeat) - MARK_PUSH(ctx->lastmark); - for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { - if (ctx->pattern[1] == SRE_OP_LITERAL && - (ctx->ptr >= end || - (SRE_CODE) *ctx->ptr != ctx->pattern[2])) - continue; - if (ctx->pattern[1] == SRE_OP_IN && - (ctx->ptr >= end || - !SRE(charset)(state, ctx->pattern + 3, - (SRE_CODE) *ctx->ptr))) - continue; - state->ptr = ctx->ptr; - DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); - if (ret) { - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - if (state->repeat) - MARK_POP_KEEP(ctx->lastmark); - LASTMARK_RESTORE(); - } - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_FAILURE; - - case SRE_OP_REPEAT_ONE: - /* match repeated sequence (maximizing regexp) */ - - /* this operator only works if the repeated item is - exactly one character wide, and we're not already - collecting backtracking points. 
for other cases, - use the MAX_REPEAT operator */ - - /* <1=min> <2=max> item tail */ - - TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, - ctx->pattern[1], ctx->pattern[2])); - - if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) - RETURN_FAILURE; /* cannot match */ - - state->ptr = ctx->ptr; - - ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); - RETURN_ON_ERROR(ret); - DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); - ctx->count = ret; - ctx->ptr += ctx->count; - - /* when we arrive here, count contains the number of - matches, and ctx->ptr points to the tail of the target - string. check if the rest of the pattern matches, - and backtrack if not. */ - - if (ctx->count < (Py_ssize_t) ctx->pattern[1]) - RETURN_FAILURE; - - if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - ctx->ptr == state->end && - !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) - { - /* tail is empty. we're finished */ - state->ptr = ctx->ptr; - RETURN_SUCCESS; - } - - LASTMARK_SAVE(); - if (state->repeat) - MARK_PUSH(ctx->lastmark); - - if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { - /* tail starts with a literal. 
skip positions where - the rest of the pattern cannot possibly match */ - ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; - for (;;) { - while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && - (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { - ctx->ptr--; - ctx->count--; - } - if (ctx->count < (Py_ssize_t) ctx->pattern[1]) - break; - state->ptr = ctx->ptr; - DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, - ctx->pattern+ctx->pattern[0]); - if (ret) { - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - if (state->repeat) - MARK_POP_KEEP(ctx->lastmark); - LASTMARK_RESTORE(); - - ctx->ptr--; - ctx->count--; - } - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - } else { - /* general case */ - while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { - state->ptr = ctx->ptr; - DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, - ctx->pattern+ctx->pattern[0]); - if (ret) { - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - if (state->repeat) - MARK_POP_KEEP(ctx->lastmark); - LASTMARK_RESTORE(); - - ctx->ptr--; - ctx->count--; - } - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - } - RETURN_FAILURE; - - case SRE_OP_MIN_REPEAT_ONE: - /* match repeated sequence (minimizing regexp) */ - - /* this operator only works if the repeated item is - exactly one character wide, and we're not already - collecting backtracking points. 
for other cases, - use the MIN_REPEAT operator */ - - /* <1=min> <2=max> item tail */ - - TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, - ctx->pattern[1], ctx->pattern[2])); - - if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) - RETURN_FAILURE; /* cannot match */ - - state->ptr = ctx->ptr; - - if (ctx->pattern[1] == 0) - ctx->count = 0; - else { - /* count using pattern min as the maximum */ - ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); - RETURN_ON_ERROR(ret); - DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); - if (ret < (Py_ssize_t) ctx->pattern[1]) - /* didn't match minimum number of times */ - RETURN_FAILURE; - /* advance past minimum matches of repeat */ - ctx->count = ret; - ctx->ptr += ctx->count; - } - - if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && - !(ctx->toplevel && - ((state->match_all && ctx->ptr != state->end) || - (state->must_advance && ctx->ptr == state->start)))) - { - /* tail is empty. we're finished */ - state->ptr = ctx->ptr; - RETURN_SUCCESS; - - } else { - /* general case */ - LASTMARK_SAVE(); - if (state->repeat) - MARK_PUSH(ctx->lastmark); - - while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT - || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { - state->ptr = ctx->ptr; - DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, - ctx->pattern+ctx->pattern[0]); - if (ret) { - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - if (state->repeat) - MARK_POP_KEEP(ctx->lastmark); - LASTMARK_RESTORE(); - - state->ptr = ctx->ptr; - ret = SRE(count)(state, ctx->pattern+3, 1); - RETURN_ON_ERROR(ret); - DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); - if (ret == 0) - break; - assert(ret == 1); - ctx->ptr++; - ctx->count++; - } - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - } - RETURN_FAILURE; - - case SRE_OP_POSSESSIVE_REPEAT_ONE: - /* match repeated sequence (maximizing regexp) without - backtracking */ - - /* this operator only works if the repeated 
item is - exactly one character wide, and we're not already - collecting backtracking points. for other cases, - use the MAX_REPEAT operator */ - - /* <1=min> <2=max> item - tail */ - - TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[1], ctx->pattern[2])); - - if (ctx->ptr + ctx->pattern[1] > end) { - RETURN_FAILURE; /* cannot match */ - } - - state->ptr = ctx->ptr; - - ret = SRE(count)(state, ctx->pattern + 3, ctx->pattern[2]); - RETURN_ON_ERROR(ret); - DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); - ctx->count = ret; - ctx->ptr += ctx->count; - - /* when we arrive here, count contains the number of - matches, and ctx->ptr points to the tail of the target - string. check if the rest of the pattern matches, - and fail if not. */ - - /* Test for not enough repetitions in match */ - if (ctx->count < (Py_ssize_t) ctx->pattern[1]) { - RETURN_FAILURE; - } - - /* Update the pattern to point to the next op code */ - ctx->pattern += ctx->pattern[0]; - - /* Let the tail be evaluated separately and consider this - match successful. */ - if (*ctx->pattern == SRE_OP_SUCCESS && - ctx->ptr == state->end && - !(ctx->toplevel && state->must_advance && ctx->ptr == state->start)) - { - /* tail is empty. we're finished */ - state->ptr = ctx->ptr; - RETURN_SUCCESS; - } - - /* Attempt to match the rest of the string */ - break; - - case SRE_OP_REPEAT: - /* create repeat context. 
all the hard work is done - by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ - /* <1=min> <2=max> - <3=repeat_index> item tail */ - TRACE(("|%p|%p|REPEAT %d %d %d\n", ctx->pattern, ctx->ptr, - ctx->pattern[1], ctx->pattern[2], ctx->pattern[3])); - - /* install repeat context */ - ctx->u.rep = &state->repeats_array[ctx->pattern[3]]; - - ctx->u.rep->count = -1; - ctx->u.rep->pattern = ctx->pattern; - ctx->u.rep->prev = state->repeat; - ctx->u.rep->last_ptr = NULL; - state->repeat = ctx->u.rep; - - state->ptr = ctx->ptr; - DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); - state->repeat = ctx->u.rep->prev; - - if (ret) { - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - RETURN_FAILURE; - - case SRE_OP_MAX_UNTIL: - /* maximizing repeat */ - /* <1=min> <2=max> - <3=repeat_index> item tail */ - - /* FIXME: we probably need to deal with zero-width - matches in here... */ - - ctx->u.rep = state->repeat; - if (!ctx->u.rep) - RETURN_ERROR(SRE_ERROR_STATE); - - state->ptr = ctx->ptr; - - ctx->count = ctx->u.rep->count+1; - - TRACE(("|%p|%p|MAX_UNTIL %zd\n", ctx->pattern, - ctx->ptr, ctx->count)); - - if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { - /* not enough matches */ - ctx->u.rep->count = ctx->count; - DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, - ctx->u.rep->pattern+4); - if (ret) { - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - ctx->u.rep->count = ctx->count-1; - state->ptr = ctx->ptr; - RETURN_FAILURE; - } - - if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || - ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && - state->ptr != ctx->u.rep->last_ptr) { - /* we may have enough matches, but if we can - match another item, do so */ - ctx->u.rep->count = ctx->count; - LASTMARK_SAVE(); - MARK_PUSH(ctx->lastmark); - /* zero-width match protection */ - DATA_PUSH(&ctx->u.rep->last_ptr); - ctx->u.rep->last_ptr = state->ptr; - DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, - ctx->u.rep->pattern+4); - DATA_POP(&ctx->u.rep->last_ptr); - if (ret) { - 
MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - MARK_POP(ctx->lastmark); - LASTMARK_RESTORE(); - ctx->u.rep->count = ctx->count-1; - state->ptr = ctx->ptr; - } - - /* cannot match more repeated items here. make sure the - tail matches */ - state->repeat = ctx->u.rep->prev; - DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); - state->repeat = ctx->u.rep; // restore repeat before return - - RETURN_ON_SUCCESS(ret); - state->ptr = ctx->ptr; - RETURN_FAILURE; - - case SRE_OP_MIN_UNTIL: - /* minimizing repeat */ - /* <1=min> <2=max> - <3=repeat_index> item tail */ - - ctx->u.rep = state->repeat; - if (!ctx->u.rep) - RETURN_ERROR(SRE_ERROR_STATE); - - state->ptr = ctx->ptr; - - ctx->count = ctx->u.rep->count+1; - - TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", ctx->pattern, - ctx->ptr, ctx->count, ctx->u.rep->pattern)); - - if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { - /* not enough matches */ - ctx->u.rep->count = ctx->count; - DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, - ctx->u.rep->pattern+4); - if (ret) { - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - ctx->u.rep->count = ctx->count-1; - state->ptr = ctx->ptr; - RETURN_FAILURE; - } - - /* see if the tail matches */ - state->repeat = ctx->u.rep->prev; - - LASTMARK_SAVE(); - if (state->repeat) - MARK_PUSH(ctx->lastmark); - - DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); - SRE_REPEAT *repeat_of_tail = state->repeat; - state->repeat = ctx->u.rep; // restore repeat before return - - if (ret) { - if (repeat_of_tail) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - if (repeat_of_tail) - MARK_POP(ctx->lastmark); - LASTMARK_RESTORE(); - - state->ptr = ctx->ptr; - - if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] - && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || - state->ptr == ctx->u.rep->last_ptr) - RETURN_FAILURE; - - ctx->u.rep->count = ctx->count; - /* zero-width match protection */ - DATA_PUSH(&ctx->u.rep->last_ptr); - 
ctx->u.rep->last_ptr = state->ptr; - DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, - ctx->u.rep->pattern+4); - DATA_POP(&ctx->u.rep->last_ptr); - if (ret) { - RETURN_ON_ERROR(ret); - RETURN_SUCCESS; - } - ctx->u.rep->count = ctx->count-1; - state->ptr = ctx->ptr; - RETURN_FAILURE; - - case SRE_OP_POSSESSIVE_REPEAT: - /* create possessive repeat contexts. */ - /* <1=min> <2=max> pattern - tail */ - TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[1], ctx->pattern[2])); - - /* Set the global Input pointer to this context's Input - pointer */ - state->ptr = ctx->ptr; - - /* Initialize Count to 0 */ - ctx->count = 0; - - /* Check for minimum required matches. */ - while (ctx->count < (Py_ssize_t)ctx->pattern[1]) { - /* not enough matches */ - DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, - &ctx->pattern[3]); - if (ret) { - RETURN_ON_ERROR(ret); - ctx->count++; - } - else { - state->ptr = ctx->ptr; - RETURN_FAILURE; - } - } - - /* Clear the context's Input stream pointer so that it - doesn't match the global state so that the while loop can - be entered. */ - ctx->ptr = NULL; - - /* Keep trying to parse the sub-pattern until the - end is reached, creating a new context each time. */ - while ((ctx->count < (Py_ssize_t)ctx->pattern[2] || - (Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT) && - state->ptr != ctx->ptr) { - /* Save the Capture Group Marker state into the current - Context and back up the current highest number - Capture Group marker. */ - LASTMARK_SAVE(); - MARK_PUSH(ctx->lastmark); - - /* zero-width match protection */ - /* Set the context's Input Stream pointer to be the - current Input Stream pointer from the global - state. When the loop reaches the next iteration, - the context will then store the last known good - position with the global state holding the Input - Input Stream position that has been updated with - the most recent match. 
Thus, if state's Input - stream remains the same as the one stored in the - current Context, we know we have successfully - matched an empty string and that all subsequent - matches will also be the empty string until the - maximum number of matches are counted, and because - of this, we could immediately stop at that point and - consider this match successful. */ - ctx->ptr = state->ptr; - - /* We have not reached the maximin matches, so try to - match once more. */ - DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, - &ctx->pattern[3]); - - /* Check to see if the last attempted match - succeeded. */ - if (ret) { - /* Drop the saved highest number Capture Group - marker saved above and use the newly updated - value. */ - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - - /* Success, increment the count. */ - ctx->count++; - } - /* Last attempted match failed. */ - else { - /* Restore the previously saved highest number - Capture Group marker since the last iteration - did not match, then restore that to the global - state. */ - MARK_POP(ctx->lastmark); - LASTMARK_RESTORE(); - - /* We have sufficient matches, so exit loop. */ - break; - } - } - - /* Evaluate Tail */ - /* Jump to end of pattern indicated by skip, and then skip - the SUCCESS op code that follows it. */ - ctx->pattern += ctx->pattern[0] + 1; - ctx->ptr = state->ptr; - break; - - case SRE_OP_ATOMIC_GROUP: - /* Atomic Group Sub Pattern */ - /* pattern tail */ - TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); - - /* Set the global Input pointer to this context's Input - pointer */ - state->ptr = ctx->ptr; - - /* Evaluate the Atomic Group in a new context, terminating - when the end of the group, represented by a SUCCESS op - code, is reached. */ - /* Group Pattern begins at an offset of 1 code. */ - DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group, - &ctx->pattern[1]); - - /* Test Exit Condition */ - RETURN_ON_ERROR(ret); - - if (ret == 0) { - /* Atomic Group failed to Match. 
*/ - state->ptr = ctx->ptr; - RETURN_FAILURE; - } - - /* Evaluate Tail */ - /* Jump to end of pattern indicated by skip, and then skip - the SUCCESS op code that follows it. */ - ctx->pattern += ctx->pattern[0]; - ctx->ptr = state->ptr; - break; - - case SRE_OP_GROUPREF: - /* match backreference */ - TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - i = ctx->pattern[0]; - { - Py_ssize_t groupref = i+i; - if (groupref >= state->lastmark) { - RETURN_FAILURE; - } else { - SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; - SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; - if (!p || !e || e < p) - RETURN_FAILURE; - while (p < e) { - if (ctx->ptr >= end || *ctx->ptr != *p) - RETURN_FAILURE; - p++; - ctx->ptr++; - } - } - } - ctx->pattern++; - break; - - case SRE_OP_GROUPREF_IGNORE: - /* match backreference */ - TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - i = ctx->pattern[0]; - { - Py_ssize_t groupref = i+i; - if (groupref >= state->lastmark) { - RETURN_FAILURE; - } else { - SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; - SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; - if (!p || !e || e < p) - RETURN_FAILURE; - while (p < e) { - if (ctx->ptr >= end || - sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p)) - RETURN_FAILURE; - p++; - ctx->ptr++; - } - } - } - ctx->pattern++; - break; - - case SRE_OP_GROUPREF_UNI_IGNORE: - /* match backreference */ - TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - i = ctx->pattern[0]; - { - Py_ssize_t groupref = i+i; - if (groupref >= state->lastmark) { - RETURN_FAILURE; - } else { - SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; - SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; - if (!p || !e || e < p) - RETURN_FAILURE; - while (p < e) { - if (ctx->ptr >= end || - sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p)) - RETURN_FAILURE; - p++; - ctx->ptr++; - } - } - } - ctx->pattern++; - break; - - case 
SRE_OP_GROUPREF_LOC_IGNORE: - /* match backreference */ - TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - i = ctx->pattern[0]; - { - Py_ssize_t groupref = i+i; - if (groupref >= state->lastmark) { - RETURN_FAILURE; - } else { - SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; - SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; - if (!p || !e || e < p) - RETURN_FAILURE; - while (p < e) { - if (ctx->ptr >= end || - sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p)) - RETURN_FAILURE; - p++; - ctx->ptr++; - } - } - } - ctx->pattern++; - break; - - case SRE_OP_GROUPREF_EXISTS: - TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[0])); - /* codeyes codeno ... */ - i = ctx->pattern[0]; - { - Py_ssize_t groupref = i+i; - if (groupref >= state->lastmark) { - ctx->pattern += ctx->pattern[1]; - break; - } else { - SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; - SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; - if (!p || !e || e < p) { - ctx->pattern += ctx->pattern[1]; - break; - } - } - } - ctx->pattern += 2; - break; - - case SRE_OP_ASSERT: - /* assert subpattern */ - /* */ - TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[1])); - if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) - RETURN_FAILURE; - state->ptr = ctx->ptr - ctx->pattern[1]; - DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); - RETURN_ON_FAILURE(ret); - ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_ASSERT_NOT: - /* assert not subpattern */ - /* */ - TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, - ctx->ptr, ctx->pattern[1])); - if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { - state->ptr = ctx->ptr - ctx->pattern[1]; - LASTMARK_SAVE(); - if (state->repeat) - MARK_PUSH(ctx->lastmark); - - DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); - if (ret) { - if (state->repeat) - MARK_POP_DISCARD(ctx->lastmark); - RETURN_ON_ERROR(ret); - 
RETURN_FAILURE; - } - if (state->repeat) - MARK_POP(ctx->lastmark); - LASTMARK_RESTORE(); - } - ctx->pattern += ctx->pattern[0]; - break; - - case SRE_OP_FAILURE: - /* immediate failure */ - TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); - RETURN_FAILURE; - - default: - TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, - ctx->pattern[-1])); - RETURN_ERROR(SRE_ERROR_ILLEGAL); - } - } - -exit: - ctx_pos = ctx->last_ctx_pos; - jump = ctx->jump; - DATA_POP_DISCARD(ctx); - if (ctx_pos == -1) - return ret; - DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); - - switch (jump) { - case JUMP_MAX_UNTIL_2: - TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); - goto jump_max_until_2; - case JUMP_MAX_UNTIL_3: - TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); - goto jump_max_until_3; - case JUMP_MIN_UNTIL_2: - TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); - goto jump_min_until_2; - case JUMP_MIN_UNTIL_3: - TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); - goto jump_min_until_3; - case JUMP_BRANCH: - TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); - goto jump_branch; - case JUMP_MAX_UNTIL_1: - TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); - goto jump_max_until_1; - case JUMP_MIN_UNTIL_1: - TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); - goto jump_min_until_1; - case JUMP_POSS_REPEAT_1: - TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr)); - goto jump_poss_repeat_1; - case JUMP_POSS_REPEAT_2: - TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr)); - goto jump_poss_repeat_2; - case JUMP_REPEAT: - TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); - goto jump_repeat; - case JUMP_REPEAT_ONE_1: - TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); - goto jump_repeat_one_1; - case JUMP_REPEAT_ONE_2: - TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); - goto jump_repeat_one_2; - case JUMP_MIN_REPEAT_ONE: - 
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); - goto jump_min_repeat_one; - case JUMP_ATOMIC_GROUP: - TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr)); - goto jump_atomic_group; - case JUMP_ASSERT: - TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); - goto jump_assert; - case JUMP_ASSERT_NOT: - TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); - goto jump_assert_not; - case JUMP_NONE: - TRACE(("|%p|%p|RETURN %zd\n", ctx->pattern, - ctx->ptr, ret)); - break; - } - - return ret; /* should never get here */ -} - -/* need to reset capturing groups between two SRE(match) callings in loops */ -#define RESET_CAPTURE_GROUP() \ - do { state->lastmark = state->lastindex = -1; } while (0) - -LOCAL(Py_ssize_t) -SRE(search)(SRE_STATE* state, SRE_CODE* pattern) -{ - SRE_CHAR* ptr = (SRE_CHAR *)state->start; - SRE_CHAR* end = (SRE_CHAR *)state->end; - Py_ssize_t status = 0; - Py_ssize_t prefix_len = 0; - Py_ssize_t prefix_skip = 0; - SRE_CODE* prefix = NULL; - SRE_CODE* charset = NULL; - SRE_CODE* overlap = NULL; - int flags = 0; - - if (ptr > end) - return 0; - - if (pattern[0] == SRE_OP_INFO) { - /* optimization info block */ - /* <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ - - flags = pattern[2]; - - if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { - TRACE(("reject (got %u chars, need %u)\n", - (unsigned int)(end - ptr), pattern[3])); - return 0; - } - if (pattern[3] > 1) { - /* adjust end point (but make sure we leave at least one - character in there, so literal search will work) */ - end -= pattern[3] - 1; - if (end <= ptr) - end = ptr; - } - - if (flags & SRE_INFO_PREFIX) { - /* pattern starts with a known prefix */ - /* */ - prefix_len = pattern[5]; - prefix_skip = pattern[6]; - prefix = pattern + 7; - overlap = prefix + prefix_len - 1; - } else if (flags & SRE_INFO_CHARSET) - /* pattern starts with a character from a known set */ - /* */ - charset = pattern + 5; - - pattern += 1 + pattern[1]; - } - - 
TRACE(("prefix = %p %zd %zd\n", - prefix, prefix_len, prefix_skip)); - TRACE(("charset = %p\n", charset)); - - if (prefix_len == 1) { - /* pattern starts with a literal character */ - SRE_CHAR c = (SRE_CHAR) prefix[0]; -#if SIZEOF_SRE_CHAR < 4 - if ((SRE_CODE) c != prefix[0]) - return 0; /* literal can't match: doesn't fit in char width */ -#endif - end = (SRE_CHAR *)state->end; - state->must_advance = 0; - while (ptr < end) { - while (*ptr != c) { - if (++ptr >= end) - return 0; - } - TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); - state->start = ptr; - state->ptr = ptr + prefix_skip; - if (flags & SRE_INFO_LITERAL) - return 1; /* we got all of it */ - status = SRE(match)(state, pattern + 2*prefix_skip, 0); - if (status != 0) - return status; - ++ptr; - RESET_CAPTURE_GROUP(); - } - return 0; - } - - if (prefix_len > 1) { - /* pattern starts with a known prefix. use the overlap - table to skip forward as fast as we possibly can */ - Py_ssize_t i = 0; - - end = (SRE_CHAR *)state->end; - if (prefix_len > end - ptr) - return 0; -#if SIZEOF_SRE_CHAR < 4 - for (i = 0; i < prefix_len; i++) - if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) - return 0; /* literal can't match: doesn't fit in char width */ -#endif - while (ptr < end) { - SRE_CHAR c = (SRE_CHAR) prefix[0]; - while (*ptr++ != c) { - if (ptr >= end) - return 0; - } - if (ptr >= end) - return 0; - - i = 1; - state->must_advance = 0; - do { - if (*ptr == (SRE_CHAR) prefix[i]) { - if (++i != prefix_len) { - if (++ptr >= end) - return 0; - continue; - } - /* found a potential match */ - TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); - state->start = ptr - (prefix_len - 1); - state->ptr = ptr - (prefix_len - prefix_skip - 1); - if (flags & SRE_INFO_LITERAL) - return 1; /* we got all of it */ - status = SRE(match)(state, pattern + 2*prefix_skip, 0); - if (status != 0) - return status; - /* close but no cigar -- try again */ - if (++ptr >= end) - return 0; - RESET_CAPTURE_GROUP(); - } - i = overlap[i]; - } while 
(i != 0); - } - return 0; - } - - if (charset) { - /* pattern starts with a character from a known set */ - end = (SRE_CHAR *)state->end; - state->must_advance = 0; - for (;;) { - while (ptr < end && !SRE(charset)(state, charset, *ptr)) - ptr++; - if (ptr >= end) - return 0; - TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); - state->start = ptr; - state->ptr = ptr; - status = SRE(match)(state, pattern, 0); - if (status != 0) - break; - ptr++; - RESET_CAPTURE_GROUP(); - } - } else { - /* general case */ - assert(ptr <= end); - TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); - state->start = state->ptr = ptr; - status = SRE(match)(state, pattern, 1); - state->must_advance = 0; - if (status == 0 && pattern[0] == SRE_OP_AT && - (pattern[1] == SRE_AT_BEGINNING || - pattern[1] == SRE_AT_BEGINNING_STRING)) - { - state->start = state->ptr = ptr = end; - return 0; - } - while (status == 0 && ptr < end) { - ptr++; - RESET_CAPTURE_GROUP(); - TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); - state->start = state->ptr = ptr; - status = SRE(match)(state, pattern, 0); - } - } - - return status; -} - -#undef SRE_CHAR -#undef SIZEOF_SRE_CHAR -#undef SRE - -/* vim:ts=4:sw=4:et -*/ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 5e6e703..9894e37 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -296,9 +296,6 @@ - - - @@ -359,7 +356,10 @@ - + + + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 86049a2..55fca49 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -240,15 +240,6 @@ Modules - - Modules - - - Modules - - - Modules - Modules\_io @@ -731,9 +722,18 @@ Modules - + Modules + + Modules + + + Modules + + + Modules + Modules diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index e58871c..1900009 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -148,7 +148,7 @@ 
Modules/_io/*.c Py_BUILD_CORE 1 Modules/_localemodule.c Py_BUILD_CORE 1 Modules/_operator.c Py_BUILD_CORE 1 Modules/_posixsubprocess.c Py_BUILD_CORE 1 -Modules/_sre.c Py_BUILD_CORE 1 +Modules/_sre/sre.c Py_BUILD_CORE 1 Modules/_threadmodule.c Py_BUILD_CORE 1 Modules/_tracemalloc.c Py_BUILD_CORE 1 Modules/_weakref.c Py_BUILD_CORE 1 @@ -262,8 +262,8 @@ Modules/expat/xmlparse.c XML_POOR_ENTROPY 1 Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1 # others -Modules/sre_lib.h LOCAL(type) static inline type -Modules/sre_lib.h SRE(F) sre_ucs2_##F +Modules/_sre/sre_lib.h LOCAL(type) static inline type +Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 Include/internal/pycore_bitutils.h _Py__has_builtin(B) 0 diff --git a/configure b/configure index bb1aa75..a06d4c9 100755 --- a/configure +++ b/configure @@ -20557,6 +20557,7 @@ SRCDIRS="\ Modules/_multiprocessing \ Modules/_sha3 \ Modules/_sqlite \ + Modules/_sre \ Modules/_xxtestfuzz \ Modules/cjkcodecs \ Modules/expat \ diff --git a/configure.ac b/configure.ac index 9f0a50e..abcd379 100644 --- a/configure.ac +++ b/configure.ac @@ -5992,6 +5992,7 @@ SRCDIRS="\ Modules/_multiprocessing \ Modules/_sha3 \ Modules/_sqlite \ + Modules/_sre \ Modules/_xxtestfuzz \ Modules/cjkcodecs \ Modules/expat \ -- cgit v0.12