diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2008-08-19 17:56:33 (GMT) |
commit | fd036451bf0e0ade8783e21df801abf7be96d020 (patch) | |
tree | e70ff65a9e641d8e790bc091f0dc2507baf344ca /Modules | |
parent | 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff) | |
download | cpython-fd036451bf0e0ade8783e21df801abf7be96d020.zip cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.bz2 |
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII
flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_sre.c | 27 | ||||
-rw-r--r-- | Modules/sre.h | 1 |
2 files changed, 25 insertions, 3 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 64fc513..2a54d8e 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1691,7 +1691,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
     /* get pointer to string buffer */
     view.len = -1;
     buffer = Py_TYPE(string)->tp_as_buffer;
-    if (!buffer || !buffer->bf_getbuffer ||
+    if (!buffer || !buffer->bf_getbuffer ||
         (*buffer->bf_getbuffer)(string, &view, PyBUF_SIMPLE) < 0) {
         PyErr_SetString(PyExc_TypeError, "expected string or buffer");
         return NULL;
@@ -1717,7 +1717,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
     if (PyBytes_Check(string) || bytes == size)
         charsize = 1;
 #if defined(HAVE_UNICODE)
-    else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
+    else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
         charsize = sizeof(Py_UNICODE);
 #endif
     else {
@@ -1729,7 +1729,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
     *p_charsize = charsize;
 
     if (ptr == NULL) {
-        PyErr_SetString(PyExc_ValueError,
+        PyErr_SetString(PyExc_ValueError,
                         "Buffer is NULL");
     }
     return ptr;
@@ -1754,6 +1754,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
     if (!ptr)
         return NULL;
 
+    if (charsize == 1 && pattern->charsize > 1) {
+        PyErr_SetString(PyExc_TypeError,
+            "can't use a string pattern on a bytes-like object");
+        return NULL;
+    }
+    if (charsize > 1 && pattern->charsize == 1) {
+        PyErr_SetString(PyExc_TypeError,
+            "can't use a bytes pattern on a string-like object");
+        return NULL;
+    }
+
     /* adjust boundaries */
     if (start < 0)
         start = 0;
@@ -2682,6 +2693,16 @@ _compile(PyObject* self_, PyObject* args)
         return NULL;
     }
 
+    if (pattern == Py_None)
+        self->charsize = -1;
+    else {
+        Py_ssize_t p_length;
+        if (!getstring(pattern, &p_length, &self->charsize)) {
+            PyObject_DEL(self);
+            return NULL;
+        }
+    }
+
     Py_INCREF(pattern);
     self->pattern = pattern;
 
diff --git a/Modules/sre.h b/Modules/sre.h
index d4af05c..518c11d 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -30,6 +30,7 @@ typedef struct {
     PyObject* pattern; /* pattern source (or None) */
     int flags; /* flags used when compiling pattern source */
     PyObject *weakreflist; /* List of weak references */
+    int charsize; /* pattern charsize (or -1) */
     /* pattern code */
     Py_ssize_t codesize;
     SRE_CODE code[1];