summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
author: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 (GMT)
committer: Antoine Pitrou <solipsis@pitrou.net> 2008-08-19 17:56:33 (GMT)
commit: fd036451bf0e0ade8783e21df801abf7be96d020 (patch)
tree: e70ff65a9e641d8e790bc091f0dc2507baf344ca /Modules
parent: 3ad7ba10a20827b24d4b1aa9dd49474db8affbdd (diff)
download: cpython-fd036451bf0e0ade8783e21df801abf7be96d020.zip
download: cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.gz
download: cpython-fd036451bf0e0ade8783e21df801abf7be96d020.tar.bz2
#2834: Change re module semantics, so that str and bytes mixing is forbidden,
and str (unicode) patterns get full unicode matching by default. The re.ASCII flag is also introduced to ask for ASCII matching instead.
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_sre.c 27
-rw-r--r-- Modules/sre.h 1
2 files changed, 25 insertions, 3 deletions
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 64fc513..2a54d8e 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1691,7 +1691,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
/* get pointer to string buffer */
view.len = -1;
buffer = Py_TYPE(string)->tp_as_buffer;
- if (!buffer || !buffer->bf_getbuffer ||
+ if (!buffer || !buffer->bf_getbuffer ||
(*buffer->bf_getbuffer)(string, &view, PyBUF_SIMPLE) < 0) {
PyErr_SetString(PyExc_TypeError, "expected string or buffer");
return NULL;
@@ -1717,7 +1717,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
if (PyBytes_Check(string) || bytes == size)
charsize = 1;
#if defined(HAVE_UNICODE)
- else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
+ else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
charsize = sizeof(Py_UNICODE);
#endif
else {
@@ -1729,7 +1729,7 @@ getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
*p_charsize = charsize;
if (ptr == NULL) {
- PyErr_SetString(PyExc_ValueError,
+ PyErr_SetString(PyExc_ValueError,
"Buffer is NULL");
}
return ptr;
@@ -1754,6 +1754,17 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
if (!ptr)
return NULL;
+ if (charsize == 1 && pattern->charsize > 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a string pattern on a bytes-like object");
+ return NULL;
+ }
+ if (charsize > 1 && pattern->charsize == 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "can't use a bytes pattern on a string-like object");
+ return NULL;
+ }
+
/* adjust boundaries */
if (start < 0)
start = 0;
@@ -2682,6 +2693,16 @@ _compile(PyObject* self_, PyObject* args)
return NULL;
}
+ if (pattern == Py_None)
+ self->charsize = -1;
+ else {
+ Py_ssize_t p_length;
+ if (!getstring(pattern, &p_length, &self->charsize)) {
+ PyObject_DEL(self);
+ return NULL;
+ }
+ }
+
Py_INCREF(pattern);
self->pattern = pattern;
diff --git a/Modules/sre.h b/Modules/sre.h
index d4af05c..518c11d 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -30,6 +30,7 @@ typedef struct {
PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */
+ int charsize; /* pattern charsize (or -1) */
/* pattern code */
Py_ssize_t codesize;
SRE_CODE code[1];