summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Walter Dörwald <walter@livinglogic.de> 2007-05-04 19:28:21 (GMT)
committer: Walter Dörwald <walter@livinglogic.de> 2007-05-04 19:28:21 (GMT)
commit: 612344f12774cbbefd735d9fcbfb2001fe187362 (patch)
tree: 3be6051c7e4ac7fe3a93372fa3d86bce06072e1f
parent: c2b87a6dff1edade6542a484cb9b9419b254c1ed (diff)
download: cpython-612344f12774cbbefd735d9fcbfb2001fe187362.zip
cpython-612344f12774cbbefd735d9fcbfb2001fe187362.tar.gz
cpython-612344f12774cbbefd735d9fcbfb2001fe187362.tar.bz2
Change UnicodeDecodeError objects so that the 'object' attribute
is a bytes object. Add 'y' and 'y#' format specifiers that work like 's' and 's#' but only accept bytes objects.
-rw-r--r-- Doc/api/utilities.tex 12
-rw-r--r-- Doc/ext/extending.tex 2
-rw-r--r-- Objects/exceptions.c 32
-rw-r--r-- Python/getargs.c 27
-rw-r--r-- Python/modsupport.c 33
5 files changed, 98 insertions, 8 deletions
diff --git a/Doc/api/utilities.tex b/Doc/api/utilities.tex
index 93e3796..fb9c909 100644
--- a/Doc/api/utilities.tex
+++ b/Doc/api/utilities.tex
@@ -424,6 +424,18 @@ whose address should be passed.
compatible objects pass back a reference to the raw internal data
representation.
+ \item[\samp{y} (bytes object)
+ {[const char *]}]
+ This variant on \samp{s} converts a Python bytes object to a C pointer to a
+ character string. The bytes object must not contain embedded NUL bytes;
+ if it does, a \exception{TypeError} exception is raised.
+
+ \item[\samp{y\#} (bytes object)
+ {[const char *, int]}]
+ This variant on \samp{s\#} stores into two C variables, the first one
+ a pointer to a character string, the second one its length. This only
+ accepts bytes objects.
+
\item[\samp{z} (string or \code{None}) {[const char *]}]
Like \samp{s}, but the Python object may also be \code{None}, in
which case the C pointer is set to \NULL.
diff --git a/Doc/ext/extending.tex b/Doc/ext/extending.tex
index 2af88b5..1f3e2d5 100644
--- a/Doc/ext/extending.tex
+++ b/Doc/ext/extending.tex
@@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value):
Py_BuildValue("i", 123) 123
Py_BuildValue("iii", 123, 456, 789) (123, 456, 789)
Py_BuildValue("s", "hello") 'hello'
+ Py_BuildValue("y", "hello") b'hello'
Py_BuildValue("ss", "hello", "world") ('hello', 'world')
Py_BuildValue("s#", "hello", 4) 'hell'
+ Py_BuildValue("y#", "hello", 4) b'hell'
Py_BuildValue("()") ()
Py_BuildValue("(i)", 123) (123,)
Py_BuildValue("(ii)", 123, 456) (123, 456)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index e30e9df..1096bac 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -1243,6 +1243,22 @@ set_string(PyObject **attr, const char *value)
static PyObject *
+get_bytes(PyObject *attr, const char *name)
+{
+ if (!attr) {
+ PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+ return NULL;
+ }
+
+ if (!PyBytes_Check(attr)) {
+ PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
+ return NULL;
+ }
+ Py_INCREF(attr);
+ return attr;
+}
+
+static PyObject *
get_unicode(PyObject *attr, const char *name)
{
if (!attr) {
@@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
PyObject *
PyUnicodeDecodeError_GetObject(PyObject *exc)
{
- return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
+ return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");
}
PyObject *
@@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
Py_ssize_t size;
- PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
+ PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
- size = PyString_GET_SIZE(obj);
+ size = PyBytes_GET_SIZE(obj);
if (*start<0)
*start = 0;
if (*start>=size)
@@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
{
if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
Py_ssize_t size;
- PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
+ PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
"object");
if (!obj) return -1;
- size = PyString_GET_SIZE(obj);
+ size = PyBytes_GET_SIZE(obj);
if (*end<1)
*end = 1;
if (*end>size)
@@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
return -1;
return UnicodeError_init((PyUnicodeErrorObject *)self, args,
- kwds, &PyString_Type);
+ kwds, &PyBytes_Type);
}
static PyObject *
@@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
/* FromFormat does not support %02x, so format that separately */
char byte[4];
PyOS_snprintf(byte, sizeof(byte), "%02x",
- ((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
+ ((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
return PyString_FromFormat(
"'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
@@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
assert(length < INT_MAX);
assert(start < INT_MAX);
assert(end < INT_MAX);
- return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
+ return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
encoding, object, length, start, end, reason);
}
diff --git a/Python/getargs.c b/Python/getargs.c
index f7a6604..8331a18 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
break;
}
+ case 'y': {/* bytes */
+ if (*format == '#') {
+ void **p = (void **)va_arg(*p_va, char **);
+ FETCH_SIZE;
+
+ if (PyBytes_Check(arg)) {
+ *p = PyBytes_AS_STRING(arg);
+ STORE_SIZE(PyBytes_GET_SIZE(arg));
+ }
+ else
+ return converterr("bytes", arg, msgbuf, bufsize);
+ format++;
+ } else {
+ char **p = va_arg(*p_va, char **);
+
+ if (PyBytes_Check(arg))
+ *p = PyBytes_AS_STRING(arg);
+ else
+ return converterr("bytes", arg, msgbuf, bufsize);
+ if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
+ return converterr("bytes without null bytes",
+ arg, msgbuf, bufsize);
+ }
+ break;
+ }
+
case 'z': {/* string, may be NULL (None) */
if (*format == '#') { /* any buffer-like object */
void **p = (void **)va_arg(*p_va, char **);
@@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
case 's': /* string */
case 'z': /* string or None */
+ case 'y': /* bytes */
case 'u': /* unicode string */
case 't': /* buffer, read-only */
case 'w': /* buffer, read-write */
diff --git a/Python/modsupport.c b/Python/modsupport.c
index af774f0..8f600dc 100644
--- a/Python/modsupport.c
+++ b/Python/modsupport.c
@@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
return v;
}
+ case 'y':
+ {
+ PyObject *v;
+ char *str = va_arg(*p_va, char *);
+ Py_ssize_t n;
+ if (**p_format == '#') {
+ ++*p_format;
+ if (flags & FLAG_SIZE_T)
+ n = va_arg(*p_va, Py_ssize_t);
+ else
+ n = va_arg(*p_va, int);
+ }
+ else
+ n = -1;
+ if (str == NULL) {
+ v = Py_None;
+ Py_INCREF(v);
+ }
+ else {
+ if (n < 0) {
+ size_t m = strlen(str);
+ if (m > PY_SSIZE_T_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "string too long for Python bytes");
+ return NULL;
+ }
+ n = (Py_ssize_t)m;
+ }
+ v = PyBytes_FromStringAndSize(str, n);
+ }
+ return v;
+ }
+
case 'N':
case 'S':
case 'O':