summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2007-05-04 00:41:39 (GMT)
committerGuido van Rossum <guido@python.org>2007-05-04 00:41:39 (GMT)
commitf15a29f975bbdef6de0aa19a19b176d1baf8f5ab (patch)
tree60f4f72289129eaa808e05f2b7c7fb7bde077371 /Objects
parentbae5cedb8d41edc20bea54b8bff0c7f835de8043 (diff)
downloadcpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.zip
cpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.tar.gz
cpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.tar.bz2
More coding by random modification.
Encoding now return bytes instead of str8. eval(), exec(), compile() now accept unicode or bytes.
Diffstat (limited to 'Objects')
-rw-r--r--Objects/bytesobject.c26
-rw-r--r--Objects/moduleobject.c7
-rw-r--r--Objects/object.c11
-rw-r--r--Objects/stringobject.c4
-rw-r--r--Objects/unicodeobject.c116
5 files changed, 96 insertions, 68 deletions
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 213dbfc..75b7939 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -79,6 +79,7 @@ PyObject *
PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
{
PyBytesObject *new;
+ int alloc;
assert(size >= 0);
@@ -86,18 +87,23 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
if (new == NULL)
return NULL;
- if (size == 0)
+ if (size == 0) {
new->ob_bytes = NULL;
+ alloc = 0;
+ }
else {
- new->ob_bytes = PyMem_Malloc(size);
+ alloc = size + 1;
+ new->ob_bytes = PyMem_Malloc(alloc);
if (new->ob_bytes == NULL) {
Py_DECREF(new);
return NULL;
}
if (bytes != NULL)
memcpy(new->ob_bytes, bytes, size);
+ new->ob_bytes[size] = '\0'; /* Trailing null byte */
}
- new->ob_size = new->ob_alloc = size;
+ new->ob_size = size;
+ new->ob_alloc = alloc;
return (PyObject *)new;
}
@@ -134,7 +140,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
/* Major downsize; resize down to exact size */
alloc = size;
}
- else if (size <= alloc) {
+ else if (size < alloc) {
/* Within allocated size; quick exit */
((PyBytesObject *)self)->ob_size = size;
return 0;
@@ -147,6 +153,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
/* Major upsize; resize up to exact size */
alloc = size;
}
+ if (alloc <= size)
+ alloc = size + 1;
sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc);
if (sval == NULL) {
@@ -158,6 +166,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size)
((PyBytesObject *)self)->ob_size = size;
((PyBytesObject *)self)->ob_alloc = alloc;
+ ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */
+
return 0;
}
@@ -221,7 +231,7 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
size = mysize + osize;
if (size < 0)
return PyErr_NoMemory();
- if (size <= self->ob_alloc)
+ if (size < self->ob_alloc)
self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL;
@@ -243,7 +253,7 @@ bytes_repeat(PyBytesObject *self, Py_ssize_t count)
size = mysize * count;
if (count != 0 && size / count != mysize)
return PyErr_NoMemory();
- result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
+ result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size);
if (result != NULL && size != 0) {
if (mysize == 1)
memset(result->ob_bytes, self->ob_bytes[0], size);
@@ -268,7 +278,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
size = mysize * count;
if (count != 0 && size / count != mysize)
return PyErr_NoMemory();
- if (size <= self->ob_alloc)
+ if (size < self->ob_alloc)
self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0)
return NULL;
@@ -703,7 +713,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
}
bytes = PyString_AS_STRING(encoded);
size = PyString_GET_SIZE(encoded);
- if (size <= self->ob_alloc)
+ if (size < self->ob_alloc)
self->ob_size = size;
else if (PyBytes_Resize((PyObject *)self, size) < 0) {
Py_DECREF(encoded);
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 18914d8..82eabf1 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -72,8 +72,11 @@ PyModule_GetName(PyObject *m)
PyErr_SetString(PyExc_SystemError, "nameless module");
return NULL;
}
- if (PyUnicode_Check(nameobj))
- nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, "replace");
+ if (PyUnicode_Check(nameobj)) {
+ nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, NULL);
+ if (nameobj == NULL)
+ return NULL;
+ }
return PyString_AsString(nameobj);
}
diff --git a/Objects/object.c b/Objects/object.c
index ada1d1e..81f5669 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -422,7 +422,8 @@ PyObject_Str(PyObject *v)
return NULL;
if (PyUnicode_Check(res)) {
PyObject* str;
- str = PyUnicode_AsEncodedString(res, NULL, NULL);
+ str = _PyUnicode_AsDefaultEncodedString(res, NULL);
+ Py_XINCREF(str);
Py_DECREF(res);
if (str)
res = str;
@@ -929,12 +930,12 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
PyTypeObject *tp = v->ob_type;
int err;
- if (!PyString_Check(name)){
+ if (!PyString_Check(name)) {
/* The Unicode to string conversion is done here because the
existing tp_setattro slots expect a string object as name
and we wouldn't want to break those. */
if (PyUnicode_Check(name)) {
- name = PyUnicode_AsEncodedString(name, NULL, NULL);
+ name = _PyUnicode_AsDefaultEncodedString(name, NULL);
if (name == NULL)
return -1;
}
@@ -946,8 +947,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
return -1;
}
}
- else
- Py_INCREF(name);
+ Py_INCREF(name);
PyString_InternInPlace(&name);
if (tp->tp_setattro != NULL) {
@@ -961,6 +961,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value)
return err;
}
Py_DECREF(name);
+ assert(name->ob_refcnt >= 1);
if (tp->tp_getattr == NULL && tp->tp_getattro == NULL)
PyErr_Format(PyExc_TypeError,
"'%.100s' object has no attributes "
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index f74c5dc..2ebaca8 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -3181,9 +3181,9 @@ string_encode(PyStringObject *self, PyObject *args)
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
- if (!PyString_Check(v) && !PyUnicode_Check(v)) {
+ if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "encoder did not return a string/unicode object "
+ "[str8] encoder did not return a bytes object "
"(type=%.400s)",
v->ob_type->tp_name);
Py_DECREF(v);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d4a7e7e..db3f9c4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -104,13 +104,9 @@ static PyUnicodeObject *unicode_empty;
static PyUnicodeObject *unicode_latin1[256];
/* Default encoding to use and assume when NULL is passed as encoding
- parameter; it is initialized by _PyUnicode_Init().
-
- Always use the PyUnicode_SetDefaultEncoding() and
- PyUnicode_GetDefaultEncoding() APIs to access this global.
-
-*/
-static char unicode_default_encoding[100];
+ parameter; it is fixed to "utf-8". Always use the
+ PyUnicode_GetDefaultEncoding() API to access this global. */
+static const char unicode_default_encoding[] = "utf-8";
Py_UNICODE
PyUnicode_GetMax(void)
@@ -711,10 +707,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
v = PyCodec_Encode(unicode, encoding, errors);
if (v == NULL)
goto onError;
- if (!PyString_Check(v)) {
+ if (!PyBytes_Check(v)) {
+ if (PyString_Check(v)) {
+ /* Old codec, turn it into bytes */
+ PyObject *b = PyBytes_FromObject(v);
+ Py_DECREF(v);
+ return b;
+ }
PyErr_Format(PyExc_TypeError,
- "encoder did not return a string object (type=%.400s)",
- v->ob_type->tp_name);
+ "encoder did not return a bytes object "
+ "(type=%.400s, encoding=%.20s, errors=%.20s)",
+ v->ob_type->tp_name,
+ encoding ? encoding : "NULL",
+ errors ? errors : "NULL");
Py_DECREF(v);
goto onError;
}
@@ -728,12 +733,28 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
const char *errors)
{
PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
-
+ PyObject *b;
if (v)
return v;
- v = PyUnicode_AsEncodedString(unicode, NULL, errors);
- if (v && errors == NULL)
+ if (errors != NULL)
+ Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
+ if (errors == NULL) {
+ b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
+ }
+ else {
+ b = PyUnicode_AsEncodedString(unicode, NULL, errors);
+ }
+ if (!b)
+ return NULL;
+ v = PyString_FromStringAndSize(PyBytes_AsString(b),
+ PyBytes_Size(b));
+ Py_DECREF(b);
+ if (!errors) {
+ Py_XINCREF(v);
((PyUnicodeObject *)unicode)->defenc = v;
+ }
return v;
}
@@ -768,21 +789,13 @@ const char *PyUnicode_GetDefaultEncoding(void)
int PyUnicode_SetDefaultEncoding(const char *encoding)
{
- PyObject *v;
-
- /* Make sure the encoding is valid. As side effect, this also
- loads the encoding into the codec registry cache. */
- v = _PyCodec_Lookup(encoding);
- if (v == NULL)
- goto onError;
- Py_DECREF(v);
- strncpy(unicode_default_encoding,
- encoding,
- sizeof(unicode_default_encoding));
+ if (strcmp(encoding, unicode_default_encoding) != 0) {
+ PyErr_Format(PyExc_ValueError,
+ "Can only set default encoding to %s",
+ unicode_default_encoding);
+ return -1;
+ }
return 0;
-
- onError:
- return -1;
}
/* error handling callback helper:
@@ -1429,10 +1442,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
nallocated = size * 4;
if (nallocated / 4 != size) /* overflow! */
return PyErr_NoMemory();
- v = PyString_FromStringAndSize(NULL, nallocated);
+ v = PyBytes_FromStringAndSize(NULL, nallocated);
if (v == NULL)
return NULL;
- p = PyString_AS_STRING(v);
+ p = PyBytes_AS_STRING(v);
}
for (i = 0; i < size;) {
@@ -1480,13 +1493,13 @@ encodeUCS4:
/* This was stack allocated. */
nneeded = p - stackbuf;
assert(nneeded <= nallocated);
- v = PyString_FromStringAndSize(stackbuf, nneeded);
+ v = PyBytes_FromStringAndSize(stackbuf, nneeded);
}
else {
/* Cut back to size actually needed. */
- nneeded = p - PyString_AS_STRING(v);
+ nneeded = p - PyBytes_AS_STRING(v);
assert(nneeded <= nallocated);
- _PyString_Resize(&v, nneeded);
+ PyBytes_Resize(v, nneeded);
}
return v;
@@ -2588,12 +2601,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
- res = PyString_FromStringAndSize(NULL, size);
+ res = PyBytes_FromStringAndSize(NULL, size);
if (res == NULL)
goto onError;
if (size == 0)
return res;
- str = PyString_AS_STRING(res);
+ str = PyBytes_AS_STRING(res);
ressize = size;
while (p<endp) {
@@ -2643,7 +2656,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
p = collend;
break;
case 4: /* xmlcharrefreplace */
- respos = str-PyString_AS_STRING(res);
+ respos = str - PyBytes_AS_STRING(res);
/* determine replacement size (temporarily (mis)uses p) */
for (p = collstart, repsize = 0; p < collend; ++p) {
if (*p<10)
@@ -2670,9 +2683,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
if (requiredsize > ressize) {
if (requiredsize<2*ressize)
requiredsize = 2*ressize;
- if (_PyString_Resize(&res, requiredsize))
+ if (PyBytes_Resize(res, requiredsize))
goto onError;
- str = PyString_AS_STRING(res) + respos;
+ str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize;
}
/* generate replacement (temporarily (mis)uses p) */
@@ -2690,17 +2703,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
/* need more space? (at least enough for what we
have+the replacement+the rest of the string, so
we won't have to check space for encodable characters) */
- respos = str-PyString_AS_STRING(res);
+ respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_SIZE(repunicode);
requiredsize = respos+repsize+(endp-collend);
if (requiredsize > ressize) {
if (requiredsize<2*ressize)
requiredsize = 2*ressize;
- if (_PyString_Resize(&res, requiredsize)) {
+ if (PyBytes_Resize(res, requiredsize)) {
Py_DECREF(repunicode);
goto onError;
}
- str = PyString_AS_STRING(res) + respos;
+ str = PyBytes_AS_STRING(res) + respos;
ressize = requiredsize;
}
/* check if there is anything unencodable in the replacement
@@ -2721,10 +2734,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
}
}
/* Resize if we allocated to much */
- respos = str-PyString_AS_STRING(res);
+ respos = str - PyBytes_AS_STRING(res);
if (respos<ressize)
/* If this falls res will be NULL */
- _PyString_Resize(&res, respos);
+ PyBytes_Resize(res, respos);
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return res;
@@ -2979,20 +2992,20 @@ static int encode_mbcs(PyObject **repr,
if (*repr == NULL) {
/* Create string object */
- *repr = PyString_FromStringAndSize(NULL, mbcssize);
+ *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
if (*repr == NULL)
return -1;
}
else {
/* Extend string object */
- n = PyString_Size(*repr);
- if (_PyString_Resize(repr, n + mbcssize) < 0)
+ n = PyBytes_Size(*repr);
+ if (PyBytes_Resize(*repr, n + mbcssize) < 0)
return -1;
}
/* Do the conversion */
if (size > 0) {
- char *s = PyString_AS_STRING(*repr) + n;
+ char *s = PyBytes_AS_STRING(*repr) + n;
if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
PyErr_SetFromWindowsErrWithFilename(0, NULL);
return -1;
@@ -5630,9 +5643,9 @@ unicode_encode(PyUnicodeObject *self, PyObject *args)
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
goto onError;
- if (!PyString_Check(v) && !PyUnicode_Check(v)) {
+ if (!PyBytes_Check(v)) {
PyErr_Format(PyExc_TypeError,
- "encoder did not return a string/unicode object "
+ "encoder did not return a bytes object "
"(type=%.400s)",
v->ob_type->tp_name);
Py_DECREF(v);
@@ -6797,9 +6810,11 @@ unicode_splitlines(PyUnicodeObject *self, PyObject *args)
}
static
-PyObject *unicode_str(PyUnicodeObject *self)
+PyObject *unicode_str(PyObject *self)
{
- return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL);
+ PyObject *res = _PyUnicode_AsDefaultEncodedString(self, NULL);
+ Py_XINCREF(res);
+ return res;
}
PyDoc_STRVAR(swapcase__doc__,
@@ -8021,7 +8036,6 @@ void _PyUnicode_Init(void)
if (!unicode_empty)
return;
- strcpy(unicode_default_encoding, "ascii");
for (i = 0; i < 256; i++)
unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0)