diff options
author | Guido van Rossum <guido@python.org> | 2007-05-04 00:41:39 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2007-05-04 00:41:39 (GMT) |
commit | f15a29f975bbdef6de0aa19a19b176d1baf8f5ab (patch) | |
tree | 60f4f72289129eaa808e05f2b7c7fb7bde077371 | |
parent | bae5cedb8d41edc20bea54b8bff0c7f835de8043 (diff) | |
download | cpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.zip cpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.tar.gz cpython-f15a29f975bbdef6de0aa19a19b176d1baf8f5ab.tar.bz2 |
More coding by random modification.
Encoding now returns bytes instead of str8.
eval(), exec(), compile() now accept unicode or bytes.
-rw-r--r-- | Lib/test/test_builtin.py | 19 | ||||
-rw-r--r-- | Objects/bytesobject.c | 26 | ||||
-rw-r--r-- | Objects/moduleobject.c | 7 | ||||
-rw-r--r-- | Objects/object.c | 11 | ||||
-rw-r--r-- | Objects/stringobject.c | 4 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 116 | ||||
-rw-r--r-- | Parser/tokenizer.c | 6 | ||||
-rw-r--r-- | Python/ast.c | 5 | ||||
-rw-r--r-- | Python/bltinmodule.c | 94 | ||||
-rw-r--r-- | Python/getargs.c | 24 | ||||
-rw-r--r-- | Python/import.c | 22 | ||||
-rw-r--r-- | Python/marshal.c | 6 |
12 files changed, 185 insertions, 155 deletions
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 9233871..eae28b0 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -208,8 +208,8 @@ class BuiltinTest(unittest.TestCase): def test_compile(self): compile('print(1)\n', '', 'exec') - bom = '\xef\xbb\xbf' - compile((bom + 'print(1)\n').encode("latin-1"), '', 'exec') +## bom = b'\xef\xbb\xbf' +## compile(bom + b'print(1)\n', '', 'exec') compile(source='pass', filename='?', mode='exec') compile(dont_inherit=0, filename='tmp', source='0', mode='eval') compile('pass', '?', dont_inherit=1, mode='exec') @@ -220,7 +220,7 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, compile, 'pass', '?', 'exec', mode='eval', source='0', filename='tmp') if have_unicode: - compile(str(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec') + compile('print(u"\xe5")\n', '', 'exec') self.assertRaises(TypeError, compile, chr(0), 'f', 'exec') self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad') @@ -338,10 +338,9 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(eval(str('a'), globals, locals), 1) self.assertEqual(eval(str('b'), globals, locals), 200) self.assertEqual(eval(str('c'), globals, locals), 300) - bom = '\xef\xbb\xbf' - self.assertEqual(eval((bom + 'a').encode("latin-1"), globals, locals), 1) - self.assertEqual(eval(str(b'u"\xc3\xa5"', 'utf8'), globals), - str(b'\xc3\xa5', 'utf8')) +## bom = b'\xef\xbb\xbf' +## self.assertEqual(eval(bom + b'a', globals, locals), 1) + self.assertEqual(eval('u"\xe5"', globals), u"\xe5") self.assertRaises(TypeError, eval) self.assertRaises(TypeError, eval, ()) @@ -675,16 +674,14 @@ class BuiltinTest(unittest.TestCase): self.assertRaises(TypeError, getattr, sys, 1) self.assertRaises(TypeError, getattr, sys, 1, "foo") self.assertRaises(TypeError, getattr) - if have_unicode: - self.assertRaises(UnicodeError, getattr, sys, chr(sys.maxunicode)) + self.assertRaises(AttributeError, getattr, sys, chr(sys.maxunicode)) def test_hasattr(self): 
import sys self.assert_(hasattr(sys, 'stdout')) self.assertRaises(TypeError, hasattr, sys, 1) self.assertRaises(TypeError, hasattr) - if have_unicode: - self.assertRaises(UnicodeError, hasattr, sys, chr(sys.maxunicode)) + self.assertEqual(False, hasattr(sys, chr(sys.maxunicode))) def test_hash(self): hash(None) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 213dbfc..75b7939 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -79,6 +79,7 @@ PyObject * PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size) { PyBytesObject *new; + int alloc; assert(size >= 0); @@ -86,18 +87,23 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size) if (new == NULL) return NULL; - if (size == 0) + if (size == 0) { new->ob_bytes = NULL; + alloc = 0; + } else { - new->ob_bytes = PyMem_Malloc(size); + alloc = size + 1; + new->ob_bytes = PyMem_Malloc(alloc); if (new->ob_bytes == NULL) { Py_DECREF(new); return NULL; } if (bytes != NULL) memcpy(new->ob_bytes, bytes, size); + new->ob_bytes[size] = '\0'; /* Trailing null byte */ } - new->ob_size = new->ob_alloc = size; + new->ob_size = size; + new->ob_alloc = alloc; return (PyObject *)new; } @@ -134,7 +140,7 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) /* Major downsize; resize down to exact size */ alloc = size; } - else if (size <= alloc) { + else if (size < alloc) { /* Within allocated size; quick exit */ ((PyBytesObject *)self)->ob_size = size; return 0; @@ -147,6 +153,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) /* Major upsize; resize up to exact size */ alloc = size; } + if (alloc <= size) + alloc = size + 1; sval = PyMem_Realloc(((PyBytesObject *)self)->ob_bytes, alloc); if (sval == NULL) { @@ -158,6 +166,8 @@ PyBytes_Resize(PyObject *self, Py_ssize_t size) ((PyBytesObject *)self)->ob_size = size; ((PyBytesObject *)self)->ob_alloc = alloc; + ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null byte */ + return 0; } @@ -221,7 +231,7 @@ bytes_iconcat(PyBytesObject 
*self, PyObject *other) size = mysize + osize; if (size < 0) return PyErr_NoMemory(); - if (size <= self->ob_alloc) + if (size < self->ob_alloc) self->ob_size = size; else if (PyBytes_Resize((PyObject *)self, size) < 0) return NULL; @@ -243,7 +253,7 @@ bytes_repeat(PyBytesObject *self, Py_ssize_t count) size = mysize * count; if (count != 0 && size / count != mysize) return PyErr_NoMemory(); - result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size); + result = (PyBytesObject *)PyBytes_FromStringAndSize(NULL, size); if (result != NULL && size != 0) { if (mysize == 1) memset(result->ob_bytes, self->ob_bytes[0], size); @@ -268,7 +278,7 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count) size = mysize * count; if (count != 0 && size / count != mysize) return PyErr_NoMemory(); - if (size <= self->ob_alloc) + if (size < self->ob_alloc) self->ob_size = size; else if (PyBytes_Resize((PyObject *)self, size) < 0) return NULL; @@ -703,7 +713,7 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds) } bytes = PyString_AS_STRING(encoded); size = PyString_GET_SIZE(encoded); - if (size <= self->ob_alloc) + if (size < self->ob_alloc) self->ob_size = size; else if (PyBytes_Resize((PyObject *)self, size) < 0) { Py_DECREF(encoded); diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 18914d8..82eabf1 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -72,8 +72,11 @@ PyModule_GetName(PyObject *m) PyErr_SetString(PyExc_SystemError, "nameless module"); return NULL; } - if (PyUnicode_Check(nameobj)) - nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, "replace"); + if (PyUnicode_Check(nameobj)) { + nameobj = _PyUnicode_AsDefaultEncodedString(nameobj, NULL); + if (nameobj == NULL) + return NULL; + } return PyString_AsString(nameobj); } diff --git a/Objects/object.c b/Objects/object.c index ada1d1e..81f5669 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -422,7 +422,8 @@ PyObject_Str(PyObject *v) return NULL; if 
(PyUnicode_Check(res)) { PyObject* str; - str = PyUnicode_AsEncodedString(res, NULL, NULL); + str = _PyUnicode_AsDefaultEncodedString(res, NULL); + Py_XINCREF(str); Py_DECREF(res); if (str) res = str; @@ -929,12 +930,12 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) PyTypeObject *tp = v->ob_type; int err; - if (!PyString_Check(name)){ + if (!PyString_Check(name)) { /* The Unicode to string conversion is done here because the existing tp_setattro slots expect a string object as name and we wouldn't want to break those. */ if (PyUnicode_Check(name)) { - name = PyUnicode_AsEncodedString(name, NULL, NULL); + name = _PyUnicode_AsDefaultEncodedString(name, NULL); if (name == NULL) return -1; } @@ -946,8 +947,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) return -1; } } - else - Py_INCREF(name); + Py_INCREF(name); PyString_InternInPlace(&name); if (tp->tp_setattro != NULL) { @@ -961,6 +961,7 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) return err; } Py_DECREF(name); + assert(name->ob_refcnt >= 1); if (tp->tp_getattr == NULL && tp->tp_getattro == NULL) PyErr_Format(PyExc_TypeError, "'%.100s' object has no attributes " diff --git a/Objects/stringobject.c b/Objects/stringobject.c index f74c5dc..2ebaca8 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3181,9 +3181,9 @@ string_encode(PyStringObject *self, PyObject *args) v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); if (v == NULL) goto onError; - if (!PyString_Check(v) && !PyUnicode_Check(v)) { + if (!PyBytes_Check(v)) { PyErr_Format(PyExc_TypeError, - "encoder did not return a string/unicode object " + "[str8] encoder did not return a bytes object " "(type=%.400s)", v->ob_type->tp_name); Py_DECREF(v); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d4a7e7e..db3f9c4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -104,13 +104,9 @@ static PyUnicodeObject *unicode_empty; static 
PyUnicodeObject *unicode_latin1[256]; /* Default encoding to use and assume when NULL is passed as encoding - parameter; it is initialized by _PyUnicode_Init(). - - Always use the PyUnicode_SetDefaultEncoding() and - PyUnicode_GetDefaultEncoding() APIs to access this global. - -*/ -static char unicode_default_encoding[100]; + parameter; it is fixed to "utf-8". Always use the + PyUnicode_GetDefaultEncoding() API to access this global. */ +static const char unicode_default_encoding[] = "utf-8"; Py_UNICODE PyUnicode_GetMax(void) @@ -711,10 +707,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) goto onError; - if (!PyString_Check(v)) { + if (!PyBytes_Check(v)) { + if (PyString_Check(v)) { + /* Old codec, turn it into bytes */ + PyObject *b = PyBytes_FromObject(v); + Py_DECREF(v); + return b; + } PyErr_Format(PyExc_TypeError, - "encoder did not return a string object (type=%.400s)", - v->ob_type->tp_name); + "encoder did not return a bytes object " + "(type=%.400s, encoding=%.20s, errors=%.20s)", + v->ob_type->tp_name, + encoding ? encoding : "NULL", + errors ? 
errors : "NULL"); Py_DECREF(v); goto onError; } @@ -728,12 +733,28 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, const char *errors) { PyObject *v = ((PyUnicodeObject *)unicode)->defenc; - + PyObject *b; if (v) return v; - v = PyUnicode_AsEncodedString(unicode, NULL, errors); - if (v && errors == NULL) + if (errors != NULL) + Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString"); + if (errors == NULL) { + b = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + NULL); + } + else { + b = PyUnicode_AsEncodedString(unicode, NULL, errors); + } + if (!b) + return NULL; + v = PyString_FromStringAndSize(PyBytes_AsString(b), + PyBytes_Size(b)); + Py_DECREF(b); + if (!errors) { + Py_XINCREF(v); ((PyUnicodeObject *)unicode)->defenc = v; + } return v; } @@ -768,21 +789,13 @@ const char *PyUnicode_GetDefaultEncoding(void) int PyUnicode_SetDefaultEncoding(const char *encoding) { - PyObject *v; - - /* Make sure the encoding is valid. As side effect, this also - loads the encoding into the codec registry cache. */ - v = _PyCodec_Lookup(encoding); - if (v == NULL) - goto onError; - Py_DECREF(v); - strncpy(unicode_default_encoding, - encoding, - sizeof(unicode_default_encoding)); + if (strcmp(encoding, unicode_default_encoding) != 0) { + PyErr_Format(PyExc_ValueError, + "Can only set default encoding to %s", + unicode_default_encoding); + return -1; + } return 0; - - onError: - return -1; } /* error handling callback helper: @@ -1429,10 +1442,10 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, nallocated = size * 4; if (nallocated / 4 != size) /* overflow! */ return PyErr_NoMemory(); - v = PyString_FromStringAndSize(NULL, nallocated); + v = PyBytes_FromStringAndSize(NULL, nallocated); if (v == NULL) return NULL; - p = PyString_AS_STRING(v); + p = PyBytes_AS_STRING(v); } for (i = 0; i < size;) { @@ -1480,13 +1493,13 @@ encodeUCS4: /* This was stack allocated. 
*/ nneeded = p - stackbuf; assert(nneeded <= nallocated); - v = PyString_FromStringAndSize(stackbuf, nneeded); + v = PyBytes_FromStringAndSize(stackbuf, nneeded); } else { /* Cut back to size actually needed. */ - nneeded = p - PyString_AS_STRING(v); + nneeded = p - PyBytes_AS_STRING(v); assert(nneeded <= nallocated); - _PyString_Resize(&v, nneeded); + PyBytes_Resize(v, nneeded); } return v; @@ -2588,12 +2601,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, /* allocate enough for a simple encoding without replacements, if we need more, we'll resize */ - res = PyString_FromStringAndSize(NULL, size); + res = PyBytes_FromStringAndSize(NULL, size); if (res == NULL) goto onError; if (size == 0) return res; - str = PyString_AS_STRING(res); + str = PyBytes_AS_STRING(res); ressize = size; while (p<endp) { @@ -2643,7 +2656,7 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, p = collend; break; case 4: /* xmlcharrefreplace */ - respos = str-PyString_AS_STRING(res); + respos = str - PyBytes_AS_STRING(res); /* determine replacement size (temporarily (mis)uses p) */ for (p = collstart, repsize = 0; p < collend; ++p) { if (*p<10) @@ -2670,9 +2683,9 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, if (requiredsize > ressize) { if (requiredsize<2*ressize) requiredsize = 2*ressize; - if (_PyString_Resize(&res, requiredsize)) + if (PyBytes_Resize(res, requiredsize)) goto onError; - str = PyString_AS_STRING(res) + respos; + str = PyBytes_AS_STRING(res) + respos; ressize = requiredsize; } /* generate replacement (temporarily (mis)uses p) */ @@ -2690,17 +2703,17 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, /* need more space? 
(at least enough for what we have+the replacement+the rest of the string, so we won't have to check space for encodable characters) */ - respos = str-PyString_AS_STRING(res); + respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_SIZE(repunicode); requiredsize = respos+repsize+(endp-collend); if (requiredsize > ressize) { if (requiredsize<2*ressize) requiredsize = 2*ressize; - if (_PyString_Resize(&res, requiredsize)) { + if (PyBytes_Resize(res, requiredsize)) { Py_DECREF(repunicode); goto onError; } - str = PyString_AS_STRING(res) + respos; + str = PyBytes_AS_STRING(res) + respos; ressize = requiredsize; } /* check if there is anything unencodable in the replacement @@ -2721,10 +2734,10 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, } } /* Resize if we allocated to much */ - respos = str-PyString_AS_STRING(res); + respos = str - PyBytes_AS_STRING(res); if (respos<ressize) /* If this falls res will be NULL */ - _PyString_Resize(&res, respos); + PyBytes_Resize(res, respos); Py_XDECREF(errorHandler); Py_XDECREF(exc); return res; @@ -2979,20 +2992,20 @@ static int encode_mbcs(PyObject **repr, if (*repr == NULL) { /* Create string object */ - *repr = PyString_FromStringAndSize(NULL, mbcssize); + *repr = PyBytes_FromStringAndSize(NULL, mbcssize); if (*repr == NULL) return -1; } else { /* Extend string object */ - n = PyString_Size(*repr); - if (_PyString_Resize(repr, n + mbcssize) < 0) + n = PyBytes_Size(*repr); + if (PyBytes_Resize(*repr, n + mbcssize) < 0) return -1; } /* Do the conversion */ if (size > 0) { - char *s = PyString_AS_STRING(*repr) + n; + char *s = PyBytes_AS_STRING(*repr) + n; if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) { PyErr_SetFromWindowsErrWithFilename(0, NULL); return -1; @@ -5630,9 +5643,9 @@ unicode_encode(PyUnicodeObject *self, PyObject *args) v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); if (v == NULL) goto onError; - if (!PyString_Check(v) && !PyUnicode_Check(v)) { + 
if (!PyBytes_Check(v)) { PyErr_Format(PyExc_TypeError, - "encoder did not return a string/unicode object " + "encoder did not return a bytes object " "(type=%.400s)", v->ob_type->tp_name); Py_DECREF(v); @@ -6797,9 +6810,11 @@ unicode_splitlines(PyUnicodeObject *self, PyObject *args) } static -PyObject *unicode_str(PyUnicodeObject *self) +PyObject *unicode_str(PyObject *self) { - return PyUnicode_AsEncodedString((PyObject *)self, NULL, NULL); + PyObject *res = _PyUnicode_AsDefaultEncodedString(self, NULL); + Py_XINCREF(res); + return res; } PyDoc_STRVAR(swapcase__doc__, @@ -8021,7 +8036,6 @@ void _PyUnicode_Init(void) if (!unicode_empty) return; - strcpy(unicode_default_encoding, "ascii"); for (i = 0; i < 256; i++) unicode_latin1[i] = NULL; if (PyType_Ready(&PyUnicode_Type) < 0) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 02f33e2..563fbf2 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -696,9 +696,9 @@ tok_stdin_decode(struct tok_state *tok, char **inp) if (utf8 == NULL) goto error_clear; - assert(PyString_Check(utf8)); - converted = new_string(PyString_AS_STRING(utf8), - PyString_GET_SIZE(utf8)); + assert(PyBytes_Check(utf8)); + converted = new_string(PyBytes_AS_STRING(utf8), + PyBytes_GET_SIZE(utf8)); Py_DECREF(utf8); if (converted == NULL) goto error_nomem; diff --git a/Python/ast.c b/Python/ast.c index 821a5ad..5845076 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3101,8 +3101,9 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding) Py_DECREF(u); return NULL; } - r = PyString_AsString(w); - rn = PyString_Size(w); + assert(PyBytes_Check(w)); + r = PyBytes_AsString(w); + rn = PyBytes_Size(w); assert(rn % 2 == 0); for (i = 0; i < rn; i += 2) { sprintf(p, "\\u%02x%02x", diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 828cb5d..7d52e2b 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -412,6 +412,36 @@ PyDoc_STRVAR(cmp_doc, \n\ Return negative if x<y, zero if x==y, positive if x>y."); 
+ +static char * +source_as_string(PyObject *cmd) +{ + char *str; + Py_ssize_t size; + + if (!PyObject_CheckReadBuffer(cmd) && + !PyUnicode_Check(cmd)) { + PyErr_SetString(PyExc_TypeError, + "eval()/exec() arg 1 must be a string, bytes or code object"); + return NULL; + } + + if (PyUnicode_Check(cmd)) { + cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL); + if (cmd == NULL) + return NULL; + } + if (PyObject_AsReadBuffer(cmd, (const void **)&str, &size) < 0) { + return NULL; + } + if (strlen(str) != size) { + PyErr_SetString(PyExc_TypeError, + "source code string cannot contain null bytes"); + return NULL; + } + return str; +} + static PyObject * builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { @@ -422,8 +452,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) int dont_inherit = 0; int supplied_flags = 0; PyCompilerFlags cf; - PyObject *result = NULL, *cmd, *tmp = NULL; - Py_ssize_t length; + PyObject *cmd; static char *kwlist[] = {"source", "filename", "mode", "flags", "dont_inherit", NULL}; @@ -432,22 +461,11 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) &supplied_flags, &dont_inherit)) return NULL; - cf.cf_flags = supplied_flags; + cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; - if (PyUnicode_Check(cmd)) { - tmp = PyUnicode_AsUTF8String(cmd); - if (tmp == NULL) - return NULL; - cmd = tmp; - cf.cf_flags |= PyCF_SOURCE_IS_UTF8; - } - if (PyObject_AsReadBuffer(cmd, (const void **)&str, &length)) + str = source_as_string(cmd); + if (str == NULL) return NULL; - if ((size_t)length != strlen(str)) { - PyErr_SetString(PyExc_TypeError, - "compile() expected string without null bytes"); - goto cleanup; - } if (strcmp(startstr, "exec") == 0) start = Py_file_input; @@ -458,7 +476,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) else { PyErr_SetString(PyExc_ValueError, "compile() arg 3 must be 'exec' or 'eval' or 'single'"); - goto cleanup; + return NULL; } if (supplied_flags & @@ -466,17 
+484,14 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { PyErr_SetString(PyExc_ValueError, "compile(): unrecognised flags"); - goto cleanup; + return NULL; } /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ if (!dont_inherit) { PyEval_MergeCompilerFlags(&cf); } - result = Py_CompileStringFlags(str, filename, start, &cf); -cleanup: - Py_XDECREF(tmp); - return result; + return Py_CompileStringFlags(str, filename, start, &cf); } PyDoc_STRVAR(compile_doc, @@ -584,28 +599,14 @@ builtin_eval(PyObject *self, PyObject *args) return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals); } - if (!PyString_Check(cmd) && - !PyUnicode_Check(cmd)) { - PyErr_SetString(PyExc_TypeError, - "eval() arg 1 must be a string or code object"); + str = source_as_string(cmd); + if (str == NULL) return NULL; - } - cf.cf_flags = 0; - if (PyUnicode_Check(cmd)) { - tmp = PyUnicode_AsUTF8String(cmd); - if (tmp == NULL) - return NULL; - cmd = tmp; - cf.cf_flags |= PyCF_SOURCE_IS_UTF8; - } - if (PyString_AsStringAndSize(cmd, &str, NULL)) { - Py_XDECREF(tmp); - return NULL; - } while (*str == ' ' || *str == '\t') str++; + cf.cf_flags = PyCF_SOURCE_IS_UTF8; (void)PyEval_MergeCompilerFlags(&cf); result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); Py_XDECREF(tmp); @@ -694,25 +695,16 @@ builtin_exec(PyObject *self, PyObject *args) locals); } else { - PyObject *tmp = NULL; - char *str; + char *str = source_as_string(prog); PyCompilerFlags cf; - cf.cf_flags = 0; - if (PyUnicode_Check(prog)) { - tmp = PyUnicode_AsUTF8String(prog); - if (tmp == NULL) - return NULL; - prog = tmp; - cf.cf_flags |= PyCF_SOURCE_IS_UTF8; - } - if (PyString_AsStringAndSize(prog, &str, NULL)) + if (str == NULL) return NULL; + cf.cf_flags = PyCF_SOURCE_IS_UTF8; if (PyEval_MergeCompilerFlags(&cf)) v = PyRun_StringFlags(str, Py_file_input, globals, locals, &cf); else v = PyRun_String(str, Py_file_input, globals, locals); - Py_XDECREF(tmp); } if (v == NULL) return NULL; diff --git 
a/Python/getargs.c b/Python/getargs.c index 2a02a89..7d55bae 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -883,7 +883,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char **buffer; const char *encoding; PyObject *s; - int size, recode_strings; + int recode_strings; + Py_ssize_t size; + char *ptr; /* Get 'e' parameter: the encoding name */ encoding = (const char *)va_arg(*p_va, const char *); @@ -912,6 +914,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (!recode_strings && PyString_Check(arg)) { s = arg; Py_INCREF(s); + size = PyString_GET_SIZE(s); + ptr = PyString_AS_STRING(s); } else { PyObject *u; @@ -931,14 +935,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (s == NULL) return converterr("(encoding failed)", arg, msgbuf, bufsize); - if (!PyString_Check(s)) { + if (!PyBytes_Check(s)) { Py_DECREF(s); return converterr( - "(encoder failed to return a string)", + "(encoder failed to return bytes)", arg, msgbuf, bufsize); } + size = PyBytes_GET_SIZE(s); + ptr = PyBytes_AS_STRING(s); } - size = PyString_GET_SIZE(s); /* Write output; output is guaranteed to be 0-terminated */ if (*format == '#') { @@ -994,9 +999,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, arg, msgbuf, bufsize); } } - memcpy(*buffer, - PyString_AS_STRING(s), - size + 1); + memcpy(*buffer, ptr, size+1); STORE_SIZE(size); } else { /* Using a 0-terminated buffer: @@ -1012,8 +1015,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, PyMem_Free()ing it after usage */ - if ((Py_ssize_t)strlen(PyString_AS_STRING(s)) - != size) { + if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( "(encoded string without NULL bytes)", @@ -1030,9 +1032,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return converterr("(cleanup problem)", arg, msgbuf, bufsize); } - memcpy(*buffer, - 
PyString_AS_STRING(s), - size + 1); + memcpy(*buffer, ptr, size+1); } Py_DECREF(s); break; diff --git a/Python/import.c b/Python/import.c index 7e3d2f4..2e1f894 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1254,6 +1254,9 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, for (i = 0; i < npath; i++) { PyObject *copy = NULL; PyObject *v = PyList_GetItem(path, i); + PyObject *origv = v; + char *base; + Py_ssize_t size; if (!v) return NULL; if (PyUnicode_Check(v)) { @@ -1263,15 +1266,24 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, return NULL; v = copy; } - else - if (!PyString_Check(v)) + if (PyString_Check(v)) { + base = PyString_AS_STRING(v); + size = PyString_GET_SIZE(v); + } + else if (PyBytes_Check(v)) { + base = PyBytes_AS_STRING(v); + size = PyBytes_GET_SIZE(v); + } + else { + Py_XDECREF(copy); continue; - len = PyString_GET_SIZE(v); + } + len = size; if (len + 2 + namelen + MAXSUFFIXSIZE >= buflen) { Py_XDECREF(copy); continue; /* Too long */ } - strcpy(buf, PyString_AS_STRING(v)); + strcpy(buf, base); if (strlen(buf) != len) { Py_XDECREF(copy); continue; /* v contains '\0' */ @@ -1282,7 +1294,7 @@ find_module(char *fullname, char *subname, PyObject *path, char *buf, PyObject *importer; importer = get_path_importer(path_importer_cache, - path_hooks, v); + path_hooks, origv); if (importer == NULL) { Py_XDECREF(copy); return NULL; diff --git a/Python/marshal.c b/Python/marshal.c index 94d73a0..9243798 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -263,14 +263,14 @@ w_object(PyObject *v, WFILE *p) return; } w_byte(TYPE_UNICODE, p); - n = PyString_GET_SIZE(utf8); + n = PyBytes_GET_SIZE(utf8); if (n > INT_MAX) { p->depth--; p->error = 1; return; } w_long((long)n, p); - w_string(PyString_AS_STRING(utf8), (int)n, p); + w_string(PyBytes_AS_STRING(utf8), (int)n, p); Py_DECREF(utf8); } else if (PyTuple_Check(v)) { @@ -1031,7 +1031,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) if (wf.ptr 
- base > PY_SSIZE_T_MAX) { Py_DECREF(wf.str); PyErr_SetString(PyExc_OverflowError, - "too much marshall data for a string"); + "too much marshal data for a string"); return NULL; } _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)); |