From d1ba443206b535f41154f10b9d56d4fc76a1a9d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Tue, 19 Sep 2000 21:04:18 +0000 Subject: This patch adds a new Python C API called PyString_AsStringAndSize() which implements the automatic conversion from Unicode to a string object using the default encoding. The new API is then put to use to have eval() and exec accept Unicode objects as code parameter. This closes bugs #110924 and #113890. As a side effect, the traditional C APIs PyString_Size() and PyString_AsString() will also accept Unicode objects as parameters. --- Doc/api/api.tex | 17 ++++++++++++ Doc/api/refcounts.dat | 5 ++++ Include/stringobject.h | 15 +++++++++++ Lib/test/test_b1.py | 12 +++++++++ Lib/test/test_grammar.py | 7 +++++ Objects/stringobject.c | 70 +++++++++++++++++++++++++++++++++++++++++++----- Python/bltinmodule.c | 9 +++---- Python/ceval.c | 10 +++---- 8 files changed, 126 insertions(+), 19 deletions(-) diff --git a/Doc/api/api.tex b/Doc/api/api.tex index 4362448..9acc8e8 100644 --- a/Doc/api/api.tex +++ b/Doc/api/api.tex @@ -2105,6 +2105,23 @@ Macro form of \cfunction{PyString_AsString()} but without error checking. \end{cfuncdesc} +\begin{cfuncdesc}{int}{PyString_AsStringAndSize}{PyObject *obj, + char **buffer, + int *length} +Returns a null-terminated representation of the contents of the object +\var{obj} through the output variables \var{buffer} and \var{length}. + +The function accepts both string and Unicode objects as input. For +Unicode objects it returns the default encoded version of the object. +If \var{length} is set to \NULL{}, the resulting buffer must not contain +null characters; if it does, the function returns -1 and a +TypeError is raised. + +The buffer refers to an internal string buffer of \var{obj}, not a +copy. The data must not be modified in any way. It must not be +de-allocated.
+\end{cfuncdesc} + \begin{cfuncdesc}{void}{PyString_Concat}{PyObject **string, PyObject *newpart} Creates a new string object in \var{*string} containing the diff --git a/Doc/api/refcounts.dat b/Doc/api/refcounts.dat index 0a80d67..f3ef0ac 100644 --- a/Doc/api/refcounts.dat +++ b/Doc/api/refcounts.dat @@ -760,6 +760,11 @@ PyString_AS_STRING:PyObject*:string:0: PyString_AsString:char*::: PyString_AsString:PyObject*:string:0: +PyString_AsStringAndSize:int::: +PyString_AsStringAndSize:PyObject*:obj:0: +PyString_AsStringAndSize:char**:buffer:: +PyString_AsStringAndSize:int*:length:: + PyString_Check:int::: PyString_Check:PyObject*:o:0: diff --git a/Include/stringobject.h b/Include/stringobject.h index 7afd347..3bba7bc 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -103,6 +103,21 @@ extern DL_IMPORT(PyObject*) PyString_AsEncodedString( const char *errors /* error handling */ ); +/* Provides access to the internal data buffer and size of a string + object or the default encoded version of a Unicode object. Passing + NULL as the len parameter will force the string buffer to be + 0-terminated (passing a string with embedded null characters will + cause an exception).
*/ + +extern DL_IMPORT(int) PyString_AsStringAndSize( + register PyObject *obj, /* string or Unicode object */ + register char **s, /* pointer to buffer variable */ + register int *len /* pointer to length variable or NULL + (only possible for 0-terminated + strings) */ + ); + + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_b1.py b/Lib/test/test_b1.py index f8dfe47..24c5279 100644 --- a/Lib/test/test_b1.py +++ b/Lib/test/test_b1.py @@ -161,6 +161,18 @@ if eval('b', globals, locals) <> 200: raise TestFailed, "eval(3)" if eval('c', globals, locals) <> 300: raise TestFailed, "eval(4)" +if eval(u'1+1') <> 2: raise TestFailed, 'eval(u\'1+1\')' +if eval(u' 1+1\n') <> 2: raise TestFailed, 'eval(u\' 1+1\\n\')' +globals = {'a': 1, 'b': 2} +locals = {'b': 200, 'c': 300} +if eval(u'a', globals) <> 1: + raise TestFailed, "eval(1) == %s" % eval(u'a', globals) +if eval(u'a', globals, locals) <> 1: + raise TestFailed, "eval(2)" +if eval(u'b', globals, locals) <> 200: + raise TestFailed, "eval(3)" +if eval(u'c', globals, locals) <> 300: + raise TestFailed, "eval(4)" print 'execfile' z = 0 diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index ef7c09b..68cae81 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -355,6 +355,13 @@ def f(): del z exec 'z=1+1' if z <> 2: raise TestFailed, 'exec \'z=1+1\'' + z = None + del z + exec u'z=1+1\n' + if z <> 2: raise TestFailed, 'exec u\'z=1+1\'\\n' + del z + exec u'z=1+1' + if z <> 2: raise TestFailed, 'exec u\'z=1+1\'' f() g = {} exec 'z = 1' in g diff --git a/Objects/stringobject.c b/Objects/stringobject.c index eee3551..cadca16 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -239,24 +239,80 @@ string_dealloc(PyObject *op) PyObject_DEL(op); } +static int +string_getsize(register PyObject *op) +{ + char *s; + int len; + if (PyString_AsStringAndSize(op, &s, &len)) + return -1; + return len; +} + +static /*const*/ char * +string_getbuffer(register PyObject *op) +{ + char *s; + 
int len; + if (PyString_AsStringAndSize(op, &s, &len)) + return NULL; + return s; +} + int PyString_Size(register PyObject *op) { - if (!PyString_Check(op)) { - PyErr_BadInternalCall(); - return -1; - } + if (!PyString_Check(op)) + return string_getsize(op); return ((PyStringObject *)op) -> ob_size; } /*const*/ char * PyString_AsString(register PyObject *op) { - if (!PyString_Check(op)) { + if (!PyString_Check(op)) + return string_getbuffer(op); + return ((PyStringObject *)op) -> ob_sval; +} + +/* Internal API needed by PyString_AsStringAndSize(): */ +extern +PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, + const char *errors); + +int +PyString_AsStringAndSize(register PyObject *obj, + register char **s, + register int *len) +{ + if (s == NULL) { PyErr_BadInternalCall(); - return NULL; + return -1; } - return ((PyStringObject *)op) -> ob_sval; + + if (!PyString_Check(obj)) { + if (PyUnicode_Check(obj)) { + obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); + if (obj == NULL) + return -1; + } + else { + PyErr_Format(PyExc_TypeError, + "expected string or Unicode object, " + "%.200s found", obj->ob_type->tp_name); + return -1; + } + } + + *s = PyString_AS_STRING(obj); + if (len != NULL) + *len = PyString_GET_SIZE(obj); + else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) { + PyErr_SetString(PyExc_TypeError, + "expected string without null bytes"); + return -1; + } + return 0; } /* Methods */ diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3eac8d5..88656ca 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -748,17 +748,14 @@ builtin_eval(PyObject *self, PyObject *args) } if (PyCode_Check(cmd)) return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals); - if (!PyString_Check(cmd)) { + if (!PyString_Check(cmd) && + !PyUnicode_Check(cmd)) { PyErr_SetString(PyExc_TypeError, "eval() argument 1 must be string or code object"); return NULL; } - str = PyString_AsString(cmd); - if (strlen(str) != (size_t)PyString_Size(cmd)) { - 
PyErr_SetString(PyExc_ValueError, - "embedded '\\0' in string arg"); + if (PyString_AsStringAndSize(cmd, &str, NULL)) return NULL; - } while (*str == ' ' || *str == '\t') str++; return PyRun_String(str, Py_eval_input, globals, locals); diff --git a/Python/ceval.c b/Python/ceval.c index 09ae132..491a73b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3042,6 +3042,7 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals, else if (locals == Py_None) locals = globals; if (!PyString_Check(prog) && + !PyUnicode_Check(prog) && !PyCode_Check(prog) && !PyFile_Check(prog)) { PyErr_SetString(PyExc_TypeError, @@ -3064,13 +3065,10 @@ exec_statement(PyFrameObject *f, PyObject *prog, PyObject *globals, v = PyRun_File(fp, name, Py_file_input, globals, locals); } else { - char *s = PyString_AsString(prog); - if (strlen(s) != (size_t)PyString_Size(prog)) { - PyErr_SetString(PyExc_ValueError, - "embedded '\\0' in exec string"); + char *str; + if (PyString_AsStringAndSize(prog, &str, NULL)) return -1; - } - v = PyRun_String(s, Py_file_input, globals, locals); + v = PyRun_String(str, Py_file_input, globals, locals); } if (plain) PyFrame_LocalsToFast(f, 0); -- cgit v0.12