diff options
author | Marc-André Lemburg <mal@egenix.com> | 2001-05-02 17:16:16 (GMT) |
---|---|---|
committer | Marc-André Lemburg <mal@egenix.com> | 2001-05-02 17:16:16 (GMT) |
commit | 6f15e5796e89163bd9b5be7a3f7334d17f2c50c1 (patch) | |
tree | ded8cb3e698a175fc042de9524b4607e4dc9be19 | |
parent | b1f35bffe5f2ce456854d4e8c8075a0d58e8eb02 (diff) | |
download | cpython-6f15e5796e89163bd9b5be7a3f7334d17f2c50c1.zip cpython-6f15e5796e89163bd9b5be7a3f7334d17f2c50c1.tar.gz cpython-6f15e5796e89163bd9b5be7a3f7334d17f2c50c1.tar.bz2 |
Added new parser markers 'et' and 'et#' which do not recode string
objects but instead assume that they use the requested encoding.
This is needed on Windows to enable opening files by passing in
Unicode file names.
-rw-r--r-- | Doc/ext/ext.tex | 12 | ||||
-rw-r--r-- | Python/getargs.c | 24 |
2 files changed, 32 insertions, 4 deletions
diff --git a/Doc/ext/ext.tex b/Doc/ext/ext.tex index 446802f..ba8217e 100644 --- a/Doc/ext/ext.tex +++ b/Doc/ext/ext.tex @@ -736,6 +736,12 @@ buffer and adjust \var{*buffer} to reference the newly allocated storage. The caller is responsible for calling \cfunction{PyMem_Free()} to free the allocated buffer after usage. +\item[\samp{et} (string, Unicode object or character buffer compatible +object) {[const char *encoding, char **buffer]}] +Same as \samp{es} except that string objects are passed through without +recoding them. Instead, the implementation assumes that the string +object uses the encoding passed in as parameter. + \item[\samp{es\#} (string, Unicode object or character buffer compatible object) {[const char *encoding, char **buffer, int *buffer_length]}] This variant on \samp{s\#} is used for encoding Unicode and objects @@ -767,6 +773,12 @@ overflow is signalled with an exception. In both cases, \var{*buffer_length} is set to the length of the encoded data without the trailing 0-byte. +\item[\samp{et\#} (string, Unicode object or character buffer compatible +object) {[const char *encoding, char **buffer, int *buffer_length]}] +Same as \samp{es\#} except that string objects are passed through without +recoding them. Instead, the implementation assumes that the string +object uses the encoding passed in as parameter. + \item[\samp{b} (integer) {[char]}] Convert a Python integer to a tiny int, stored in a C \ctype{char}. 
diff --git a/Python/getargs.c b/Python/getargs.c index 00f298a..6eabd75 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -687,25 +687,39 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va) char **buffer; const char *encoding; PyObject *u, *s; - int size; + int size, recode_strings; /* Get 'e' parameter: the encoding name */ encoding = (const char *)va_arg(*p_va, const char *); if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); - /* Get 's' parameter: the output buffer to use */ + /* Get output buffer parameter: + 's' (recode all objects via Unicode) or + 't' (only recode non-string objects) + */ if (*format != 's') + recode_strings = 1; + else if (*format == 't') + recode_strings = 0; + else return "(unknown parser marker combination)"; buffer = (char **)va_arg(*p_va, char **); format++; if (buffer == NULL) return "(buffer is NULL)"; + /* Encode object */ + if (!recode_strings && PyString_Check(arg)) { + s = arg; + Py_INCREF(s); + } + else { /* Convert object to Unicode */ u = PyUnicode_FromObject(arg); if (u == NULL) - return "string or unicode or text buffer"; + return \ + "string or unicode or text buffer"; /* Encode object; use default error handling */ s = PyUnicode_AsEncodedString(u, @@ -716,7 +730,9 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va) return "(encoding failed)"; if (!PyString_Check(s)) { Py_DECREF(s); - return "(encoder failed to return a string)"; + return \ + "(encoder failed to return a string)"; + } } size = PyString_GET_SIZE(s); |