Marc-Andre Lemburg:

Attached you find the latest update of the Unicode implementation. The patch is against the current CVS version. It includes the fix I posted yesterday for the core dump problem in codecs.c (was introduced by my previous patch set -- sorry), adds more tests for the codecs and two new parser markers "es" and "es#".
author: Guido van Rossum <guido@python.org> 2000-03-24 22:14:19 (GMT)
committer: Guido van Rossum <guido@python.org> 2000-03-24 22:14:19 (GMT)
commit: d8855fde885ffcd9956352edb75674f38c64acaa (patch)
tree: e956abb92678c85ffb8674c9a49d1fb7e8459140 /Misc/unicode.txt
parent: 27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
download: cpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2
1 files changed, 110 insertions, 4 deletions
diff --git a/Misc/unicode.txt b/Misc/unicode.txt
index 9a4832a..fc1f2c5 100644
--- a/Misc/unicode.txt
+++ b/Misc/unicode.txt
@@ -715,21 +715,126 @@ Internal Argument Parsing:
 
 These markers are used by the PyArg_ParseTuple() APIs:
 
-  'U':  Check for Unicode object and return a pointer to it
+  "U":  Check for Unicode object and return a pointer to it
 
-  's':  For Unicode objects: auto convert them to the <default encoding>
+  "s":  For Unicode objects: auto convert them to the <default encoding>
         and return a pointer to the object's <defencstr> buffer.
 
-  's#': Access to the Unicode object via the bf_getreadbuf buffer interface 
+  "s#": Access to the Unicode object via the bf_getreadbuf buffer interface 
         (see Buffer Interface); note that the length relates to the buffer
         length, not the Unicode string length (this may be different
         depending on the Internal Format).
 
-  't#': Access to the Unicode object via the bf_getcharbuf buffer interface
+  "t#": Access to the Unicode object via the bf_getcharbuf buffer interface
         (see Buffer Interface); note that the length relates to the buffer
         length, not necessarily to the Unicode string length (this may
         be different depending on the <default encoding>).
 
+  "es": 
+	Takes two parameters: encoding (const char *) and
+	buffer (char **). 
+
+	The input object is first coerced to Unicode in the usual way
+	and then encoded into a string using the given encoding.
+
+	On output, a buffer of the needed size is allocated and
+	returned through *buffer as NULL-terminated string.
+	The encoded may not contain embedded NULL characters.
+	The caller is responsible for free()ing the allocated *buffer
+	after usage.
+
+  "es#":
+	Takes three parameters: encoding (const char *),
+	buffer (char **) and buffer_len (int *).
+	
+	The input object is first coerced to Unicode in the usual way
+	and then encoded into a string using the given encoding.
+
+	If *buffer is non-NULL, *buffer_len must be set to sizeof(buffer)
+	on input. Output is then copied to *buffer.
+
+	If *buffer is NULL, a buffer of the needed size is
+	allocated and output copied into it. *buffer is then
+	updated to point to the allocated memory area. The caller
+	is responsible for free()ing *buffer after usage.
+
+	In both cases *buffer_len is updated to the number of
+	characters written (excluding the trailing NULL-byte).
+	The output buffer is assured to be NULL-terminated.
+
+Examples:
+
+Using "es#" with auto-allocation:
+
+    static PyObject *
+    test_parser(PyObject *self,
+		PyObject *args)
+    {
+	PyObject *str;
+	const char *encoding = "latin-1";
+	char *buffer = NULL;
+	int buffer_len = 0;
+
+	if (!PyArg_ParseTuple(args, "es#:test_parser",
+			      encoding, &buffer, &buffer_len))
+	    return NULL;
+	if (!buffer) {
+	    PyErr_SetString(PyExc_SystemError,
+			    "buffer is NULL");
+	    return NULL;
+	}
+	str = PyString_FromStringAndSize(buffer, buffer_len);
+	free(buffer);
+	return str;
+    }
+
+Using "es" with auto-allocation returning a NULL-terminated string:    
+    
+    static PyObject *
+    test_parser(PyObject *self,
+		PyObject *args)
+    {
+	PyObject *str;
+	const char *encoding = "latin-1";
+	char *buffer = NULL;
+
+	if (!PyArg_ParseTuple(args, "es:test_parser",
+			      encoding, &buffer))
+	    return NULL;
+	if (!buffer) {
+	    PyErr_SetString(PyExc_SystemError,
+			    "buffer is NULL");
+	    return NULL;
+	}
+	str = PyString_FromString(buffer);
+	free(buffer);
+	return str;
+    }
+
+Using "es#" with a pre-allocated buffer:
+    
+    static PyObject *
+    test_parser(PyObject *self,
+		PyObject *args)
+    {
+	PyObject *str;
+	const char *encoding = "latin-1";
+	char _buffer[10];
+	char *buffer = _buffer;
+	int buffer_len = sizeof(_buffer);
+
+	if (!PyArg_ParseTuple(args, "es#:test_parser",
+			      encoding, &buffer, &buffer_len))
+	    return NULL;
+	if (!buffer) {
+	    PyErr_SetString(PyExc_SystemError,
+			    "buffer is NULL");
+	    return NULL;
+	}
+	str = PyString_FromStringAndSize(buffer, buffer_len);
+	return str;
+    }
+
 
 File/Stream Output:
 -------------------
@@ -837,6 +942,7 @@ Encodings:
 
 History of this Proposal:
 -------------------------
+1.3: Added new "es" and "es#" parser markers
 1.2: Removed POD about codecs.open()
 1.1: Added note about comparisons and hash values. Added note about
      case mapping algorithms. Changed stream codecs .read() and
author	Guido van Rossum <guido@python.org>	2000-03-24 22:14:19 (GMT)
committer	Guido van Rossum <guido@python.org>	2000-03-24 22:14:19 (GMT)
commit	d8855fde885ffcd9956352edb75674f38c64acaa (patch)
tree	e956abb92678c85ffb8674c9a49d1fb7e8459140 /Misc/unicode.txt
parent	27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
download	cpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2