summaryrefslogtreecommitdiffstats
path: root/Misc/unicode.txt
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>2000-03-24 22:14:19 (GMT)
committerGuido van Rossum <guido@python.org>2000-03-24 22:14:19 (GMT)
commitd8855fde885ffcd9956352edb75674f38c64acaa (patch)
treee956abb92678c85ffb8674c9a49d1fb7e8459140 /Misc/unicode.txt
parent27fc3c05e14d8b876bf0577225d509cbde45bfe0 (diff)
downloadcpython-d8855fde885ffcd9956352edb75674f38c64acaa.zip
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.gz
cpython-d8855fde885ffcd9956352edb75674f38c64acaa.tar.bz2
Marc-Andre Lemburg:
Attached you find the latest update of the Unicode implementation. The patch is against the current CVS version. It includes the fix I posted yesterday for the core dump problem in codecs.c (was introduced by my previous patch set -- sorry), adds more tests for the codecs and two new parser markers "es" and "es#".
Diffstat (limited to 'Misc/unicode.txt')
-rw-r--r--Misc/unicode.txt114
1 files changed, 110 insertions, 4 deletions
diff --git a/Misc/unicode.txt b/Misc/unicode.txt
index 9a4832a..fc1f2c5 100644
--- a/Misc/unicode.txt
+++ b/Misc/unicode.txt
@@ -715,21 +715,126 @@ Internal Argument Parsing:
These markers are used by the PyArg_ParseTuple() APIs:
- 'U': Check for Unicode object and return a pointer to it
+ "U": Check for Unicode object and return a pointer to it
- 's': For Unicode objects: auto convert them to the <default encoding>
+ "s": For Unicode objects: auto convert them to the <default encoding>
and return a pointer to the object's <defencstr> buffer.
- 's#': Access to the Unicode object via the bf_getreadbuf buffer interface
+ "s#": Access to the Unicode object via the bf_getreadbuf buffer interface
(see Buffer Interface); note that the length relates to the buffer
length, not the Unicode string length (this may be different
depending on the Internal Format).
- 't#': Access to the Unicode object via the bf_getcharbuf buffer interface
+ "t#": Access to the Unicode object via the bf_getcharbuf buffer interface
(see Buffer Interface); note that the length relates to the buffer
length, not necessarily to the Unicode string length (this may
be different depending on the <default encoding>).
+ "es":
+ Takes two parameters: encoding (const char *) and
+ buffer (char **).
+
+ The input object is first coerced to Unicode in the usual way
+ and then encoded into a string using the given encoding.
+
+ On output, a buffer of the needed size is allocated and
+ returned through *buffer as a NULL-terminated string.
+ The encoded string may not contain embedded NULL characters.
+ The caller is responsible for free()ing the allocated *buffer
+ after usage.
+
+ "es#":
+ Takes three parameters: encoding (const char *),
+ buffer (char **) and buffer_len (int *).
+
+ The input object is first coerced to Unicode in the usual way
+ and then encoded into a string using the given encoding.
+
+ If *buffer is non-NULL, *buffer_len must be set on input to the size
+ of the memory area *buffer points to. Output is then copied to *buffer.
+
+ If *buffer is NULL, a buffer of the needed size is
+ allocated and output copied into it. *buffer is then
+ updated to point to the allocated memory area. The caller
+ is responsible for free()ing *buffer after usage.
+
+ In both cases *buffer_len is updated to the number of
+ characters written (excluding the trailing NULL-byte).
+ The output buffer is assured to be NULL-terminated.
+
+Examples:
+
+Using "es#" with auto-allocation:
+
+ static PyObject *
+ test_parser(PyObject *self,
+ PyObject *args)
+ {
+ PyObject *str;
+ const char *encoding = "latin-1";
+ char *buffer = NULL;
+ int buffer_len = 0;
+
+ if (!PyArg_ParseTuple(args, "es#:test_parser",
+ encoding, &buffer, &buffer_len))
+ return NULL;
+ if (!buffer) {
+ PyErr_SetString(PyExc_SystemError,
+ "buffer is NULL");
+ return NULL;
+ }
+ str = PyString_FromStringAndSize(buffer, buffer_len);
+ free(buffer);
+ return str;
+ }
+
+Using "es" with auto-allocation returning a NULL-terminated string:
+
+ static PyObject *
+ test_parser(PyObject *self,
+ PyObject *args)
+ {
+ PyObject *str;
+ const char *encoding = "latin-1";
+ char *buffer = NULL;
+
+ if (!PyArg_ParseTuple(args, "es:test_parser",
+ encoding, &buffer))
+ return NULL;
+ if (!buffer) {
+ PyErr_SetString(PyExc_SystemError,
+ "buffer is NULL");
+ return NULL;
+ }
+ str = PyString_FromString(buffer);
+ free(buffer);
+ return str;
+ }
+
+Using "es#" with a pre-allocated buffer:
+
+ static PyObject *
+ test_parser(PyObject *self,
+ PyObject *args)
+ {
+ PyObject *str;
+ const char *encoding = "latin-1";
+ char _buffer[10];
+ char *buffer = _buffer;
+ int buffer_len = sizeof(_buffer);
+
+ if (!PyArg_ParseTuple(args, "es#:test_parser",
+ encoding, &buffer, &buffer_len))
+ return NULL;
+ if (!buffer) {
+ PyErr_SetString(PyExc_SystemError,
+ "buffer is NULL");
+ return NULL;
+ }
+ str = PyString_FromStringAndSize(buffer, buffer_len);
+ return str;
+ }
+
File/Stream Output:
-------------------
@@ -837,6 +942,7 @@ Encodings:
History of this Proposal:
-------------------------
+1.3: Added new "es" and "es#" parser markers
1.2: Removed POD about codecs.open()
1.1: Added note about comparisons and hash values. Added note about
case mapping algorithms. Changed stream codecs .read() and