summaryrefslogtreecommitdiffstats
path: root/Include/unicodeobject.h
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@haypocalc.com>2011-10-03 21:19:21 (GMT)
committerVictor Stinner <victor.stinner@haypocalc.com>2011-10-03 21:19:21 (GMT)
commit8cfcbed4e3858e84426e606f18f87b5f3b4572fd (patch)
tree71d6169a4d75362afea04e14fd53266451c2d900 /Include/unicodeobject.h
parent77bb47b3125d271408642505cbc4edd074eb7f1c (diff)
downloadcpython-8cfcbed4e3858e84426e606f18f87b5f3b4572fd.zip
cpython-8cfcbed4e3858e84426e606f18f87b5f3b4572fd.tar.gz
cpython-8cfcbed4e3858e84426e606f18f87b5f3b4572fd.tar.bz2
Improve string forms and PyUnicode_Resize() documentation
Also remove the FIXME for resize_copy(): as discussed with Martin, copying the string on resize when the string is not resizable is just fine.
Diffstat (limited to 'Include/unicodeobject.h')
-rw-r--r--Include/unicodeobject.h35
1 files changed, 22 insertions, 13 deletions
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 1b6d1c9..331e839 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -206,7 +206,7 @@ extern "C" {
immediately follow the structure. utf8_length and wstr_length can be found
in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
- /* Unicode strings can be in 4 states:
+ /* There are 4 forms of Unicode strings:
- compact ascii:
@@ -227,7 +227,7 @@ typedef struct {
* ascii = 0
* utf8 != data
- - string created by the legacy API (not ready):
+ - legacy string, not ready:
* structure = PyUnicodeObject
* kind = PyUnicode_WCHAR_KIND
@@ -239,7 +239,7 @@ typedef struct {
* interned = SSTATE_NOT_INTERNED
* ascii = 0
- - string created by the legacy API, ready:
+ - legacy string, ready:
* structure = PyUnicodeObject structure
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
@@ -249,10 +249,16 @@ typedef struct {
* data.any is not NULL
* utf8 = data if ascii is 1
- String created by the legacy API becomes ready when calling
- PyUnicode_READY().
+ Compact strings use only one memory block (structure + characters),
+ whereas legacy strings use one block for the structure and one block
+ for characters.
- See also _PyUnicode_CheckConsistency(). */
+ Legacy strings are created by PyUnicode_FromUnicode() and
+ PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
+ when PyUnicode_READY() is called.
+
+ See also _PyUnicode_CheckConsistency().
+ */
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
@@ -721,19 +727,22 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
#endif
-/* Resize an already allocated Unicode object to the new size length.
+/* Resize a Unicode object allocated by the legacy API (e.g.
+ PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
+ PyUnicode_New) cannot be resized by this function.
+
+ The length is a number of Py_UNICODE characters (and not the number of code
+ points).
*unicode is modified to point to the new (resized) object and 0
returned on success.
- This API may only be called by the function which also called the
- Unicode constructor. The refcount on the object must be 1. Otherwise,
- an error is returned.
+ If the refcount on the object is 1, the function resizes the string in
+ place, which is usually faster than allocating a new string (and copying
+ characters).
Error handling is implemented as follows: an exception is set, -1
- is returned and *unicode left untouched.
-
-*/
+ is returned and *unicode left untouched. */
PyAPI_FUNC(int) PyUnicode_Resize(
PyObject **unicode, /* Pointer to the Unicode object */