summary refs log tree commit diff stats
path: root/Doc
diff options
context:
space:
mode:
author Skip Montanaro <skip@pobox.com> 2005-10-10 22:03:12 (GMT)
committer Skip Montanaro <skip@pobox.com> 2005-10-10 22:03:12 (GMT)
commit 7bd3306d1a926d3dda91e7c48151c83eaeb37157 (patch)
tree 5c0a432e41453eb171158e11ed889c7a22f6705b /Doc
parent b54ee3ddf8e67a2fe7f117db4fa4e119fbb28d6b (diff)
download cpython-7bd3306d1a926d3dda91e7c48151c83eaeb37157.zip
cpython-7bd3306d1a926d3dda91e7c48151c83eaeb37157.tar.gz
cpython-7bd3306d1a926d3dda91e7c48151c83eaeb37157.tar.bz2
backport Py_UNICODE clarification
Diffstat (limited to 'Doc')
-rw-r--r-- Doc/api/concrete.tex 22
1 files changed, 16 insertions, 6 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index 1b994a6..30deb03 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -787,14 +787,24 @@ These are the basic Unicode object types used for the Unicode
implementation in Python:
\begin{ctypedesc}{Py_UNICODE}
- This type represents a 16-bit unsigned storage type which is used by
- Python internally as basis for holding Unicode ordinals. On
- platforms where \ctype{wchar_t} is available and also has 16-bits,
- \ctype{Py_UNICODE} is a typedef alias for \ctype{wchar_t} to enhance
- native platform compatibility. On all other platforms,
- \ctype{Py_UNICODE} is a typedef alias for \ctype{unsigned short}.
+ This type represents the storage type which is used by Python
+ internally as the basis for holding Unicode ordinals. Python's default
+ builds use a 16-bit type for \ctype{Py_UNICODE} and store Unicode
+ values internally as UCS2. It is also possible to build a UCS4
+ version of Python (most recent Linux distributions come with UCS4
+ builds of Python). These builds then use a 32-bit type for
+ \ctype{Py_UNICODE} and store Unicode data internally as UCS4. On
+ platforms where \ctype{wchar_t} is available and compatible with the
+ chosen Python Unicode build variant, \ctype{Py_UNICODE} is a typedef
+ alias for \ctype{wchar_t} to enhance native platform compatibility.
+ On all other platforms, \ctype{Py_UNICODE} is a typedef alias for
+ either \ctype{unsigned short} (UCS2) or \ctype{unsigned long}
+ (UCS4).
\end{ctypedesc}
+Note that UCS2 and UCS4 Python builds are not binary compatible.
+Please keep this in mind when writing extensions or interfaces.
+
\begin{ctypedesc}{PyUnicodeObject}
This subtype of \ctype{PyObject} represents a Python Unicode object.
\end{ctypedesc}