summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ezio Melotti <ezio.melotti@gmail.com>, 2012-10-05 00:34:02 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com>, 2012-10-05 00:34:02 (GMT)
commit: 080a2c087e5fa08c44ff121d74ea8ad9d4413c58 (patch)
tree: f5bcece562167593f10ac72f539415f46d269d14
parent: b176203ddab0671fb937c488492f4a535a37c639 (diff)
parent: e7f90375b175ed9e610a1de3b5c910f9271375ad (diff)
download: cpython-080a2c087e5fa08c44ff121d74ea8ad9d4413c58.zip
cpython-080a2c087e5fa08c44ff121d74ea8ad9d4413c58.tar.gz
cpython-080a2c087e5fa08c44ff121d74ea8ad9d4413c58.tar.bz2
#16127: merge with 3.3.
-rw-r--r-- Doc/c-api/unicode.rst 2
-rw-r--r-- Doc/reference/lexical_analysis.rst 4
-rw-r--r-- Include/unicodeobject.h 3
-rw-r--r-- Objects/unicodeobject.c 14
4 files changed, 6 insertions, 17 deletions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 0f7d2bb..2ac51df 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1083,8 +1083,6 @@ These are the UTF-32 codec APIs:
After completion, *\*byteorder* is set to the current byte order at the end
of input data.
- In a narrow build codepoints outside the BMP will be decoded as surrogate pairs.
-
If *byteorder* is *NULL*, the codec starts in native order mode.
Return *NULL* if an exception was raised by the codec.
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index bab39f9..94f219b 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -538,9 +538,7 @@ Notes:
this escape sequence. Exactly four hex digits are required.
(6)
- Any Unicode character can be encoded this way, but characters outside the Basic
- Multilingual Plane (BMP) will be encoded using a surrogate pair if Python is
- compiled to use 16-bit code units (the default). Exactly eight hex digits
+ Any Unicode character can be encoded this way. Exactly eight hex digits
are required.
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 956bdbd..4152dd7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1022,8 +1022,7 @@ PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
/* Create a Unicode Object from the given Unicode code point ordinal.
- The ordinal must be in range(0x10000) on narrow Python builds
- (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
+ The ordinal must be in range(0x110000). A ValueError is
raised in case it is not.
*/
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c28d180..daeb4b4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5800,18 +5800,12 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
void *data;
Py_ssize_t expandsize = 0;
- /* Initial allocation is based on the longest-possible unichr
+ /* Initial allocation is based on the longest-possible character
escape.
- In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
- unichr, so in this case it's the longest unichr escape. In
- narrow (UTF-16) builds this is five chars per source unichr
- since there are two unichrs in the surrogate pair, so in narrow
- (UTF-16) builds it's not the longest unichr escape.
-
- In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
- so in the narrow (UTF-16) build case it's the longest unichr
- escape.
+ For UCS1 strings it's '\xxx', 4 bytes per source character.
+ For UCS2 strings it's '\uxxxx', 6 bytes per source character.
+ For UCS4 strings it's '\U00xxxxxx', 10 bytes per source character.
*/
if (!PyUnicode_Check(unicode)) {