Issue #19405: Fixed outdated comments in the _sre module.

author: Serhiy Storchaka <storchaka@gmail.com> 2013-10-27 06:07:46 (GMT)
committer: Serhiy Storchaka <storchaka@gmail.com> 2013-10-27 06:07:46 (GMT)
commit: 1985f7b133d2ff1f695354c50a09a7c859a1d5a4 (patch)
tree: b4b22575877c830ff8aba95d0875e9986e6cdb40
parent: b9dcfea092fa223de2a89c8eae6bb4e7dc2d8959 (diff)
parent: efa5a39fa594738d99dd8829400a9b7697d98b29 (diff)
download: cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.zip cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.gz cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.bz2
2 files changed, 6 insertions, 7 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index e194aaa..691659d 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -270,10 +270,10 @@ def _mk_bitmap(bits):
 # set is constructed. Then, this bitmap is sliced into chunks of 256
 # characters, duplicate chunks are eliminated, and each chunk is
 # given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
+# represented by a 32-bit word sequence, consisting of one word for
+# the number of different chunks, a sequence of 256 bytes (64 words)
 # of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+# sequence of 256-bit chunks (8 words each).
 
 # Compression is normally good: in a typical charset, large ranges of
 # Unicode will be either completely excluded (e.g. if only cyrillic
@@ -286,9 +286,9 @@ def _mk_bitmap(bits):
 # less significant byte is a bit index in the chunk (just like the
 # CHARSET matching).
 
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+# The BIGCHARSET opcode still supports only subsets
 # of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
+# for all of Unicode has not yet been developed. This means,
 # in particular, that negated charsets cannot be represented as
 # bigcharsets.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 9e79855..2d6961b 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1348,8 +1348,7 @@ _compile(PyObject* self_, PyObject* args)
     \_________\_____/        /
                \____________/
 
-   It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
-   bytes wide (the latter if Python is compiled for "wide" unicode support).
+   It also helps that SRE_CODE is always an unsigned type.
 */
 
 /* Defining this one enables tracing of the validator */
author	Serhiy Storchaka <storchaka@gmail.com>	2013-10-27 06:07:46 (GMT)
committer	Serhiy Storchaka <storchaka@gmail.com>	2013-10-27 06:07:46 (GMT)
commit	1985f7b133d2ff1f695354c50a09a7c859a1d5a4 (patch)
tree	b4b22575877c830ff8aba95d0875e9986e6cdb40
parent	b9dcfea092fa223de2a89c8eae6bb4e7dc2d8959 (diff)
parent	efa5a39fa594738d99dd8829400a9b7697d98b29 (diff)
download	cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.zip cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.gz cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.bz2