diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-27 06:07:46 (GMT) |
---|---|---|
committer | Serhiy Storchaka <storchaka@gmail.com> | 2013-10-27 06:07:46 (GMT) |
commit | 1985f7b133d2ff1f695354c50a09a7c859a1d5a4 (patch) | |
tree | b4b22575877c830ff8aba95d0875e9986e6cdb40 /Lib | |
parent | b9dcfea092fa223de2a89c8eae6bb4e7dc2d8959 (diff) | |
parent | efa5a39fa594738d99dd8829400a9b7697d98b29 (diff) | |
download | cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.zip cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.gz cpython-1985f7b133d2ff1f695354c50a09a7c859a1d5a4.tar.bz2 |
Issue #19405: Fixed outdated comments in the _sre module.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre_compile.py | 10 |
1 files changed, 5 insertions, 5 deletions
```diff
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index e194aaa..691659d 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -270,10 +270,10 @@ def _mk_bitmap(bits):
 # set is constructed. Then, this bitmap is sliced into chunks of 256
 # characters, duplicate chunks are eliminated, and each chunk is
 # given a number. In the compiled expression, the charset is
-# represented by a 16-bit word sequence, consisting of one word for
-# the number of different chunks, a sequence of 256 bytes (128 words)
+# represented by a 32-bit word sequence, consisting of one word for
+# the number of different chunks, a sequence of 256 bytes (64 words)
 # of chunk numbers indexed by their original chunk position, and a
-# sequence of chunks (16 words each).
+# sequence of 256-bit chunks (8 words each).
 
 # Compression is normally good: in a typical charset, large ranges of
 # Unicode will be either completely excluded (e.g. if only cyrillic
@@ -286,9 +286,9 @@ def _mk_bitmap(bits):
 # less significant byte is a bit index in the chunk (just like the
 # CHARSET matching).
 
-# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+# The BIGCHARSET opcode still supports only subsets
 # of the basic multilingual plane; an efficient representation
-# for all of UTF-16 has not yet been developed. This means,
+# for all of Unicode has not yet been developed. This means,
 # in particular, that negated charsets cannot be represented as
 # bigcharsets.
 
```