diff options
author | Walter Dörwald <walter@livinglogic.de> | 2009-04-25 14:05:52 (GMT) |
---|---|---|
committer | Walter Dörwald <walter@livinglogic.de> | 2009-04-25 14:05:52 (GMT) |
commit | 6c863d1ab2c86d6c67ea9162e75fca4a6f5f901a (patch) | |
tree | 3eae051be1547d7653072953333d6b5ed104dcff /Tools/unicode | |
parent | 22999a69e1e810757823abef87b744ad3686f908 (diff) | |
download | cpython-6c863d1ab2c86d6c67ea9162e75fca4a6f5f901a.zip cpython-6c863d1ab2c86d6c67ea9162e75fca4a6f5f901a.tar.gz cpython-6c863d1ab2c86d6c67ea9162e75fca4a6f5f901a.tar.bz2 |
Merged revisions 71894 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r71894 | walter.doerwald | 2009-04-25 16:03:16 +0200 (Sa, 25 Apr 2009) | 4 lines
Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
makeunicodedata.py and regenerated the Unicode database (This fixes
u'\u1d79'.lower() == '\x00').
........
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 43 |
1 file changed, 21 insertions, 22 deletions
```diff
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 8ede83c..3cd5a1f 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -371,33 +371,32 @@ def makeunicodetype(unicode, trace):
                 flags |= UPPER_MASK
             # use delta predictor for upper/lower/title if it fits
             if record[12]:
-                upper = int(record[12], 16) - char
-                if -32768 <= upper <= 32767 and delta:
-                    upper = upper & 0xffff
-                else:
-                    upper += char
-                    delta = False
+                upper = int(record[12], 16)
             else:
-                upper = 0
+                upper = char
             if record[13]:
-                lower = int(record[13], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    lower = lower & 0xffff
-                else:
-                    lower += char
-                    delta = False
+                lower = int(record[13], 16)
             else:
-                lower = 0
+                lower = char
             if record[14]:
-                title = int(record[14], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    title = title & 0xffff
-                else:
-                    title += char
-                    delta = False
+                title = int(record[14], 16)
+            else:
+                # UCD.html says that a missing title char means that
+                # it defaults to the uppercase character, not to the
+                # character itself. Apparently, in the current UCD (5.x)
+                # this feature is never used
+                title = upper
+            upper_d = upper - char
+            lower_d = lower - char
+            title_d = title - char
+            if -32768 <= upper_d <= 32767 and \
+               -32768 <= lower_d <= 32767 and \
+               -32768 <= title_d <= 32767:
+                # use deltas
+                upper = upper_d & 0xffff
+                lower = lower_d & 0xffff
+                title = title_d & 0xffff
             else:
-                title = 0
-            if not delta:
                 flags |= NODELTA_MASK
             # decimal digit, integer digit
             decimal = 0
```