summary refs log tree commit diff stats
path: root/Tools/unicode
diff options
context:
space:
mode:
author Walter Dörwald <walter@livinglogic.de> 2009-04-25 14:13:56 (GMT)
committer Walter Dörwald <walter@livinglogic.de> 2009-04-25 14:13:56 (GMT)
commit 1b08b30743ec823cc01a4efbe44b721986e5cb51 (patch)
tree 3e5160e43fa38323ede3edc802b377cd28cdb03c /Tools/unicode
parent 939f9c898a2835a600e346696e3e82fbfc5c27fa (diff)
download cpython-1b08b30743ec823cc01a4efbe44b721986e5cb51.zip
cpython-1b08b30743ec823cc01a4efbe44b721986e5cb51.tar.gz
cpython-1b08b30743ec823cc01a4efbe44b721986e5cb51.tar.bz2
Merged revisions 71894 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r71894 | walter.doerwald | 2009-04-25 16:03:16 +0200 (Sa, 25 Apr 2009) | 4 lines

  Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
  makeunicodedata.py and regenerated the Unicode database (This fixes
  u'\u1d79'.lower() == '\x00').
........
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- Tools/unicode/makeunicodedata.py 43
1 files changed, 21 insertions, 22 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 10fd991..930a0df 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -383,33 +383,32 @@ def makeunicodetype(unicode, trace):
flags |= XID_CONTINUE_MASK
# use delta predictor for upper/lower/title if it fits
if record[12]:
- upper = int(record[12], 16) - char
- if -32768 <= upper <= 32767 and delta:
- upper = upper & 0xffff
- else:
- upper += char
- delta = False
+ upper = int(record[12], 16)
else:
- upper = 0
+ upper = char
if record[13]:
- lower = int(record[13], 16) - char
- if -32768 <= lower <= 32767 and delta:
- lower = lower & 0xffff
- else:
- lower += char
- delta = False
+ lower = int(record[13], 16)
else:
- lower = 0
+ lower = char
if record[14]:
- title = int(record[14], 16) - char
- if -32768 <= lower <= 32767 and delta:
- title = title & 0xffff
- else:
- title += char
- delta = False
+ title = int(record[14], 16)
+ else:
+ # UCD.html says that a missing title char means that
+ # it defaults to the uppercase character, not to the
+ # character itself. Apparently, in the current UCD (5.x)
+ # this feature is never used
+ title = upper
+ upper_d = upper - char
+ lower_d = lower - char
+ title_d = title - char
+ if -32768 <= upper_d <= 32767 and \
+ -32768 <= lower_d <= 32767 and \
+ -32768 <= title_d <= 32767:
+ # use deltas
+ upper = upper_d & 0xffff
+ lower = lower_d & 0xffff
+ title = title_d & 0xffff
else:
- title = 0
- if not delta:
flags |= NODELTA_MASK
# decimal digit, integer digit
decimal = 0