diff options
author | James Gerity <snoopjedi@gmail.com> | 2023-09-20 05:07:47 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-20 05:07:47 (GMT) |
commit | def828995a35a289c9f03500903b5917df93465f (patch) | |
tree | 775572a1b8bcf3707b41aac832a5bfe456dc34f5 /Tools/unicode | |
parent | 1293fcc3c6b67b7e8d0081863ec6387e162341eb (diff) | |
download | cpython-def828995a35a289c9f03500903b5917df93465f.zip cpython-def828995a35a289c9f03500903b5917df93465f.tar.gz cpython-def828995a35a289c9f03500903b5917df93465f.tar.bz2 |
fixes gh-109559: Update `unicodedata` for Unicode 15.1.0 (GH-109560)
---------
Co-authored-by: Benjamin Peterson <benjamin@python.org>
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 29 |
1 file changed, 17 insertions, 12 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py index 034642d..6bf5274 100644 --- a/Tools/unicode/makeunicodedata.py +++ b/Tools/unicode/makeunicodedata.py @@ -44,7 +44,7 @@ VERSION = "3.3" # * Doc/library/stdtypes.rst, and # * Doc/library/unicodedata.rst # * Doc/reference/lexical_analysis.rst (two occurrences) -UNIDATA_VERSION = "15.0.0" +UNIDATA_VERSION = "15.1.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" @@ -101,15 +101,16 @@ EXTENDED_CASE_MASK = 0x4000 # these ranges need to match unicodedata.c:is_unified_ideograph cjk_ranges = [ - ('3400', '4DBF'), - ('4E00', '9FFF'), - ('20000', '2A6DF'), - ('2A700', '2B739'), - ('2B740', '2B81D'), - ('2B820', '2CEA1'), - ('2CEB0', '2EBE0'), - ('30000', '3134A'), - ('31350', '323AF'), + ('3400', '4DBF'), # CJK Ideograph Extension A + ('4E00', '9FFF'), # CJK Ideograph + ('20000', '2A6DF'), # CJK Ideograph Extension B + ('2A700', '2B739'), # CJK Ideograph Extension C + ('2B740', '2B81D'), # CJK Ideograph Extension D + ('2B820', '2CEA1'), # CJK Ideograph Extension E + ('2CEB0', '2EBE0'), # CJK Ideograph Extension F + ('2EBF0', '2EE5D'), # CJK Ideograph Extension I + ('30000', '3134A'), # CJK Ideograph Extension G + ('31350', '323AF'), # CJK Ideograph Extension H ] @@ -1105,11 +1106,15 @@ class UnicodeData: table[i].east_asian_width = widths[i] self.widths = widths - for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded(): + for char, (propname, *propinfo) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded(): + if propinfo: + # this is not a binary property, ignore it + continue + if table[char]: # Some properties (e.g. Default_Ignorable_Code_Point) # apply to unassigned code points; ignore them - table[char].binary_properties.add(p) + table[char].binary_properties.add(propname) for char_range, value in UcdFile(LINE_BREAK, version): if value not in MANDATORY_LINE_BREAKS: |