summary | refs | log | tree | commit | diff | stats
path: root/Tools/unicode
diff options
context:
space:
mode:
author: James Gerity <snoopjedi@gmail.com> 2023-09-20 05:07:47 (GMT)
committer: GitHub <noreply@github.com> 2023-09-20 05:07:47 (GMT)
commit: def828995a35a289c9f03500903b5917df93465f (patch)
tree: 775572a1b8bcf3707b41aac832a5bfe456dc34f5 /Tools/unicode
parent: 1293fcc3c6b67b7e8d0081863ec6387e162341eb (diff)
download: cpython-def828995a35a289c9f03500903b5917df93465f.zip
cpython-def828995a35a289c9f03500903b5917df93465f.tar.gz
cpython-def828995a35a289c9f03500903b5917df93465f.tar.bz2
fixes gh-109559: Update `unicodedata` for Unicode 15.1.0 (GH-109560)
--------- Co-authored-by: Benjamin Peterson <benjamin@python.org>
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- Tools/unicode/makeunicodedata.py 29
1 files changed, 17 insertions, 12 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 034642d..6bf5274 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -44,7 +44,7 @@ VERSION = "3.3"
# * Doc/library/stdtypes.rst, and
# * Doc/library/unicodedata.rst
# * Doc/reference/lexical_analysis.rst (two occurrences)
-UNIDATA_VERSION = "15.0.0"
+UNIDATA_VERSION = "15.1.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -101,15 +101,16 @@ EXTENDED_CASE_MASK = 0x4000
# these ranges need to match unicodedata.c:is_unified_ideograph
cjk_ranges = [
- ('3400', '4DBF'),
- ('4E00', '9FFF'),
- ('20000', '2A6DF'),
- ('2A700', '2B739'),
- ('2B740', '2B81D'),
- ('2B820', '2CEA1'),
- ('2CEB0', '2EBE0'),
- ('30000', '3134A'),
- ('31350', '323AF'),
+ ('3400', '4DBF'), # CJK Ideograph Extension A
+ ('4E00', '9FFF'), # CJK Ideograph
+ ('20000', '2A6DF'), # CJK Ideograph Extension B
+ ('2A700', '2B739'), # CJK Ideograph Extension C
+ ('2B740', '2B81D'), # CJK Ideograph Extension D
+ ('2B820', '2CEA1'), # CJK Ideograph Extension E
+ ('2CEB0', '2EBE0'), # CJK Ideograph Extension F
+ ('2EBF0', '2EE5D'), # CJK Ideograph Extension I
+ ('30000', '3134A'), # CJK Ideograph Extension G
+ ('31350', '323AF'), # CJK Ideograph Extension H
]
@@ -1105,11 +1106,15 @@ class UnicodeData:
table[i].east_asian_width = widths[i]
self.widths = widths
- for char, (p,) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
+ for char, (propname, *propinfo) in UcdFile(DERIVED_CORE_PROPERTIES, version).expanded():
+ if propinfo:
+ # this is not a binary property, ignore it
+ continue
+
if table[char]:
# Some properties (e.g. Default_Ignorable_Code_Point)
# apply to unassigned code points; ignore them
- table[char].binary_properties.add(p)
+ table[char].binary_properties.add(propname)
for char_range, value in UcdFile(LINE_BREAK, version):
if value not in MANDATORY_LINE_BREAKS: