summaryrefslogtreecommitdiffstats
path: root/Modules/unicodedata.c
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de>, 2002-11-23 22:10:29 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de>, 2002-11-23 22:10:29 (GMT)
commit: 8d93ca138355a85a9dedc2abc225e984ea0e6d29 (patch)
tree: e8819fe313a90afaa2e35a7c20413343fbc869f3 /Modules/unicodedata.c
parent: 677bde2dd14ac2c8f170779adcc732f991db8bd6 (diff)
download:
cpython-8d93ca138355a85a9dedc2abc225e984ea0e6d29.zip
cpython-8d93ca138355a85a9dedc2abc225e984ea0e6d29.tar.gz
cpython-8d93ca138355a85a9dedc2abc225e984ea0e6d29.tar.bz2
Verify that the code point parsed from a "CJK UNIFIED IDEOGRAPH-XXXX" name actually denotes a unified ideograph.
Diffstat (limited to 'Modules/unicodedata.c')
-rw-r--r-- Modules/unicodedata.c | 15
1 file changed, 12 insertions, 3 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 502b5bd..c5ba6fc 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -581,6 +581,15 @@ static char *hangul_syllables[][3] = {
};
static int
+is_unified_ideograph(Py_UCS4 code)
+{
+ return (
+ (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
+ (0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
+ (0x20000 <= code && code <= 0x2A6D6));/* CJK Ideograph Extension B */
+}
+
+static int
_getucname(Py_UCS4 code, char* buffer, int buflen)
{
int offset;
@@ -610,9 +619,7 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
return 1;
}
- if ((0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
- (0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
- (0x20000 <= code && code <= 0x2A6D6)) {/* CJK Ideograph Extension B */
+ if (is_unified_ideograph(code)) {
if (buflen < 28)
/* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
return 0;
@@ -743,6 +750,8 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
return 0;
name++;
}
+ if (!is_unified_ideograph(v))
+ return 0;
*code = v;
return 1;
}