diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-11-23 18:01:32 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-11-23 18:01:32 (GMT) |
commit | ef7fe2e8137824248cf45d316535b76dae302b5a (patch) | |
tree | 01a0b67fae5af758f8c8cc18459266756f4e1074 /Modules | |
parent | 8579efc86c18b7b824ec080582f032674e2f8a5e (diff) | |
download | cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.zip cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.gz cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.bz2 |
Implement names for CJK unified ideographs. Add name to KeyError output.
Verify that the lookup for an existing name succeeds.
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/unicodedata.c | 40 |
1 files changed, 39 insertions, 1 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 330b376..3620936 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -348,6 +348,16 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
         return 1;
     }
 
+    if ((0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
+        (0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
+        (0x20000 <= code && code <= 0x2A6D6)) {/* CJK Ideograph Extension B */
+        if (buflen < 28)
+            /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
+            return 0;
+        sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
+        return 1;
+    }
+
     if (code >= 0x110000)
         return 0;
 
@@ -449,6 +459,30 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
             *code = SBase + (L*VCount+V)*TCount + T;
             return 1;
         }
+        /* Otherwise, it's an illegal syllable name. */
+        return 0;
+    }
+
+    /* Check for unified ideographs. */
+    if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
+        /* Four or five hexdigits must follow. */
+        v = 0;
+        name += 22;
+        namelen -= 22;
+        if (namelen != 4 && namelen != 5)
+            return 0;
+        while (namelen--) {
+            v *= 16;
+            if (*name >= '0' && *name <= '9')
+                v += *name - '0';
+            else if (*name >= 'A' && *name <= 'F')
+                v += *name - 'A' + 10;
+            else
+                return 0;
+            name++;
+        }
+        *code = v;
+        return 1;
     }
 
     /* the following is the same as python's dictionary lookup, with
@@ -535,7 +569,11 @@ unicodedata_lookup(PyObject* self, PyObject* args)
         return NULL;
 
     if (!_getcode(name, namelen, &code)) {
-        PyErr_SetString(PyExc_KeyError, "undefined character name");
+        char fmt[] = "undefined character name '%s'";
+        char *buf = PyMem_MALLOC(sizeof(fmt) + namelen);
+        sprintf(buf, fmt, name);
+        PyErr_SetString(PyExc_KeyError, buf);
+        PyMem_FREE(buf);
         return NULL;
     }
 