summaryrefslogtreecommitdiffstats
path: root/Modules/unicodedata.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/unicodedata.c')
-rw-r--r--Modules/unicodedata.c40
1 files changed, 39 insertions, 1 deletions
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index 330b376..3620936 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -348,6 +348,16 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
return 1;
}
+ if ((0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
+ (0x4E00 <= code && code <= 0x9FA5) || /* CJK Ideograph */
+ (0x20000 <= code && code <= 0x2A6D6)) {/* CJK Ideograph Extension B */
+ if (buflen < 28)
+ /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
+ return 0;
+ sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
+ return 1;
+ }
+
if (code >= 0x110000)
return 0;
@@ -449,6 +459,30 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
*code = SBase + (L*VCount+V)*TCount + T;
return 1;
}
+ /* Otherwise, it's an illegal syllable name. */
+ return 0;
+ }
+
+ /* Check for unified ideographs. */
+ if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
+ /* Four or five hexdigits must follow. */
+ v = 0;
+ name += 22;
+ namelen -= 22;
+ if (namelen != 4 && namelen != 5)
+ return 0;
+ while (namelen--) {
+ v *= 16;
+ if (*name >= '0' && *name <= '9')
+ v += *name - '0';
+ else if (*name >= 'A' && *name <= 'F')
+ v += *name - 'A' + 10;
+ else
+ return 0;
+ name++;
+ }
+ *code = v;
+ return 1;
}
/* the following is the same as python's dictionary lookup, with
@@ -535,7 +569,11 @@ unicodedata_lookup(PyObject* self, PyObject* args)
return NULL;
if (!_getcode(name, namelen, &code)) {
- PyErr_SetString(PyExc_KeyError, "undefined character name");
+ char fmt[] = "undefined character name '%s'";
+ char *buf = PyMem_MALLOC(sizeof(fmt) + namelen);
+ sprintf(buf, fmt, name);
+ PyErr_SetString(PyExc_KeyError, buf);
+ PyMem_FREE(buf);
return NULL;
}