From ee865c64da9892df7ebfe7863025e0122634e4ea Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Fri, 19 Jan 2001 11:00:42 +0000 Subject: added "getcode" and "getname" methods to the ucnhash module (they're probably more useful for the test code than for any applications, but one never knows...) --- Lib/test/test_ucn.py | 17 +++++++++++++ Modules/ucnhash.c | 71 ++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index a33d111..a51dc9a 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -37,6 +37,23 @@ s = u"\N{LATIN CAPITAL LETTER T}" \ u"\N{LATIN SMALL LETTER P}" \ u"\N{FULL STOP}" verify(s == u"The rEd fOx ate the sheep.", s) + +import ucnhash + +# minimal sanity check +for char in "SPAM": + name = "LATIN SMALL LETTER %s" % char + code = ucnhash.getcode(name) + verify(ucnhash.getname(code) == name) + +# loop over all characters in the database +for code in range(65536): + try: + name = ucnhash.getname(code) + verify(ucnhash.getcode(name) == code) + except ValueError: + pass + print "done." # misc. symbol testing diff --git a/Modules/ucnhash.c b/Modules/ucnhash.c index 67a8895..201707f 100644 --- a/Modules/ucnhash.c +++ b/Modules/ucnhash.c @@ -22,7 +22,6 @@ typedef struct { * Generated on: Fri Jul 14 08:00:58 2000 */ -#define cKeys 10538 #define k_cHashElements 18836 #define k_cchMaxKey 83 #define k_cKeys 10538 @@ -111,12 +110,6 @@ hash(const char *key, unsigned int cch) return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) % k_cHashElements; } -const _Py_UnicodeCharacterName * -getValue(unsigned long iKey) -{ - return (_Py_UnicodeCharacterName *) &aucn[iKey]; -} - static int mystrnicmp(const char *s1, const char *s2, size_t count) { @@ -136,22 +129,34 @@ mystrnicmp(const char *s1, const char *s2, size_t count) /* bindings for the new API */ static int -ucnhash_getname(Py_UCS4 code, char* buffer, int buflen) +getname(Py_UCS4 code, char* buffer, int buflen) { + int i; + + /* brute force search */ + for (i = 0; i < k_cKeys; i++) + if (aucn[i].value == code) { + int len = strlen(aucn[i].pszUCN); + if (buflen <= len) + return 0; + memcpy(buffer, aucn[i].pszUCN, len+1); + return 1; + } + return 0; } static int -ucnhash_getcode(const char* name, int namelen, Py_UCS4* code) +getcode(const char* name, int namelen, Py_UCS4* code) { unsigned long j; j = hash(name, namelen); - if (j > cKeys || mystrnicmp(name, getValue(j)->pszUCN, namelen) != 0) + if (j > k_cKeys || mystrnicmp(name, aucn[j].pszUCN, namelen) != 0) return 0; - *code = getValue(j)->value; + *code = aucn[j].value; return 1; } @@ -159,13 +164,53 @@ ucnhash_getcode(const char* name, int namelen, Py_UCS4* code) static const _PyUnicode_Name_CAPI hashAPI = { sizeof(_PyUnicode_Name_CAPI), - ucnhash_getname, - ucnhash_getcode + getname, + getcode }; +/* -------------------------------------------------------------------- */ +/* Python bindings */ + +static PyObject * +ucnhash_getname(PyObject* self, PyObject* args) +{ + char name[256]; + + int code; + if (!PyArg_ParseTuple(args, "i", &code)) + return NULL; + + if (!getname((Py_UCS4) code, name, sizeof(name))) { + PyErr_SetString(PyExc_ValueError, "undefined character code"); + return NULL; + } + + return Py_BuildValue("s", name); +} + +static PyObject * +ucnhash_getcode(PyObject* self, PyObject* args) +{ + Py_UCS4 code; + + char* name; + int namelen; + if (!PyArg_ParseTuple(args, "s#", &name, &namelen)) + return NULL; + + if (!getcode(name, namelen, &code)) { + PyErr_SetString(PyExc_ValueError, "undefined character name"); + return NULL; + } + + return Py_BuildValue("i", code); +} + static PyMethodDef ucnhash_methods[] = { + {"getname", ucnhash_getname, 1}, + {"getcode", ucnhash_getcode, 1}, {NULL, NULL}, }; -- cgit v0.12