summary | refs | log | tree | commit | diff | stats
path: root/Modules/ucnhash.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/ucnhash.c')
-rw-r--r-- Modules/ucnhash.c 196
1 files changed, 3 insertions, 193 deletions
diff --git a/Modules/ucnhash.c b/Modules/ucnhash.c
index bdcdab1..424b6c5 100644
--- a/Modules/ucnhash.c
+++ b/Modules/ucnhash.c
@@ -1,212 +1,22 @@
-/* unicode character name tables */
-/* rewritten for Python 2.1 by Fredrik Lundh (fredrik@pythonware.com) */
+/* obsolete -- remove this file! */
#include "Python.h"
-#include "ucnhash.h"
-
-/* data file generated by Tools/unicode/makeunicodedata.py */
-#include "unicodename_db.h"
-
-/* -------------------------------------------------------------------- */
-/* database code (cut and pasted from the unidb package) */
-
-static unsigned long
-gethash(const char *s, int len, int scale)
-{
- int i;
- unsigned long h = 0;
- unsigned long ix;
- for (i = 0; i < len; i++) {
- h = (h * scale) + (unsigned char) toupper(s[i]);
- ix = h & 0xff000000;
- if (ix)
- h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
- }
- return h;
-}
-
-static int
-getname(Py_UCS4 code, char* buffer, int buflen)
-{
- int offset;
- int i;
- int word;
- unsigned char* w;
-
- if (code < 0 || code >= 65536)
- return 0;
-
- /* get offset into phrasebook */
- offset = phrasebook_offset1[(code>>phrasebook_shift)];
- offset = phrasebook_offset2[(offset<<phrasebook_shift) +
- (code&((1<<phrasebook_shift)-1))];
- if (!offset)
- return 0;
-
- i = 0;
-
- for (;;) {
- /* get word index */
- word = phrasebook[offset] - phrasebook_short;
- if (word >= 0) {
- word = (word << 8) + phrasebook[offset+1];
- offset += 2;
- } else
- word = phrasebook[offset++];
- if (i) {
- if (i > buflen)
- return 0; /* buffer overflow */
- buffer[i++] = ' ';
- }
- /* copy word string from lexicon. the last character in the
- word has bit 7 set. the last word in a string ends with
- 0x80 */
- w = lexicon + lexicon_offset[word];
- while (*w < 128) {
- if (i >= buflen)
- return 0; /* buffer overflow */
- buffer[i++] = *w++;
- }
- if (i >= buflen)
- return 0; /* buffer overflow */
- buffer[i++] = *w & 127;
- if (*w == 128)
- break; /* end of word */
- }
-
- return 1;
-}
-
-static int
-cmpname(int code, const char* name, int namelen)
-{
- /* check if code corresponds to the given name */
- int i;
- char buffer[NAME_MAXLEN];
- if (!getname(code, buffer, sizeof(buffer)))
- return 0;
- for (i = 0; i < namelen; i++) {
- if (toupper(name[i]) != buffer[i])
- return 0;
- }
- return buffer[namelen] == '\0';
-}
-
-static int
-getcode(const char* name, int namelen, Py_UCS4* code)
-{
- unsigned int h, v;
- unsigned int mask = code_size-1;
- unsigned int i, incr;
-
- /* the following is the same as python's dictionary lookup, with
- only minor changes. see the makeunicodedata script for more
- details */
-
- h = (unsigned int) gethash(name, namelen, code_magic);
- i = (~h) & mask;
- v = code_hash[i];
- if (!v)
- return 0;
- if (cmpname(v, name, namelen)) {
- *code = v;
- return 1;
- }
- incr = (h ^ (h >> 3)) & mask;
- if (!incr)
- incr = mask;
- for (;;) {
- i = (i + incr) & mask;
- v = code_hash[i];
- if (!v)
- return -1;
- if (cmpname(v, name, namelen)) {
- *code = v;
- return 1;
- }
- incr = incr << 1;
- if (incr > mask)
- incr = incr ^ code_poly;
- }
-}
-
-static const _PyUnicode_Name_CAPI hashAPI =
-{
- sizeof(_PyUnicode_Name_CAPI),
- getname,
- getcode
-};
-
-/* -------------------------------------------------------------------- */
-/* Python bindings */
-
-static PyObject *
-ucnhash_getname(PyObject* self, PyObject* args)
-{
- char name[NAME_MAXLEN];
-
- int code;
- if (!PyArg_ParseTuple(args, "i", &code))
- return NULL;
-
- if (!getname((Py_UCS4) code, name, sizeof(name))) {
- PyErr_SetString(PyExc_ValueError, "undefined character code");
- return NULL;
- }
-
- return Py_BuildValue("s", name);
-}
-
-static PyObject *
-ucnhash_getcode(PyObject* self, PyObject* args)
-{
- Py_UCS4 code;
-
- char* name;
- int namelen;
- if (!PyArg_ParseTuple(args, "s#", &name, &namelen))
- return NULL;
-
- if (!getcode(name, namelen, &code)) {
- PyErr_SetString(PyExc_ValueError, "undefined character name");
- return NULL;
- }
-
- return Py_BuildValue("i", code);
-}
static
PyMethodDef ucnhash_methods[] =
{
- {"getname", ucnhash_getname, 1},
- {"getcode", ucnhash_getcode, 1},
{NULL, NULL},
};
-static char *ucnhash_docstring = "ucnhash hash function module";
-
+static char *ucnhash_docstring = "ucnhash hash function module (obsolete)";
-/* Create PyMethodObjects and register them in the module's dict */
DL_EXPORT(void)
initucnhash(void)
{
- PyObject *m, *d, *v;
-
- m = Py_InitModule4(
+ Py_InitModule4(
"ucnhash", /* Module name */
ucnhash_methods, /* Method list */
ucnhash_docstring, /* Module doc-string */
(PyObject *)NULL, /* always pass this as *self */
PYTHON_API_VERSION); /* API Version */
- if (!m)
- return;
-
- d = PyModule_GetDict(m);
- if (!d)
- return;
-
- /* Export C API */
- v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL);
- PyDict_SetItemString(d, "Unicode_Names_CAPI", v);
- Py_XDECREF(v);
}