diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-01-19 19:45:02 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-01-19 19:45:02 (GMT) |
commit | 7c1e4bbe25d3f9d044daa7f24b182e8e40112ad3 (patch) | |
tree | 2761009a3a1c610352489c4ebd94259f1cee5f03 | |
parent | 5458fcf9c5f81e2a09235971f1232ccd5f26d4f1 (diff) | |
download | cpython-7c1e4bbe25d3f9d044daa7f24b182e8e40112ad3.zip cpython-7c1e4bbe25d3f9d044daa7f24b182e8e40112ad3.tar.gz cpython-7c1e4bbe25d3f9d044daa7f24b182e8e40112ad3.tar.bz2 |
gethash/cmpname both looked beyond the end of the character name.
This patch makes u"\N{x}" a bit less dependent on pure luck...
-rw-r--r-- | Modules/ucnhash.c | 30 |
1 files changed, 14 insertions, 16 deletions
diff --git a/Modules/ucnhash.c b/Modules/ucnhash.c
index cc2a38e..d7b3f2c 100644
--- a/Modules/ucnhash.c
+++ b/Modules/ucnhash.c
@@ -11,18 +11,19 @@
 /* database code (cut and pasted from the unidb package) */
 
 static unsigned long
-gethash(const char *s)
+gethash(const char *s, int len)
 {
+    int i;
     unsigned long h = 0;
-    unsigned long i;
-    while (*s) {
+    unsigned long ix;
+    for (i = 0; i < len; i++) {
         /* magic value 47 was chosen to minimize the number
            of collisions for the uninames dataset. see the
            makeunicodedata script for more background */
-        h = (h * 47) + (unsigned char) toupper(*s++);
-        i = h & 0xff000000;
-        if (i)
-            h = (h ^ ((i>>24) & 0xff)) & 0x00ffffff;
+        h = (h * 47) + (unsigned char) toupper(s[i]);
+        ix = h & 0xff000000;
+        if (ix)
+            h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
     }
     return h;
 }
@@ -80,21 +81,18 @@ getname(Py_UCS4 code, char* buffer, int buflen)
 }
 
 static int
-cmpname(int code, const char* name)
+cmpname(int code, const char* name, int namelen)
 {
     /* check if code corresponds to the given name */
     int i;
     char buffer[NAME_MAXLEN];
     if (!getname(code, buffer, sizeof(buffer)))
         return 0;
-    i = 0;
-    for (;;) {
+    for (i = 0; i < namelen; i++) {
         if (toupper(name[i]) != buffer[i])
             return 0;
-        if (!name[i] || !buffer[i])
-            return 1;
-        i++;
     }
+    return buffer[namelen] == '\0';
 }
 
 static int
@@ -108,12 +106,12 @@ getcode(const char* name, int namelen, Py_UCS4* code)
        only minor changes. see the makeunicodedata script for more
        details */
 
-    h = (unsigned int) gethash(name);
+    h = (unsigned int) gethash(name, namelen);
     i = (~h) & mask;
     v = code_hash[i];
     if (!v)
         return 0;
-    if (cmpname(v, name)) {
+    if (cmpname(v, name, namelen)) {
         *code = v;
         return 1;
     }
@@ -125,7 +123,7 @@ getcode(const char* name, int namelen, Py_UCS4* code)
         v = code_hash[i];
         if (!v)
             return -1;
-        if (cmpname(v, name)) {
+        if (cmpname(v, name, namelen)) {
             *code = v;
             return 1;
         }