summaryrefslogtreecommitdiffstats
path: root/Modules/soundex.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/soundex.c')
-rw-r--r--Modules/soundex.c141
1 files changed, 141 insertions, 0 deletions
diff --git a/Modules/soundex.c b/Modules/soundex.c
new file mode 100644
index 0000000..b481e1c
--- /dev/null
+++ b/Modules/soundex.c
@@ -0,0 +1,141 @@
+/*
+ $Header$
+
+ Perform soundex comparisons on strings.
+
+ Soundex is an algorithm that hashes English strings into numerical value.
+ Strings that sound the same are hashed to the same value. This allows
+ for non-literal string matching.
+
+ From: David Wayne Williams <dwwillia@iucf.indiana.edu>
+*/
+
+#include <string.h>
+#include <ctype.h>
+#include "Python.h"
+
+void soundex_hash(char *str, char *result)
+{
+ char *sptr = str; /* pointer into str */
+ char *rptr = result; /* pointer into result */
+
+ if(*str == NULL)
+ {
+ strcpy(result,"000000");
+ return;
+ }
+
+ /* Preserve the first character of the input string.
+ */
+ *(rptr++) = toupper(*(sptr++));
+
+ /* Translate the rest of the input string into result. The following
+ transformations are used:
+
+ 1) All vowles, W, and H, are skipped.
+
+ 2) BFPV = 1
+ CGJKQSXZ = 2
+ DT = 3
+ L = 4
+ MN = 5
+
+ 3) Only translate the first of adjacent equal translations. I.E.
+ remove duplicate digits.
+ */
+
+ for(;(rptr - result) < 6 && *sptr != NULL;sptr++)
+ {
+ switch (toupper(*sptr))
+ {
+ case 'W':
+ case 'H':
+ case 'A':
+ case 'I':
+ case 'O':
+ case 'U':
+ case 'Y':
+ break;
+ case 'B':
+ case 'F':
+ case 'P':
+ case 'V':
+ if(*(rptr - 1) != '1')
+ *(rptr++) = '1';
+ break;
+ case 'C':
+ case 'G':
+ case 'J':
+ case 'K':
+ case 'Q':
+ case 'S':
+ case 'X':
+ case 'Z':
+ if(*(rptr - 1) != '2')
+ *(rptr++) = '2';
+ break;
+ case 'D':
+ case 'T':
+ if(*(rptr - 1) != '3')
+ *(rptr++) = '3';
+ break;
+ case 'L':
+ if(*(rptr - 1) != '4')
+ *(rptr++) = '4';
+ break;
+ case 'M':
+ case 'N':
+ if(*(rptr - 1) != '5')
+ *(rptr++) = '5';
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Pad 0's on right side of string out to 6 characters.
+ */
+ for(; rptr < result + 6; rptr++)
+ *rptr = '0';
+
+ /* Terminate the result string.
+ */
+ *(result + 6) = NULL;
+}
+
+
+static PyObject *
+sound_similar(PyObject *self, PyObject *args)
+{
+ char *str1, *str2;
+ int return_value;
+ char res1[7], res2[7];
+
+ if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
+ return NULL;
+
+ soundex_hash(str1, res1);
+ soundex_hash(str2, res2);
+
+ if(!strcmp(res1,res2))
+ return Py_BuildValue("i",1);
+ else
+ return Py_BuildValue("i",0);
+}
+
+/* Python Method Table.
+ */
+static PyMethodDef SoundexMethods[] =
+{
+ {"sound_similar", sound_similar, 1},
+ {NULL, NULL } /* sentinel */
+};
+
+
+/* Register the method table.
+ */
+void
+initsoundex()
+{
+ (void) Py_InitModule("soundex",SoundexMethods);
+}