diff options
-rw-r--r-- | Modules/regexpr.c | 54 | ||||
-rw-r--r-- | Modules/regexpr.h | 12 | ||||
-rw-r--r-- | Modules/reopmodule.c | 131 |
3 files changed, 165 insertions, 32 deletions
diff --git a/Modules/regexpr.c b/Modules/regexpr.c index 6b6ccbef..2d30171 100644 --- a/Modules/regexpr.c +++ b/Modules/regexpr.c @@ -1,7 +1,3 @@ -/* - * -*- mode: c-mode; c-file-style: python -*- - */ - /* regexpr.c * * Author: Tatu Ylonen <ylo@ngs.fi> @@ -472,16 +468,15 @@ static int regexp_ansi_sequences; #define MAX_NESTING 100 /* max nesting level of operators */ #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] -#define Sword 1 -static char re_syntax_table[256]; +char re_syntax_table[256]; -static void re_compile_initialize(void) +void re_compile_initialize(void) { int a; static int syntax_table_inited = 0; - + if (!syntax_table_inited) { syntax_table_inited = 1; @@ -491,7 +486,11 @@ static void re_compile_initialize(void) for (a = 'A'; a <= 'Z'; a++) re_syntax_table[a] = Sword; for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword; + re_syntax_table[a] = Sword | Sdigit; + re_syntax_table['_'] = Sword; + for (a = 9; a <= 13; a++) + re_syntax_table[a] = Swhitespace; + re_syntax_table[' '] = Swhitespace; } re_compile_initialized = 1; for (a = 0; a < 256; a++) @@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code, return; /* we have already been here */ visited[pos] = 1; for (;;) - switch (code[pos++]) - { + switch (code[pos++]) { case Cend: - { - *can_be_null = 1; - return; - } + { + *can_be_null = 1; + return; + } case Cbol: case Cbegbuf: case Cendbuf: @@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp, NEW_STATE(state, bufp->num_registers); - if (!re_compile_initialized) - re_compile_initialize(); - continue_matching: switch (*code++) { @@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp, { if (text == textend) goto fail; - if (SYNTAX(*text) != Sword) + if (SYNTAX(*text) & Sword) goto fail; if (text == textstart) goto continue_matching; - if (SYNTAX(text[-1]) != Sword) + if (!(SYNTAX(text[-1]) & Sword)) goto continue_matching; goto fail; } @@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp, { if (text == textstart) goto fail; - if (SYNTAX(text[-1]) != Sword) + if (!(SYNTAX(text[-1]) & Sword)) goto fail; if (text == textend) goto continue_matching; - if (SYNTAX(*text) == Sword) + if (SYNTAX(*text) & Sword) goto fail; goto continue_matching; } @@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp, if (text == textstart || text == textend) goto continue_matching; - if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)) + if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)) goto continue_matching; goto fail; } @@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp, * beginning and end of buffer. */ if (text == textstart || text == textend) goto fail; - if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))) + if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))) goto fail; goto continue_matching; } case Csyntaxspec: { NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) + if (!(SYNTAX(ch) & (unsigned char)*code++)) goto fail; goto continue_matching; } case Cnotsyntaxspec: { NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) + if (SYNTAX(ch) & (unsigned char)*code++) break; goto continue_matching; } @@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp, } return -1; } + +/* +** Local Variables: +** mode: c +** c-file-style: "python" +** End: +*/ diff --git a/Modules/regexpr.h b/Modules/regexpr.h index 1221802..91f00b9 100644 --- a/Modules/regexpr.h +++ b/Modules/regexpr.h @@ -67,10 +67,16 @@ typedef struct re_registers #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) #define RE_SYNTAX_EMACS 0 +#define Sword 1 +#define Swhitespace 2 +#define Sdigit 4 + /* Rename all exported symbols to avoid conflicts with similarly named symbols in some systems' standard C libraries... */ #define re_syntax _Py_re_syntax +#define re_syntax_table _Py_re_syntax_table +#define re_compile_initialize _Py_re_compile_initialize #define re_set_syntax _Py_re_set_syntax #define re_compile_pattern _Py_re_compile_pattern #define re_match _Py_re_match @@ -85,6 +91,10 @@ extern int re_syntax; /* This is the actual syntax mask. It was added so that Python could do * syntax-dependent munging of patterns before compilation. */ +extern char re_syntax_table[256]; + +void re_compile_initialize(void); + int re_set_syntax(int syntax); /* This sets the syntax to use and returns the previous syntax. The * syntax is specified by a bit mask of the above defined bits. */ @@ -133,6 +143,8 @@ int re_exec(char *s); #else /* HAVE_PROTOTYPES */ extern int re_syntax; +extern char re_syntax_table[256]; +void re_compile_initialize(); int re_set_syntax(); char *re_compile_pattern(); int re_match(); diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c index 9b928f5..0817626 100644 --- a/Modules/reopmodule.c +++ b/Modules/reopmodule.c @@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE. static PyObject *ReopError; /* Exception */ +#define IGNORECASE 0x01 +#define MULTILINE 0x02 +#define DOTALL 0x04 +#define VERBOSE 0x08 + +static char *reop_casefold; + static PyObject * makeresult(regs, num_regs) struct re_registers *regs; @@ -90,6 +97,10 @@ reop_match(self, args) int flags, pos, result; struct re_pattern_buffer bufp; struct re_registers re_regs; + PyObject *modules = NULL; + PyObject *reopmodule = NULL; + PyObject *reopdict = NULL; + PyObject *casefold = NULL; if (!PyArg_Parse(args, "(s#iiis#is#i)", &(bufp.buffer), &(bufp.allocated), @@ -102,20 +113,44 @@ reop_match(self, args) /* XXX sanity-check the input data */ bufp.used=bufp.allocated; - bufp.translate=NULL; + if (flags & IGNORECASE) + { + if ((modules = PyImport_GetModuleDict()) == NULL) + return NULL; + + if ((reopmodule = PyDict_GetItemString(modules, + "reop")) == NULL) + return NULL; + + if ((reopdict = PyModule_GetDict(reopmodule)) == NULL) + return NULL; + + if ((casefold = PyDict_GetItemString(reopdict, + "casefold")) == NULL) + return NULL; + + bufp.translate = PyString_AsString(casefold); + } + else + bufp.translate=NULL; bufp.fastmap_accurate=1; bufp.can_be_null=can_be_null; bufp.uses_registers=1; bufp.anchor=anchor; - for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;} + for(i=0; i<bufp.num_registers; i++) { + re_regs.start[i]=-1; + re_regs.end[i]=-1; + } result = re_match(&bufp, string, stringlen, pos, &re_regs); + if (result < -1) { /* Failure like stack overflow */ PyErr_SetString(ReopError, "match failure"); + return NULL; } if (result == -1) { @@ -136,6 +171,10 @@ reop_search(self, args) int flags, pos, result; struct re_pattern_buffer bufp; struct re_registers re_regs; + PyObject *modules = NULL; + PyObject *reopmodule = NULL; + PyObject *reopdict = NULL; + PyObject *casefold = NULL; if (!PyArg_Parse(args, "(s#iiis#is#i)", &(bufp.buffer), &(bufp.allocated), @@ -148,26 +187,51 @@ reop_search(self, args) /* XXX sanity-check the input data */ bufp.used=bufp.allocated; - bufp.translate=NULL; + if (flags & IGNORECASE) + { + if ((modules = PyImport_GetModuleDict()) == NULL) + return NULL; + + if ((reopmodule = PyDict_GetItemString(modules, + "reop")) == NULL) + return NULL; + + if ((reopdict = PyModule_GetDict(reopmodule)) == NULL) + return NULL; + + if ((casefold = PyDict_GetItemString(reopdict, + "casefold")) == NULL) + return NULL; + + bufp.translate = PyString_AsString(casefold); + } + else + bufp.translate=NULL; bufp.fastmap_accurate=1; bufp.can_be_null=can_be_null; bufp.uses_registers=1; bufp.anchor=anchor; - for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;} + for(i = 0; i < bufp.num_registers; i++) { + re_regs.start[i] = -1; + re_regs.end[i] = -1; + } result = re_search(&bufp, string, stringlen, pos, stringlen-pos, &re_regs); + if (result < -1) { /* Failure like stack overflow */ PyErr_SetString(ReopError, "match failure"); return NULL; } + if (result == -1) { Py_INCREF(Py_None); return Py_None; } + return makeresult(&re_regs, bufp.num_registers); } @@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = { void initreop() { - PyObject *m, *d, *v; + PyObject *m, *d, *k, *v, *o; int i; char *s; - + char j[2]; + + re_compile_initialize(); + m = Py_InitModule("reop", reop_global_methods); d = PyModule_GetDict(m); @@ -370,12 +437,64 @@ initreop() else s[i] = i; } + if (PyDict_SetItemString(d, "casefold", v) < 0) goto finally; Py_DECREF(v); + /* Initialize the syntax table */ + + o = PyDict_New(); + if (o == NULL) + goto finally; + + j[1] = '\0'; + for (i = 0; i < 256; i++) + { + j[0] = i; + k = PyString_FromStringAndSize(j, 1); + if (k == NULL) + goto finally; + v = PyInt_FromLong(re_syntax_table[i]); + if (v == NULL) + goto finally; + if (PyDict_SetItem(o, k, v) < 0) + goto finally; + Py_DECREF(k); + Py_DECREF(v); + } + + if (PyDict_SetItemString(d, "syntax_table", o) < 0) + goto finally; + Py_DECREF(o); + + v = PyInt_FromLong(Sword); + if (v == NULL) + goto finally; + + if (PyDict_SetItemString(d, "word", v) < 0) + goto finally; + Py_DECREF(v); + + v = PyInt_FromLong(Swhitespace); + if (v == NULL) + goto finally; + + if (PyDict_SetItemString(d, "whitespace", v) < 0) + goto finally; + Py_DECREF(v); + + v = PyInt_FromLong(Sdigit); + if (v == NULL) + goto finally; + + if (PyDict_SetItemString(d, "digit", v) < 0) + goto finally; + Py_DECREF(v); + if (!PyErr_Occurred()) return; + finally: Py_FatalError("can't initialize reop module"); } |