diff options
Diffstat (limited to 'Modules/regexmodule.c')
-rw-r--r-- | Modules/regexmodule.c | 382 |
1 files changed, 190 insertions, 192 deletions
diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c index 33801af..9cbd4bf 100644 --- a/Modules/regexmodule.c +++ b/Modules/regexmodule.c @@ -31,24 +31,22 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. /* This uses Tatu Ylonen's copyleft-free reimplementation of GNU regular expressions */ -#include "allobjects.h" -#include "modsupport.h" +#include "Python.h" #include "regexpr.h" -#include <ctype.h> -static object *RegexError; /* Exception */ +static PyObject *RegexError; /* Exception */ typedef struct { - OB_HEAD + PyObject_HEAD struct re_pattern_buffer re_patbuf; /* The compiled expression */ struct re_registers re_regs; /* The registers from the last match */ char re_fastmap[256]; /* Storage for fastmap */ - object *re_translate; /* String object for translate table */ - object *re_lastok; /* String object last matched/searched */ - object *re_groupindex; /* Group name to index dictionary */ - object *re_givenpat; /* Pattern with symbolic groups */ - object *re_realpat; /* Pattern without symbolic groups */ + PyObject *re_translate; /* String object for translate table */ + PyObject *re_lastok; /* String object last matched/searched */ + PyObject *re_groupindex; /* Group name to index dictionary */ + PyObject *re_givenpat; /* Pattern with symbolic groups */ + PyObject *re_realpat; /* Pattern without symbolic groups */ } regexobject; /* Regex object methods */ @@ -57,99 +55,99 @@ static void reg_dealloc(re) regexobject *re; { - XDEL(re->re_patbuf.buffer); - XDECREF(re->re_translate); - XDECREF(re->re_lastok); - XDECREF(re->re_groupindex); - XDECREF(re->re_givenpat); - XDECREF(re->re_realpat); - DEL(re); + PyMem_XDEL(re->re_patbuf.buffer); + Py_XDECREF(re->re_translate); + Py_XDECREF(re->re_lastok); + Py_XDECREF(re->re_groupindex); + Py_XDECREF(re->re_givenpat); + Py_XDECREF(re->re_realpat); + PyMem_DEL(re); } -static object * +static PyObject * makeresult(regs) struct re_registers *regs; { - object *v = newtupleobject(RE_NREGS); + PyObject *v = PyTuple_New(RE_NREGS); if (v != NULL) { int i; for (i = 0; i < RE_NREGS; i++) { - object *w; - w = mkvalue("(ii)", regs->start[i], regs->end[i]); + PyObject *w; + w = Py_BuildValue("(ii)", regs->start[i], regs->end[i]); if (w == NULL) { - XDECREF(v); + Py_XDECREF(v); v = NULL; break; } - settupleitem(v, i, w); + PyTuple_SetItem(v, i, w); } } return v; } -static object * +static PyObject * reg_match(re, args) regexobject *re; - object *args; + PyObject *args; { - object *argstring; + PyObject *argstring; char *buffer; int size; int offset; int result; - if (getargs(args, "S", &argstring)) { + if (PyArg_Parse(args, "S", &argstring)) { offset = 0; } else { - err_clear(); - if (!getargs(args, "(Si)", &argstring, &offset)) + PyErr_Clear(); + if (!PyArg_Parse(args, "(Si)", &argstring, &offset)) return NULL; } - buffer = getstringvalue(argstring); - size = getstringsize(argstring); + buffer = PyString_AsString(argstring); + size = PyString_Size(argstring); if (offset < 0 || offset > size) { - err_setstr(RegexError, "match offset out of range"); + PyErr_SetString(RegexError, "match offset out of range"); return NULL; } - XDECREF(re->re_lastok); + Py_XDECREF(re->re_lastok); re->re_lastok = NULL; result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs); if (result < -1) { /* Failure like stack overflow */ - err_setstr(RegexError, "match failure"); + PyErr_SetString(RegexError, "match failure"); return NULL; } if (result >= 0) { - INCREF(argstring); + Py_INCREF(argstring); re->re_lastok = argstring; } - return newintobject((long)result); /* Length of the match or -1 */ + return PyInt_FromLong((long)result); /* Length of the match or -1 */ } -static object * +static PyObject * reg_search(re, args) regexobject *re; - object *args; + PyObject *args; { - object *argstring; + PyObject *argstring; char *buffer; int size; int offset; int range; int result; - if (getargs(args, "S", &argstring)) { + if (PyArg_Parse(args, "S", &argstring)) { offset = 0; } else { - err_clear(); - if (!getargs(args, "(Si)", &argstring, &offset)) + PyErr_Clear(); + if (!PyArg_Parse(args, "(Si)", &argstring, &offset)) return NULL; } - buffer = getstringvalue(argstring); - size = getstringsize(argstring); + buffer = PyString_AsString(argstring); + size = PyString_Size(argstring); if (offset < 0 || offset > size) { - err_setstr(RegexError, "search offset out of range"); + PyErr_SetString(RegexError, "search offset out of range"); return NULL; } /* NB: In Emacs 18.57, the documentation for re_search[_2] and @@ -157,159 +155,159 @@ reg_search(re, args) |range| positions are tried, while the code tries |range|+1 positions. It seems more productive to believe the code! */ range = size - offset; - XDECREF(re->re_lastok); + Py_XDECREF(re->re_lastok); re->re_lastok = NULL; result = re_search(&re->re_patbuf, buffer, size, offset, range, &re->re_regs); if (result < -1) { /* Failure like stack overflow */ - err_setstr(RegexError, "match failure"); + PyErr_SetString(RegexError, "match failure"); return NULL; } if (result >= 0) { - INCREF(argstring); + Py_INCREF(argstring); re->re_lastok = argstring; } - return newintobject((long)result); /* Position of the match or -1 */ + return PyInt_FromLong((long)result); /* Position of the match or -1 */ } -static object * +static PyObject * reg_group(re, args) regexobject *re; - object *args; + PyObject *args; { int i, a, b; - if (args != NULL && is_tupleobject(args)) { - int n = gettuplesize(args); - object *res = newtupleobject(n); + if (args != NULL && PyTuple_Check(args)) { + int n = PyTuple_Size(args); + PyObject *res = PyTuple_New(n); if (res == NULL) return NULL; for (i = 0; i < n; i++) { - object *v = reg_group(re, gettupleitem(args, i)); + PyObject *v = reg_group(re, PyTuple_GetItem(args, i)); if (v == NULL) { - DECREF(res); + Py_DECREF(res); return NULL; } - settupleitem(res, i, v); + PyTuple_SetItem(res, i, v); } return res; } - if (!getargs(args, "i", &i)) { - object *n; - err_clear(); - if (!getargs(args, "S", &n)) + if (!PyArg_Parse(args, "i", &i)) { + PyObject *n; + PyErr_Clear(); + if (!PyArg_Parse(args, "S", &n)) return NULL; else { - object *index; + PyObject *index; if (re->re_groupindex == NULL) index = NULL; else - index = mappinglookup(re->re_groupindex, n); + index = PyDict_GetItem(re->re_groupindex, n); if (index == NULL) { - err_setstr(RegexError, "group() group name doesn't exist"); + PyErr_SetString(RegexError, "group() group name doesn't exist"); return NULL; } - i = getintvalue(index); + i = PyInt_AsLong(index); } } if (i < 0 || i >= RE_NREGS) { - err_setstr(RegexError, "group() index out of range"); + PyErr_SetString(RegexError, "group() index out of range"); return NULL; } if (re->re_lastok == NULL) { - err_setstr(RegexError, + PyErr_SetString(RegexError, "group() only valid after successful match/search"); return NULL; } a = re->re_regs.start[i]; b = re->re_regs.end[i]; if (a < 0 || b < 0) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a); + return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a); } -static struct methodlist reg_methods[] = { - {"match", (method)reg_match}, - {"search", (method)reg_search}, - {"group", (method)reg_group}, +static struct PyMethodDef reg_methods[] = { + {"match", (PyCFunction)reg_match}, + {"search", (PyCFunction)reg_search}, + {"group", (PyCFunction)reg_group}, {NULL, NULL} /* sentinel */ }; -static object * +static PyObject * reg_getattr(re, name) regexobject *re; char *name; { if (strcmp(name, "regs") == 0) { if (re->re_lastok == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } return makeresult(&re->re_regs); } if (strcmp(name, "last") == 0) { if (re->re_lastok == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - INCREF(re->re_lastok); + Py_INCREF(re->re_lastok); return re->re_lastok; } if (strcmp(name, "translate") == 0) { if (re->re_translate == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - INCREF(re->re_translate); + Py_INCREF(re->re_translate); return re->re_translate; } if (strcmp(name, "groupindex") == 0) { if (re->re_groupindex == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - INCREF(re->re_groupindex); + Py_INCREF(re->re_groupindex); return re->re_groupindex; } if (strcmp(name, "realpat") == 0) { if (re->re_realpat == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - INCREF(re->re_realpat); + Py_INCREF(re->re_realpat); return re->re_realpat; } if (strcmp(name, "givenpat") == 0) { if (re->re_givenpat == NULL) { - INCREF(None); - return None; + Py_INCREF(Py_None); + return Py_None; } - INCREF(re->re_givenpat); + Py_INCREF(re->re_givenpat); return re->re_givenpat; } if (strcmp(name, "__members__") == 0) { - object *list = newlistobject(6); + PyObject *list = PyList_New(6); if (list) { - setlistitem(list, 0, newstringobject("last")); - setlistitem(list, 1, newstringobject("regs")); - setlistitem(list, 2, newstringobject("translate")); - setlistitem(list, 3, newstringobject("groupindex")); - setlistitem(list, 4, newstringobject("realpat")); - setlistitem(list, 5, newstringobject("givenpat")); - if (err_occurred()) { - DECREF(list); + PyList_SetItem(list, 0, PyString_FromString("last")); + PyList_SetItem(list, 1, PyString_FromString("regs")); + PyList_SetItem(list, 2, PyString_FromString("translate")); + PyList_SetItem(list, 3, PyString_FromString("groupindex")); + PyList_SetItem(list, 4, PyString_FromString("realpat")); + PyList_SetItem(list, 5, PyString_FromString("givenpat")); + if (PyErr_Occurred()) { + Py_DECREF(list); list = NULL; } } return list; } - return findmethod(reg_methods, (object *)re, name); + return Py_FindMethod(reg_methods, (PyObject *)re, name); } -static typeobject Regextype = { - OB_HEAD_INIT(&Typetype) +static PyTypeObject Regextype = { + PyObject_HEAD_INIT(&PyType_Type) 0, /*ob_size*/ "regex", /*tp_name*/ sizeof(regexobject), /*tp_size*/ @@ -323,90 +321,90 @@ static typeobject Regextype = { 0, /*tp_repr*/ }; -static object * +static PyObject * newregexobject(pattern, translate, givenpat, groupindex) - object *pattern; - object *translate; - object *givenpat; - object *groupindex; + PyObject *pattern; + PyObject *translate; + PyObject *givenpat; + PyObject *groupindex; { regexobject *re; - char *pat = getstringvalue(pattern); - int size = getstringsize(pattern); + char *pat = PyString_AsString(pattern); + int size = PyString_Size(pattern); - if (translate != NULL && getstringsize(translate) != 256) { - err_setstr(RegexError, + if (translate != NULL && PyString_Size(translate) != 256) { + PyErr_SetString(RegexError, "translation table must be 256 bytes"); return NULL; } - re = NEWOBJ(regexobject, &Regextype); + re = PyObject_NEW(regexobject, &Regextype); if (re != NULL) { char *error; re->re_patbuf.buffer = NULL; re->re_patbuf.allocated = 0; re->re_patbuf.fastmap = re->re_fastmap; if (translate) - re->re_patbuf.translate = getstringvalue(translate); + re->re_patbuf.translate = PyString_AsString(translate); else re->re_patbuf.translate = NULL; - XINCREF(translate); + Py_XINCREF(translate); re->re_translate = translate; re->re_lastok = NULL; re->re_groupindex = groupindex; - INCREF(pattern); + Py_INCREF(pattern); re->re_realpat = pattern; - INCREF(givenpat); + Py_INCREF(givenpat); re->re_givenpat = givenpat; error = re_compile_pattern(pat, size, &re->re_patbuf); if (error != NULL) { - err_setstr(RegexError, error); - DECREF(re); + PyErr_SetString(RegexError, error); + Py_DECREF(re); re = NULL; } } - return (object *)re; + return (PyObject *)re; } -static object * +static PyObject * regex_compile(self, args) - object *self; - object *args; + PyObject *self; + PyObject *args; { - object *pat = NULL; - object *tran = NULL; - if (!getargs(args, "S", &pat)) { - err_clear(); - if (!getargs(args, "(SS)", &pat, &tran)) + PyObject *pat = NULL; + PyObject *tran = NULL; + if (!PyArg_Parse(args, "S", &pat)) { + PyErr_Clear(); + if (!PyArg_Parse(args, "(SS)", &pat, &tran)) return NULL; } return newregexobject(pat, tran, pat, NULL); } -static object * +static PyObject * symcomp(pattern, gdict) - object *pattern; - object *gdict; + PyObject *pattern; + PyObject *gdict; { - char *opat = getstringvalue(pattern); - char *oend = opat + getstringsize(pattern); + char *opat = PyString_AsString(pattern); + char *oend = opat + PyString_Size(pattern); int group_count = 0; int escaped = 0; char *o = opat; char *n; char name_buf[128]; char *g; - object *npattern; + PyObject *npattern; int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1; if (oend == opat) { - INCREF(pattern); + Py_INCREF(pattern); return pattern; } - npattern = newsizedstringobject((char*)NULL, getstringsize(pattern)); + npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern)); if (npattern == NULL) return NULL; - n = getstringvalue(npattern); + n = PyString_AsString(npattern); while (o < oend) { if (*o == '(' && escaped == require_escape) { @@ -423,16 +421,16 @@ symcomp(pattern, gdict) g = name_buf; for (++o; o < oend;) { if (*o == '>') { - object *group_name = NULL; - object *group_index = NULL; + PyObject *group_name = NULL; + PyObject *group_index = NULL; *g++ = '\0'; - group_name = newstringobject(name_buf); - group_index = newintobject(group_count); + group_name = PyString_FromString(name_buf); + group_index = PyInt_FromLong(group_count); if (group_name == NULL || group_index == NULL - || mappinginsert(gdict, group_name, group_index) != 0) { - XDECREF(group_name); - XDECREF(group_index); - XDECREF(npattern); + || PyDict_SetItem(gdict, group_name, group_index) != 0) { + Py_XDECREF(group_name); + Py_XDECREF(group_index); + Py_XDECREF(npattern); return NULL; } ++o; /* eat the '>' */ @@ -468,7 +466,7 @@ symcomp(pattern, gdict) } } - if (resizestring(&npattern, n - getstringvalue(npattern)) == 0) + if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0) return npattern; else { return NULL; @@ -476,89 +474,89 @@ symcomp(pattern, gdict) } -static object * +static PyObject * regex_symcomp(self, args) - object *self; - object *args; + PyObject *self; + PyObject *args; { - object *pattern; - object *tran = NULL; - object *gdict = NULL; - object *npattern; - if (!getargs(args, "S", &pattern)) { - err_clear(); - if (!getargs(args, "(SS)", &pattern, &tran)) + PyObject *pattern; + PyObject *tran = NULL; + PyObject *gdict = NULL; + PyObject *npattern; + if (!PyArg_Parse(args, "S", &pattern)) { + PyErr_Clear(); + if (!PyArg_Parse(args, "(SS)", &pattern, &tran)) return NULL; } - gdict = newmappingobject(); + gdict = PyDict_New(); if (gdict == NULL || (npattern = symcomp(pattern, gdict)) == NULL) { - DECREF(gdict); - DECREF(pattern); + Py_DECREF(gdict); + Py_DECREF(pattern); return NULL; } return newregexobject(npattern, tran, pattern, gdict); } -static object *cache_pat; -static object *cache_prog; +static PyObject *cache_pat; +static PyObject *cache_prog; static int update_cache(pat) - object *pat; + PyObject *pat; { if (pat != cache_pat) { - XDECREF(cache_pat); + Py_XDECREF(cache_pat); cache_pat = NULL; - XDECREF(cache_prog); - cache_prog = regex_compile((object *)NULL, pat); + Py_XDECREF(cache_prog); + cache_prog = regex_compile((PyObject *)NULL, pat); if (cache_prog == NULL) return -1; cache_pat = pat; - INCREF(cache_pat); + Py_INCREF(cache_pat); } return 0; } -static object * +static PyObject * regex_match(self, args) - object *self; - object *args; + PyObject *self; + PyObject *args; { - object *pat, *string; - if (!getargs(args, "(SS)", &pat, &string)) + PyObject *pat, *string; + if (!PyArg_Parse(args, "(SS)", &pat, &string)) return NULL; if (update_cache(pat) < 0) return NULL; return reg_match((regexobject *)cache_prog, string); } -static object * +static PyObject * regex_search(self, args) - object *self; - object *args; + PyObject *self; + PyObject *args; { - object *pat, *string; - if (!getargs(args, "(SS)", &pat, &string)) + PyObject *pat, *string; + if (!PyArg_Parse(args, "(SS)", &pat, &string)) return NULL; if (update_cache(pat) < 0) return NULL; return reg_search((regexobject *)cache_prog, string); } -static object * +static PyObject * regex_set_syntax(self, args) - object *self, *args; + PyObject *self, *args; { int syntax; - if (!getintarg(args, &syntax)) + if (!PyArg_Parse(args, "i", &syntax)) return NULL; syntax = re_set_syntax(syntax); - return newintobject((long)syntax); + return PyInt_FromLong((long)syntax); } -static struct methodlist regex_global_methods[] = { +static struct PyMethodDef regex_global_methods[] = { {"compile", regex_compile, 0}, {"symcomp", regex_symcomp, 0}, {"match", regex_match, 0}, @@ -569,28 +567,28 @@ static struct methodlist regex_global_methods[] = { initregex() { - object *m, *d, *v; + PyObject *m, *d, *v; - m = initmodule("regex", regex_global_methods); - d = getmoduledict(m); + m = Py_InitModule("regex", regex_global_methods); + d = PyModule_GetDict(m); /* Initialize regex.error exception */ - RegexError = newstringobject("regex.error"); - if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0) - fatal("can't define regex.error"); + RegexError = PyString_FromString("regex.error"); + if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0) + Py_FatalError("can't define regex.error"); /* Initialize regex.casefold constant */ - v = newsizedstringobject((char *)NULL, 256); + v = PyString_FromStringAndSize((char *)NULL, 256); if (v != NULL) { int i; - char *s = getstringvalue(v); + char *s = PyString_AsString(v); for (i = 0; i < 256; i++) { if (isupper(i)) s[i] = tolower(i); else s[i] = i; } - dictinsert(d, "casefold", v); - DECREF(v); + PyDict_SetItemString(d, "casefold", v); + Py_DECREF(v); } } |