summary refs log tree commit diff stats
path: root/Modules/regexmodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/regexmodule.c')
-rw-r--r-- Modules/regexmodule.c 382
1 files changed, 190 insertions, 192 deletions
diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c
index 33801af..9cbd4bf 100644
--- a/Modules/regexmodule.c
+++ b/Modules/regexmodule.c
@@ -31,24 +31,22 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* This uses Tatu Ylonen's copyleft-free reimplementation of
GNU regular expressions */
-#include "allobjects.h"
-#include "modsupport.h"
+#include "Python.h"
#include "regexpr.h"
-#include <ctype.h>
-static object *RegexError; /* Exception */
+static PyObject *RegexError; /* Exception */
typedef struct {
- OB_HEAD
+ PyObject_HEAD
struct re_pattern_buffer re_patbuf; /* The compiled expression */
struct re_registers re_regs; /* The registers from the last match */
char re_fastmap[256]; /* Storage for fastmap */
- object *re_translate; /* String object for translate table */
- object *re_lastok; /* String object last matched/searched */
- object *re_groupindex; /* Group name to index dictionary */
- object *re_givenpat; /* Pattern with symbolic groups */
- object *re_realpat; /* Pattern without symbolic groups */
+ PyObject *re_translate; /* String object for translate table */
+ PyObject *re_lastok; /* String object last matched/searched */
+ PyObject *re_groupindex; /* Group name to index dictionary */
+ PyObject *re_givenpat; /* Pattern with symbolic groups */
+ PyObject *re_realpat; /* Pattern without symbolic groups */
} regexobject;
/* Regex object methods */
@@ -57,99 +55,99 @@ static void
reg_dealloc(re)
regexobject *re;
{
- XDEL(re->re_patbuf.buffer);
- XDECREF(re->re_translate);
- XDECREF(re->re_lastok);
- XDECREF(re->re_groupindex);
- XDECREF(re->re_givenpat);
- XDECREF(re->re_realpat);
- DEL(re);
+ PyMem_XDEL(re->re_patbuf.buffer);
+ Py_XDECREF(re->re_translate);
+ Py_XDECREF(re->re_lastok);
+ Py_XDECREF(re->re_groupindex);
+ Py_XDECREF(re->re_givenpat);
+ Py_XDECREF(re->re_realpat);
+ PyMem_DEL(re);
}
-static object *
+static PyObject *
makeresult(regs)
struct re_registers *regs;
{
- object *v = newtupleobject(RE_NREGS);
+ PyObject *v = PyTuple_New(RE_NREGS);
if (v != NULL) {
int i;
for (i = 0; i < RE_NREGS; i++) {
- object *w;
- w = mkvalue("(ii)", regs->start[i], regs->end[i]);
+ PyObject *w;
+ w = Py_BuildValue("(ii)", regs->start[i], regs->end[i]);
if (w == NULL) {
- XDECREF(v);
+ Py_XDECREF(v);
v = NULL;
break;
}
- settupleitem(v, i, w);
+ PyTuple_SetItem(v, i, w);
}
}
return v;
}
-static object *
+static PyObject *
reg_match(re, args)
regexobject *re;
- object *args;
+ PyObject *args;
{
- object *argstring;
+ PyObject *argstring;
char *buffer;
int size;
int offset;
int result;
- if (getargs(args, "S", &argstring)) {
+ if (PyArg_Parse(args, "S", &argstring)) {
offset = 0;
}
else {
- err_clear();
- if (!getargs(args, "(Si)", &argstring, &offset))
+ PyErr_Clear();
+ if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
return NULL;
}
- buffer = getstringvalue(argstring);
- size = getstringsize(argstring);
+ buffer = PyString_AsString(argstring);
+ size = PyString_Size(argstring);
if (offset < 0 || offset > size) {
- err_setstr(RegexError, "match offset out of range");
+ PyErr_SetString(RegexError, "match offset out of range");
return NULL;
}
- XDECREF(re->re_lastok);
+ Py_XDECREF(re->re_lastok);
re->re_lastok = NULL;
result = re_match(&re->re_patbuf, buffer, size, offset, &re->re_regs);
if (result < -1) {
/* Failure like stack overflow */
- err_setstr(RegexError, "match failure");
+ PyErr_SetString(RegexError, "match failure");
return NULL;
}
if (result >= 0) {
- INCREF(argstring);
+ Py_INCREF(argstring);
re->re_lastok = argstring;
}
- return newintobject((long)result); /* Length of the match or -1 */
+ return PyInt_FromLong((long)result); /* Length of the match or -1 */
}
-static object *
+static PyObject *
reg_search(re, args)
regexobject *re;
- object *args;
+ PyObject *args;
{
- object *argstring;
+ PyObject *argstring;
char *buffer;
int size;
int offset;
int range;
int result;
- if (getargs(args, "S", &argstring)) {
+ if (PyArg_Parse(args, "S", &argstring)) {
offset = 0;
}
else {
- err_clear();
- if (!getargs(args, "(Si)", &argstring, &offset))
+ PyErr_Clear();
+ if (!PyArg_Parse(args, "(Si)", &argstring, &offset))
return NULL;
}
- buffer = getstringvalue(argstring);
- size = getstringsize(argstring);
+ buffer = PyString_AsString(argstring);
+ size = PyString_Size(argstring);
if (offset < 0 || offset > size) {
- err_setstr(RegexError, "search offset out of range");
+ PyErr_SetString(RegexError, "search offset out of range");
return NULL;
}
/* NB: In Emacs 18.57, the documentation for re_search[_2] and
@@ -157,159 +155,159 @@ reg_search(re, args)
|range| positions are tried, while the code tries |range|+1
positions. It seems more productive to believe the code! */
range = size - offset;
- XDECREF(re->re_lastok);
+ Py_XDECREF(re->re_lastok);
re->re_lastok = NULL;
result = re_search(&re->re_patbuf, buffer, size, offset, range,
&re->re_regs);
if (result < -1) {
/* Failure like stack overflow */
- err_setstr(RegexError, "match failure");
+ PyErr_SetString(RegexError, "match failure");
return NULL;
}
if (result >= 0) {
- INCREF(argstring);
+ Py_INCREF(argstring);
re->re_lastok = argstring;
}
- return newintobject((long)result); /* Position of the match or -1 */
+ return PyInt_FromLong((long)result); /* Position of the match or -1 */
}
-static object *
+static PyObject *
reg_group(re, args)
regexobject *re;
- object *args;
+ PyObject *args;
{
int i, a, b;
- if (args != NULL && is_tupleobject(args)) {
- int n = gettuplesize(args);
- object *res = newtupleobject(n);
+ if (args != NULL && PyTuple_Check(args)) {
+ int n = PyTuple_Size(args);
+ PyObject *res = PyTuple_New(n);
if (res == NULL)
return NULL;
for (i = 0; i < n; i++) {
- object *v = reg_group(re, gettupleitem(args, i));
+ PyObject *v = reg_group(re, PyTuple_GetItem(args, i));
if (v == NULL) {
- DECREF(res);
+ Py_DECREF(res);
return NULL;
}
- settupleitem(res, i, v);
+ PyTuple_SetItem(res, i, v);
}
return res;
}
- if (!getargs(args, "i", &i)) {
- object *n;
- err_clear();
- if (!getargs(args, "S", &n))
+ if (!PyArg_Parse(args, "i", &i)) {
+ PyObject *n;
+ PyErr_Clear();
+ if (!PyArg_Parse(args, "S", &n))
return NULL;
else {
- object *index;
+ PyObject *index;
if (re->re_groupindex == NULL)
index = NULL;
else
- index = mappinglookup(re->re_groupindex, n);
+ index = PyDict_GetItem(re->re_groupindex, n);
if (index == NULL) {
- err_setstr(RegexError, "group() group name doesn't exist");
+ PyErr_SetString(RegexError, "group() group name doesn't exist");
return NULL;
}
- i = getintvalue(index);
+ i = PyInt_AsLong(index);
}
}
if (i < 0 || i >= RE_NREGS) {
- err_setstr(RegexError, "group() index out of range");
+ PyErr_SetString(RegexError, "group() index out of range");
return NULL;
}
if (re->re_lastok == NULL) {
- err_setstr(RegexError,
+ PyErr_SetString(RegexError,
"group() only valid after successful match/search");
return NULL;
}
a = re->re_regs.start[i];
b = re->re_regs.end[i];
if (a < 0 || b < 0) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- return newsizedstringobject(getstringvalue(re->re_lastok)+a, b-a);
+ return PyString_FromStringAndSize(PyString_AsString(re->re_lastok)+a, b-a);
}
-static struct methodlist reg_methods[] = {
- {"match", (method)reg_match},
- {"search", (method)reg_search},
- {"group", (method)reg_group},
+static struct PyMethodDef reg_methods[] = {
+ {"match", (PyCFunction)reg_match},
+ {"search", (PyCFunction)reg_search},
+ {"group", (PyCFunction)reg_group},
{NULL, NULL} /* sentinel */
};
-static object *
+static PyObject *
reg_getattr(re, name)
regexobject *re;
char *name;
{
if (strcmp(name, "regs") == 0) {
if (re->re_lastok == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
return makeresult(&re->re_regs);
}
if (strcmp(name, "last") == 0) {
if (re->re_lastok == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- INCREF(re->re_lastok);
+ Py_INCREF(re->re_lastok);
return re->re_lastok;
}
if (strcmp(name, "translate") == 0) {
if (re->re_translate == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- INCREF(re->re_translate);
+ Py_INCREF(re->re_translate);
return re->re_translate;
}
if (strcmp(name, "groupindex") == 0) {
if (re->re_groupindex == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- INCREF(re->re_groupindex);
+ Py_INCREF(re->re_groupindex);
return re->re_groupindex;
}
if (strcmp(name, "realpat") == 0) {
if (re->re_realpat == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- INCREF(re->re_realpat);
+ Py_INCREF(re->re_realpat);
return re->re_realpat;
}
if (strcmp(name, "givenpat") == 0) {
if (re->re_givenpat == NULL) {
- INCREF(None);
- return None;
+ Py_INCREF(Py_None);
+ return Py_None;
}
- INCREF(re->re_givenpat);
+ Py_INCREF(re->re_givenpat);
return re->re_givenpat;
}
if (strcmp(name, "__members__") == 0) {
- object *list = newlistobject(6);
+ PyObject *list = PyList_New(6);
if (list) {
- setlistitem(list, 0, newstringobject("last"));
- setlistitem(list, 1, newstringobject("regs"));
- setlistitem(list, 2, newstringobject("translate"));
- setlistitem(list, 3, newstringobject("groupindex"));
- setlistitem(list, 4, newstringobject("realpat"));
- setlistitem(list, 5, newstringobject("givenpat"));
- if (err_occurred()) {
- DECREF(list);
+ PyList_SetItem(list, 0, PyString_FromString("last"));
+ PyList_SetItem(list, 1, PyString_FromString("regs"));
+ PyList_SetItem(list, 2, PyString_FromString("translate"));
+ PyList_SetItem(list, 3, PyString_FromString("groupindex"));
+ PyList_SetItem(list, 4, PyString_FromString("realpat"));
+ PyList_SetItem(list, 5, PyString_FromString("givenpat"));
+ if (PyErr_Occurred()) {
+ Py_DECREF(list);
list = NULL;
}
}
return list;
}
- return findmethod(reg_methods, (object *)re, name);
+ return Py_FindMethod(reg_methods, (PyObject *)re, name);
}
-static typeobject Regextype = {
- OB_HEAD_INIT(&Typetype)
+static PyTypeObject Regextype = {
+ PyObject_HEAD_INIT(&PyType_Type)
0, /*ob_size*/
"regex", /*tp_name*/
sizeof(regexobject), /*tp_size*/
@@ -323,90 +321,90 @@ static typeobject Regextype = {
0, /*tp_repr*/
};
-static object *
+static PyObject *
newregexobject(pattern, translate, givenpat, groupindex)
- object *pattern;
- object *translate;
- object *givenpat;
- object *groupindex;
+ PyObject *pattern;
+ PyObject *translate;
+ PyObject *givenpat;
+ PyObject *groupindex;
{
regexobject *re;
- char *pat = getstringvalue(pattern);
- int size = getstringsize(pattern);
+ char *pat = PyString_AsString(pattern);
+ int size = PyString_Size(pattern);
- if (translate != NULL && getstringsize(translate) != 256) {
- err_setstr(RegexError,
+ if (translate != NULL && PyString_Size(translate) != 256) {
+ PyErr_SetString(RegexError,
"translation table must be 256 bytes");
return NULL;
}
- re = NEWOBJ(regexobject, &Regextype);
+ re = PyObject_NEW(regexobject, &Regextype);
if (re != NULL) {
char *error;
re->re_patbuf.buffer = NULL;
re->re_patbuf.allocated = 0;
re->re_patbuf.fastmap = re->re_fastmap;
if (translate)
- re->re_patbuf.translate = getstringvalue(translate);
+ re->re_patbuf.translate = PyString_AsString(translate);
else
re->re_patbuf.translate = NULL;
- XINCREF(translate);
+ Py_XINCREF(translate);
re->re_translate = translate;
re->re_lastok = NULL;
re->re_groupindex = groupindex;
- INCREF(pattern);
+ Py_INCREF(pattern);
re->re_realpat = pattern;
- INCREF(givenpat);
+ Py_INCREF(givenpat);
re->re_givenpat = givenpat;
error = re_compile_pattern(pat, size, &re->re_patbuf);
if (error != NULL) {
- err_setstr(RegexError, error);
- DECREF(re);
+ PyErr_SetString(RegexError, error);
+ Py_DECREF(re);
re = NULL;
}
}
- return (object *)re;
+ return (PyObject *)re;
}
-static object *
+static PyObject *
regex_compile(self, args)
- object *self;
- object *args;
+ PyObject *self;
+ PyObject *args;
{
- object *pat = NULL;
- object *tran = NULL;
- if (!getargs(args, "S", &pat)) {
- err_clear();
- if (!getargs(args, "(SS)", &pat, &tran))
+ PyObject *pat = NULL;
+ PyObject *tran = NULL;
+ if (!PyArg_Parse(args, "S", &pat)) {
+ PyErr_Clear();
+ if (!PyArg_Parse(args, "(SS)", &pat, &tran))
return NULL;
}
return newregexobject(pat, tran, pat, NULL);
}
-static object *
+static PyObject *
symcomp(pattern, gdict)
- object *pattern;
- object *gdict;
+ PyObject *pattern;
+ PyObject *gdict;
{
- char *opat = getstringvalue(pattern);
- char *oend = opat + getstringsize(pattern);
+ char *opat = PyString_AsString(pattern);
+ char *oend = opat + PyString_Size(pattern);
int group_count = 0;
int escaped = 0;
char *o = opat;
char *n;
char name_buf[128];
char *g;
- object *npattern;
+ PyObject *npattern;
int require_escape = re_syntax & RE_NO_BK_PARENS ? 0 : 1;
if (oend == opat) {
- INCREF(pattern);
+ Py_INCREF(pattern);
return pattern;
}
- npattern = newsizedstringobject((char*)NULL, getstringsize(pattern));
+ npattern = PyString_FromStringAndSize((char*)NULL, PyString_Size(pattern));
if (npattern == NULL)
return NULL;
- n = getstringvalue(npattern);
+ n = PyString_AsString(npattern);
while (o < oend) {
if (*o == '(' && escaped == require_escape) {
@@ -423,16 +421,16 @@ symcomp(pattern, gdict)
g = name_buf;
for (++o; o < oend;) {
if (*o == '>') {
- object *group_name = NULL;
- object *group_index = NULL;
+ PyObject *group_name = NULL;
+ PyObject *group_index = NULL;
*g++ = '\0';
- group_name = newstringobject(name_buf);
- group_index = newintobject(group_count);
+ group_name = PyString_FromString(name_buf);
+ group_index = PyInt_FromLong(group_count);
if (group_name == NULL || group_index == NULL
- || mappinginsert(gdict, group_name, group_index) != 0) {
- XDECREF(group_name);
- XDECREF(group_index);
- XDECREF(npattern);
+ || PyDict_SetItem(gdict, group_name, group_index) != 0) {
+ Py_XDECREF(group_name);
+ Py_XDECREF(group_index);
+ Py_XDECREF(npattern);
return NULL;
}
++o; /* eat the '>' */
@@ -468,7 +466,7 @@ symcomp(pattern, gdict)
}
}
- if (resizestring(&npattern, n - getstringvalue(npattern)) == 0)
+ if (_PyString_Resize(&npattern, n - PyString_AsString(npattern)) == 0)
return npattern;
else {
return NULL;
@@ -476,89 +474,89 @@ symcomp(pattern, gdict)
}
-static object *
+static PyObject *
regex_symcomp(self, args)
- object *self;
- object *args;
+ PyObject *self;
+ PyObject *args;
{
- object *pattern;
- object *tran = NULL;
- object *gdict = NULL;
- object *npattern;
- if (!getargs(args, "S", &pattern)) {
- err_clear();
- if (!getargs(args, "(SS)", &pattern, &tran))
+ PyObject *pattern;
+ PyObject *tran = NULL;
+ PyObject *gdict = NULL;
+ PyObject *npattern;
+ if (!PyArg_Parse(args, "S", &pattern)) {
+ PyErr_Clear();
+ if (!PyArg_Parse(args, "(SS)", &pattern, &tran))
return NULL;
}
- gdict = newmappingobject();
+ gdict = PyDict_New();
if (gdict == NULL
|| (npattern = symcomp(pattern, gdict)) == NULL) {
- DECREF(gdict);
- DECREF(pattern);
+ Py_DECREF(gdict);
+ Py_DECREF(pattern);
return NULL;
}
return newregexobject(npattern, tran, pattern, gdict);
}
-static object *cache_pat;
-static object *cache_prog;
+static PyObject *cache_pat;
+static PyObject *cache_prog;
static int
update_cache(pat)
- object *pat;
+ PyObject *pat;
{
if (pat != cache_pat) {
- XDECREF(cache_pat);
+ Py_XDECREF(cache_pat);
cache_pat = NULL;
- XDECREF(cache_prog);
- cache_prog = regex_compile((object *)NULL, pat);
+ Py_XDECREF(cache_prog);
+ cache_prog = regex_compile((PyObject *)NULL, pat);
if (cache_prog == NULL)
return -1;
cache_pat = pat;
- INCREF(cache_pat);
+ Py_INCREF(cache_pat);
}
return 0;
}
-static object *
+static PyObject *
regex_match(self, args)
- object *self;
- object *args;
+ PyObject *self;
+ PyObject *args;
{
- object *pat, *string;
- if (!getargs(args, "(SS)", &pat, &string))
+ PyObject *pat, *string;
+ if (!PyArg_Parse(args, "(SS)", &pat, &string))
return NULL;
if (update_cache(pat) < 0)
return NULL;
return reg_match((regexobject *)cache_prog, string);
}
-static object *
+static PyObject *
regex_search(self, args)
- object *self;
- object *args;
+ PyObject *self;
+ PyObject *args;
{
- object *pat, *string;
- if (!getargs(args, "(SS)", &pat, &string))
+ PyObject *pat, *string;
+ if (!PyArg_Parse(args, "(SS)", &pat, &string))
return NULL;
if (update_cache(pat) < 0)
return NULL;
return reg_search((regexobject *)cache_prog, string);
}
-static object *
+static PyObject *
regex_set_syntax(self, args)
- object *self, *args;
+ PyObject *self, *args;
{
int syntax;
- if (!getintarg(args, &syntax))
+ if (!PyArg_Parse(args, "i", &syntax))
return NULL;
syntax = re_set_syntax(syntax);
- return newintobject((long)syntax);
+ return PyInt_FromLong((long)syntax);
}
-static struct methodlist regex_global_methods[] = {
+static struct PyMethodDef regex_global_methods[] = {
{"compile", regex_compile, 0},
{"symcomp", regex_symcomp, 0},
{"match", regex_match, 0},
@@ -569,28 +567,28 @@ static struct methodlist regex_global_methods[] = {
initregex()
{
- object *m, *d, *v;
+ PyObject *m, *d, *v;
- m = initmodule("regex", regex_global_methods);
- d = getmoduledict(m);
+ m = Py_InitModule("regex", regex_global_methods);
+ d = PyModule_GetDict(m);
/* Initialize regex.error exception */
- RegexError = newstringobject("regex.error");
- if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
- fatal("can't define regex.error");
+ RegexError = PyString_FromString("regex.error");
+ if (RegexError == NULL || PyDict_SetItemString(d, "error", RegexError) != 0)
+ Py_FatalError("can't define regex.error");
/* Initialize regex.casefold constant */
- v = newsizedstringobject((char *)NULL, 256);
+ v = PyString_FromStringAndSize((char *)NULL, 256);
if (v != NULL) {
int i;
- char *s = getstringvalue(v);
+ char *s = PyString_AsString(v);
for (i = 0; i < 256; i++) {
if (isupper(i))
s[i] = tolower(i);
else
s[i] = i;
}
- dictinsert(d, "casefold", v);
- DECREF(v);
+ PyDict_SetItemString(d, "casefold", v);
+ Py_DECREF(v);
}
}