summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Modules/regexpr.c 54
-rw-r--r-- Modules/regexpr.h 12
-rw-r--r-- Modules/reopmodule.c 131
3 files changed, 165 insertions, 32 deletions
diff --git a/Modules/regexpr.c b/Modules/regexpr.c
index 6b6ccbef..2d30171 100644
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
@@ -1,7 +1,3 @@
-/*
- * -*- mode: c-mode; c-file-style: python -*-
- */
-
/* regexpr.c
*
* Author: Tatu Ylonen <ylo@ngs.fi>
@@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
#define MAX_NESTING 100 /* max nesting level of operators */
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-#define Sword 1
-static char re_syntax_table[256];
+char re_syntax_table[256];
-static void re_compile_initialize(void)
+void re_compile_initialize(void)
{
int a;
static int syntax_table_inited = 0;
-
+
if (!syntax_table_inited)
{
syntax_table_inited = 1;
@@ -491,7 +486,11 @@ static void re_compile_initialize(void)
for (a = 'A'; a <= 'Z'; a++)
re_syntax_table[a] = Sword;
for (a = '0'; a <= '9'; a++)
- re_syntax_table[a] = Sword;
+ re_syntax_table[a] = Sword | Sdigit;
+ re_syntax_table['_'] = Sword;
+ for (a = 9; a <= 13; a++)
+ re_syntax_table[a] = Swhitespace;
+ re_syntax_table[' '] = Swhitespace;
}
re_compile_initialized = 1;
for (a = 0; a < 256; a++)
@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
return; /* we have already been here */
visited[pos] = 1;
for (;;)
- switch (code[pos++])
- {
+ switch (code[pos++]) {
case Cend:
- {
- *can_be_null = 1;
- return;
- }
+ {
+ *can_be_null = 1;
+ return;
+ }
case Cbol:
case Cbegbuf:
case Cendbuf:
@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
NEW_STATE(state, bufp->num_registers);
- if (!re_compile_initialized)
- re_compile_initialize();
-
continue_matching:
switch (*code++)
{
@@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
{
if (text == textend)
goto fail;
- if (SYNTAX(*text) != Sword)
+ if (!(SYNTAX(*text) & Sword)) /* fail unless the current char is a word char (was inverted) */
goto fail;
if (text == textstart)
goto continue_matching;
- if (SYNTAX(text[-1]) != Sword)
+ if (!(SYNTAX(text[-1]) & Sword)) /* previous char is not a word char: a word starts here */
goto continue_matching;
goto fail;
}
@@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
{
if (text == textstart)
goto fail;
- if (SYNTAX(text[-1]) != Sword)
+ if (!(SYNTAX(text[-1]) & Sword))
goto fail;
if (text == textend)
goto continue_matching;
- if (SYNTAX(*text) == Sword)
+ if (SYNTAX(*text) & Sword)
goto fail;
goto continue_matching;
}
@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
if (text == textstart || text == textend)
goto continue_matching;
- if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+ if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
goto continue_matching;
goto fail;
}
@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
* beginning and end of buffer. */
if (text == textstart || text == textend)
goto fail;
- if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
+ if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
goto fail;
goto continue_matching;
}
case Csyntaxspec:
{
NEXTCHAR(ch);
- if (SYNTAX(ch) != (unsigned char)*code++)
+ if (!(SYNTAX(ch) & (unsigned char)*code++))
goto fail;
goto continue_matching;
}
case Cnotsyntaxspec:
{
NEXTCHAR(ch);
- if (SYNTAX(ch) != (unsigned char)*code++)
+ if (SYNTAX(ch) & (unsigned char)*code++)
break;
goto continue_matching;
}
@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
}
return -1;
}
+
+/*
+** Local Variables:
+** mode: c
+** c-file-style: "python"
+** End:
+*/
diff --git a/Modules/regexpr.h b/Modules/regexpr.h
index 1221802..91f00b9 100644
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@@ -67,10 +67,16 @@ typedef struct re_registers
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
+#define Sword 1
+#define Swhitespace 2
+#define Sdigit 4
+
/* Rename all exported symbols to avoid conflicts with similarly named
symbols in some systems' standard C libraries... */
#define re_syntax _Py_re_syntax
+#define re_syntax_table _Py_re_syntax_table
+#define re_compile_initialize _Py_re_compile_initialize
#define re_set_syntax _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern
#define re_match _Py_re_match
@@ -85,6 +91,10 @@ extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
+extern char re_syntax_table[256];
+
+void re_compile_initialize(void);
+
int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
@@ -133,6 +143,8 @@ int re_exec(char *s);
#else /* HAVE_PROTOTYPES */
extern int re_syntax;
+extern char re_syntax_table[256];
+void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c
index 9b928f5..0817626 100644
--- a/Modules/reopmodule.c
+++ b/Modules/reopmodule.c
@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
static PyObject *ReopError; /* Exception */
+#define IGNORECASE 0x01
+#define MULTILINE 0x02
+#define DOTALL 0x04
+#define VERBOSE 0x08
+
+static char *reop_casefold;
+
static PyObject *
makeresult(regs, num_regs)
struct re_registers *regs;
@@ -90,6 +97,10 @@ reop_match(self, args)
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
+ PyObject *modules = NULL;
+ PyObject *reopmodule = NULL;
+ PyObject *reopdict = NULL;
+ PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
@@ -102,20 +113,44 @@ reop_match(self, args)
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
- bufp.translate=NULL;
+ if (flags & IGNORECASE)
+ {
+ if ((modules = PyImport_GetModuleDict()) == NULL)
+ return NULL;
+
+ if ((reopmodule = PyDict_GetItemString(modules,
+ "reop")) == NULL)
+ return NULL;
+
+ if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+ return NULL;
+
+ if ((casefold = PyDict_GetItemString(reopdict,
+ "casefold")) == NULL)
+ return NULL;
+
+ bufp.translate = PyString_AsString(casefold);
+ }
+ else
+ bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
- for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+ for(i=0; i<bufp.num_registers; i++) {
+ re_regs.start[i]=-1;
+ re_regs.end[i]=-1;
+ }
result = re_match(&bufp,
string, stringlen, pos,
&re_regs);
+
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
+
return NULL;
}
if (result == -1) {
@@ -136,6 +171,10 @@ reop_search(self, args)
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
+ PyObject *modules = NULL;
+ PyObject *reopmodule = NULL;
+ PyObject *reopdict = NULL;
+ PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
@@ -148,26 +187,51 @@ reop_search(self, args)
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
- bufp.translate=NULL;
+ if (flags & IGNORECASE)
+ {
+ if ((modules = PyImport_GetModuleDict()) == NULL)
+ return NULL;
+
+ if ((reopmodule = PyDict_GetItemString(modules,
+ "reop")) == NULL)
+ return NULL;
+
+ if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+ return NULL;
+
+ if ((casefold = PyDict_GetItemString(reopdict,
+ "casefold")) == NULL)
+ return NULL;
+
+ bufp.translate = PyString_AsString(casefold);
+ }
+ else
+ bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
- for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+ for(i = 0; i < bufp.num_registers; i++) {
+ re_regs.start[i] = -1;
+ re_regs.end[i] = -1;
+ }
result = re_search(&bufp,
string, stringlen, pos, stringlen-pos,
&re_regs);
+
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
return NULL;
}
+
if (result == -1) {
Py_INCREF(Py_None);
return Py_None;
}
+
return makeresult(&re_regs, bufp.num_registers);
}
@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
void
initreop()
{
- PyObject *m, *d, *v;
+ PyObject *m, *d, *k, *v, *o;
int i;
char *s;
-
+ char j[2];
+
+ re_compile_initialize();
+
m = Py_InitModule("reop", reop_global_methods);
d = PyModule_GetDict(m);
@@ -370,12 +437,64 @@ initreop()
else
s[i] = i;
}
+
if (PyDict_SetItemString(d, "casefold", v) < 0)
goto finally;
Py_DECREF(v);
+ /* Initialize the syntax table */
+
+ o = PyDict_New();
+ if (o == NULL)
+ goto finally;
+
+ j[1] = '\0';
+ for (i = 0; i < 256; i++)
+ {
+ j[0] = i;
+ k = PyString_FromStringAndSize(j, 1);
+ if (k == NULL)
+ goto finally;
+ v = PyInt_FromLong(re_syntax_table[i]);
+ if (v == NULL)
+ goto finally;
+ if (PyDict_SetItem(o, k, v) < 0)
+ goto finally;
+ Py_DECREF(k);
+ Py_DECREF(v);
+ }
+
+ if (PyDict_SetItemString(d, "syntax_table", o) < 0)
+ goto finally;
+ Py_DECREF(o);
+
+ v = PyInt_FromLong(Sword);
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "word", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
+ v = PyInt_FromLong(Swhitespace);
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "whitespace", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
+ v = PyInt_FromLong(Sdigit);
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "digit", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
if (!PyErr_Occurred())
return;
+
finally:
Py_FatalError("can't initialize reop module");
}