From 74fb3039972db03a97852eb5ded0c618e7c60d79 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 17 Jul 1997 22:41:38 +0000 Subject: Jeffrey's latests --- Modules/regexpr.c | 54 +++++++++++---------- Modules/regexpr.h | 12 +++++ Modules/reopmodule.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 165 insertions(+), 32 deletions(-) diff --git a/Modules/regexpr.c b/Modules/regexpr.c index 6b6ccbef..2d30171 100644 --- a/Modules/regexpr.c +++ b/Modules/regexpr.c @@ -1,7 +1,3 @@ -/* - * -*- mode: c-mode; c-file-style: python -*- - */ - /* regexpr.c * * Author: Tatu Ylonen @@ -472,16 +468,15 @@ static int regexp_ansi_sequences; #define MAX_NESTING 100 /* max nesting level of operators */ #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] -#define Sword 1 -static char re_syntax_table[256]; +char re_syntax_table[256]; -static void re_compile_initialize(void) +void re_compile_initialize(void) { int a; static int syntax_table_inited = 0; - + if (!syntax_table_inited) { syntax_table_inited = 1; @@ -491,7 +486,11 @@ static void re_compile_initialize(void) for (a = 'A'; a <= 'Z'; a++) re_syntax_table[a] = Sword; for (a = '0'; a <= '9'; a++) - re_syntax_table[a] = Sword; + re_syntax_table[a] = Sword | Sdigit; + re_syntax_table['_'] = Sword; + for (a = 9; a <= 13; a++) + re_syntax_table[a] = Swhitespace; + re_syntax_table[' '] = Swhitespace; } re_compile_initialized = 1; for (a = 0; a < 256; a++) @@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code, return; /* we have already been here */ visited[pos] = 1; for (;;) - switch (code[pos++]) - { + switch (code[pos++]) { case Cend: - { - *can_be_null = 1; - return; - } + { + *can_be_null = 1; + return; + } case Cbol: case Cbegbuf: case Cendbuf: @@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp, NEW_STATE(state, bufp->num_registers); - if (!re_compile_initialized) - re_compile_initialize(); - continue_matching: switch (*code++) { @@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp, { if (text == textend) goto fail; - if (SYNTAX(*text) != Sword) + if (SYNTAX(*text) & Sword) goto fail; if (text == textstart) goto continue_matching; - if (SYNTAX(text[-1]) != Sword) + if (!(SYNTAX(text[-1]) & Sword)) goto continue_matching; goto fail; } @@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp, { if (text == textstart) goto fail; - if (SYNTAX(text[-1]) != Sword) + if (!(SYNTAX(text[-1]) & Sword)) goto fail; if (text == textend) goto continue_matching; - if (SYNTAX(*text) == Sword) + if (SYNTAX(*text) & Sword) goto fail; goto continue_matching; } @@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp, if (text == textstart || text == textend) goto continue_matching; - if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)) + if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)) goto continue_matching; goto fail; } @@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp, * beginning and end of buffer. */ if (text == textstart || text == textend) goto fail; - if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))) + if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))) goto fail; goto continue_matching; } case Csyntaxspec: { NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) + if (!(SYNTAX(ch) & (unsigned char)*code++)) goto fail; goto continue_matching; } case Cnotsyntaxspec: { NEXTCHAR(ch); - if (SYNTAX(ch) != (unsigned char)*code++) + if (SYNTAX(ch) & (unsigned char)*code++) break; goto continue_matching; } @@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp, } return -1; } + +/* +** Local Variables: +** mode: c +** c-file-style: "python" +** End: +*/ diff --git a/Modules/regexpr.h b/Modules/regexpr.h index 1221802..91f00b9 100644 --- a/Modules/regexpr.h +++ b/Modules/regexpr.h @@ -67,10 +67,16 @@ typedef struct re_registers #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) #define RE_SYNTAX_EMACS 0 +#define Sword 1 +#define Swhitespace 2 +#define Sdigit 4 + /* Rename all exported symbols to avoid conflicts with similarly named symbols in some systems' standard C libraries... */ #define re_syntax _Py_re_syntax +#define re_syntax_table _Py_re_syntax_table +#define re_compile_initialize _Py_re_compile_initialize #define re_set_syntax _Py_re_set_syntax #define re_compile_pattern _Py_re_compile_pattern #define re_match _Py_re_match @@ -85,6 +91,10 @@ extern int re_syntax; /* This is the actual syntax mask. It was added so that Python could do * syntax-dependent munging of patterns before compilation. */ +extern char re_syntax_table[256]; + +void re_compile_initialize(void); + int re_set_syntax(int syntax); /* This sets the syntax to use and returns the previous syntax. The * syntax is specified by a bit mask of the above defined bits. */ @@ -133,6 +143,8 @@ int re_exec(char *s); #else /* HAVE_PROTOTYPES */ extern int re_syntax; +extern char re_syntax_table[256]; +void re_compile_initialize(); int re_set_syntax(); char *re_compile_pattern(); int re_match(); diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c index 9b928f5..0817626 100644 --- a/Modules/reopmodule.c +++ b/Modules/reopmodule.c @@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE. static PyObject *ReopError; /* Exception */ +#define IGNORECASE 0x01 +#define MULTILINE 0x02 +#define DOTALL 0x04 +#define VERBOSE 0x08 + +static char *reop_casefold; + static PyObject * makeresult(regs, num_regs) struct re_registers *regs; @@ -90,6 +97,10 @@ reop_match(self, args) int flags, pos, result; struct re_pattern_buffer bufp; struct re_registers re_regs; + PyObject *modules = NULL; + PyObject *reopmodule = NULL; + PyObject *reopdict = NULL; + PyObject *casefold = NULL; if (!PyArg_Parse(args, "(s#iiis#is#i)", &(bufp.buffer), &(bufp.allocated), @@ -102,20 +113,44 @@ reop_match(self, args) /* XXX sanity-check the input data */ bufp.used=bufp.allocated; - bufp.translate=NULL; + if (flags & IGNORECASE) + { + if ((modules = PyImport_GetModuleDict()) == NULL) + return NULL; + + if ((reopmodule = PyDict_GetItemString(modules, + "reop")) == NULL) + return NULL; + + if ((reopdict = PyModule_GetDict(reopmodule)) == NULL) + return NULL; + + if ((casefold = PyDict_GetItemString(reopdict, + "casefold")) == NULL) + return NULL; + + bufp.translate = PyString_AsString(casefold); + } + else + bufp.translate=NULL; bufp.fastmap_accurate=1; bufp.can_be_null=can_be_null; bufp.uses_registers=1; bufp.anchor=anchor; - for(i=0; i