diff options
author | Guido van Rossum <guido@python.org> | 1991-12-30 01:42:57 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1991-12-30 01:42:57 (GMT) |
commit | 6f4c43d4fd65076e81605622e39fa85a883a1872 (patch) | |
tree | c9144fc96871d00f36dfef1ece266e33b25a3993 /Modules/regexmodule.c | |
parent | 7e73fd0024f82c9ecce2e75da3af116ffa760993 (diff) | |
download | cpython-6f4c43d4fd65076e81605622e39fa85a883a1872.zip cpython-6f4c43d4fd65076e81605622e39fa85a883a1872.tar.gz cpython-6f4c43d4fd65076e81605622e39fa85a883a1872.tar.bz2 |
Initial revision
Diffstat (limited to 'Modules/regexmodule.c')
-rw-r--r-- | Modules/regexmodule.c | 306 |
1 files changed, 306 insertions, 0 deletions
diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c new file mode 100644 index 0000000..5e82832 --- /dev/null +++ b/Modules/regexmodule.c @@ -0,0 +1,306 @@ +/* +XXX support translate table +XXX support range parameter on search +XXX support mstop parameter on search +*/ + +/*********************************************************** +Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The +Netherlands. + + All Rights Reserved + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the names of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +******************************************************************/ + +/* Regular expression objects */ +/* This uses GNU regex.c, from subdirectory regex !!! */ + +#include "allobjects.h" +#include "modsupport.h" + +#include "regex.h" + +static object *RegexError; /* Exception */ + +typedef struct { + OB_HEAD + object *re_string; /* The string (for printing) */ + struct re_pattern_buffer re_patbuf; /* The compiled expression */ + struct re_registers re_regs; /* The registers from the last match */ + int re_regs_valid; /* Nonzero if the registers are valid */ + char re_fastmap[256]; /* Storage for fastmap */ +} regexobject; + +/* Regex object methods */ + +static void +reg_dealloc(re) + regexobject *re; +{ + XDECREF(re->re_string); + XDEL(re->re_patbuf.buffer); + XDEL(re->re_patbuf.translate); + DEL(re); +} + +static object * +makeresult(regs) + struct re_registers *regs; +{ + object *v = newtupleobject(RE_NREGS); + if (v != NULL) { + int i; + for (i = 0; i < RE_NREGS; i++) { + object *w, *u; + if ( (w = newtupleobject(2)) == NULL || + (u = newintobject(regs->start[i])) == NULL || + settupleitem(w, 0, u) != 0 || + (u = newintobject(regs->end[i])) == NULL || + settupleitem(w, 1, u) != 0) { + XDECREF(w); + DECREF(v); + return NULL; + } + settupleitem(v, i, w); + } + } + return v; +} + +static object * +reg_match(re, args) + regexobject *re; + object *args; +{ + object *v; + char *buffer; + int offset; + int result; + if (args != NULL && is_stringobject(args)) { + v = args; + offset = 0; + } + else if (!getstrintarg(args, &v, &offset)) + return NULL; + buffer = getstringvalue(v); + re->re_regs_valid = 0; + result = re_match(&re->re_patbuf, buffer, getstringsize(v), + offset, &re->re_regs); + if (result < -1) { + /* Failure like stack overflow */ + err_setstr(RegexError, "match failure"); + return NULL; + } + re->re_regs_valid = result >= 0; + return newintobject((long)result); /* Length of the match or -1 */ +} +static object * +reg_search(re, args) + regexobject *re; + object *args; +{ + object *v; + char *buffer; + int size; + int offset; + int range; + int result; + if (args != NULL && is_stringobject(args)) { + v = args; + offset = 0; + } + else if (!getstrintarg(args, &v, &offset)) + return NULL; + buffer = getstringvalue(v); + size = getstringsize(v); + if (offset < 0 || offset > size) { + err_setstr(RegexError, "search offset out of range"); + return NULL; + } + range = size - offset + 1; + re->re_regs_valid = 0; + result = re_search(&re->re_patbuf, buffer, size, offset, range, + &re->re_regs); + if (result < -1) { + /* Failure like stack overflow */ + err_setstr(RegexError, "match failure"); + return NULL; + } + re->re_regs_valid = result >= 0; + return newintobject((long)result); /* Position of the match or -1 */ +} + +static object * +reg_regs(re, args) + regexobject *re; + object *args; +{ + if (!re->re_regs_valid) { + err_setstr(RegexError, + "regs only valid after successful match/search"); + return NULL; + } + return makeresult(&re->re_regs); +} + +static struct methodlist reg_methods[] = { + {"match", reg_match}, + {"search", reg_search}, + {"regs", reg_regs}, + {NULL, NULL} /* sentinel */ +}; + +static object * +reg_getattr(re, name) + regexobject *re; + char *name; +{ + return findmethod(reg_methods, (object *)re, name); +} + +static typeobject Regextype = { + OB_HEAD_INIT(&Typetype) + 0, /*ob_size*/ + "regex", /*tp_name*/ + sizeof(regexobject), /*tp_size*/ + 0, /*tp_itemsize*/ + /* methods */ + reg_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + reg_getattr, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ +}; + +static regexobject * +newregexobject(string) + object *string; +{ + regexobject *re; + re = NEWOBJ(regexobject, &Regextype); + if (re != NULL) { + char *error; + INCREF(string); + re->re_string = string; + re->re_patbuf.buffer = NULL; + re->re_patbuf.allocated = 0; + re->re_patbuf.fastmap = re->re_fastmap; + re->re_patbuf.translate = NULL; + re->re_regs_valid = 0; + error = re_compile_pattern(getstringvalue(string), + getstringsize(string), + &re->re_patbuf); + if (error != NULL) { + err_setstr(RegexError, error); + DECREF(re); + re = NULL; + } + } + return re; +} + +static object * +regex_compile(self, args) + object *self; + object *args; +{ + object *string; + if (!getstrarg(args, &string)) + return NULL; + return (object *)newregexobject(string); +} + +static object *cache_pat; +static object *cache_prog; + +static int +update_cache(pat) + object *pat; +{ + if (pat != cache_pat) { + XDECREF(cache_pat); + cache_pat = NULL; + XDECREF(cache_prog); + cache_prog = regex_compile((object *)NULL, pat); + if (cache_prog == NULL) + return -1; + cache_pat = pat; + INCREF(cache_pat); + } + return 0; +} + +static object * +regex_match(self, args) + object *self; + object *args; +{ + object *pat, *string; + if (!getstrstrarg(args, &pat, &string)) + return NULL; + if (update_cache(pat) < 0) + return NULL; + return reg_match((regexobject *)cache_prog, string); +} + +static object * +regex_search(self, args) + object *self; + object *args; +{ + object *pat, *string; + if (!getstrstrarg(args, &pat, &string)) + return NULL; + if (update_cache(pat) < 0) + return NULL; + return reg_search((regexobject *)cache_prog, string); +} + +static object * +regex_set_syntax(self, args) + object *self, *args; +{ + int syntax; + if (!getintarg(args, &syntax)) + return NULL; + syntax = re_set_syntax(syntax); + return newintobject((long)syntax); +} + +static struct methodlist regex_global_methods[] = { + {"compile", regex_compile}, + {"match", regex_match}, + {"search", regex_search}, + {"set_syntax", regex_set_syntax}, + {NULL, NULL} /* sentinel */ +}; + +initregex() +{ + object *m, *d; + + m = initmodule("regex", regex_global_methods); + d = getmoduledict(m); + + /* Initialize regex.error exception */ + RegexError = newstringobject("regex.error"); + if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0) + fatal("can't define regex.error"); +} |