summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1991-12-30 01:42:57 (GMT)
committerGuido van Rossum <guido@python.org>1991-12-30 01:42:57 (GMT)
commit6f4c43d4fd65076e81605622e39fa85a883a1872 (patch)
treec9144fc96871d00f36dfef1ece266e33b25a3993
parent7e73fd0024f82c9ecce2e75da3af116ffa760993 (diff)
downloadcpython-6f4c43d4fd65076e81605622e39fa85a883a1872.zip
cpython-6f4c43d4fd65076e81605622e39fa85a883a1872.tar.gz
cpython-6f4c43d4fd65076e81605622e39fa85a883a1872.tar.bz2
Initial revision
-rw-r--r--Modules/regexmodule.c306
1 files changed, 306 insertions, 0 deletions
diff --git a/Modules/regexmodule.c b/Modules/regexmodule.c
new file mode 100644
index 0000000..5e82832
--- /dev/null
+++ b/Modules/regexmodule.c
@@ -0,0 +1,306 @@
+/*
+XXX support translate table
+XXX support range parameter on search
+XXX support mstop parameter on search
+*/
+
+/***********************************************************
+Copyright 1991 by Stichting Mathematisch Centrum, Amsterdam, The
+Netherlands.
+
+ All Rights Reserved
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the names of Stichting Mathematisch
+Centrum or CWI not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior permission.
+
+STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
+FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+******************************************************************/
+
+/* Regular expression objects */
+/* This uses GNU regex.c, from subdirectory regex !!! */
+
+#include "allobjects.h"
+#include "modsupport.h"
+
+#include "regex.h"
+
+static object *RegexError; /* Exception */
+
+typedef struct {
+ OB_HEAD
+ object *re_string; /* The string (for printing) */
+ struct re_pattern_buffer re_patbuf; /* The compiled expression */
+ struct re_registers re_regs; /* The registers from the last match */
+ int re_regs_valid; /* Nonzero if the registers are valid */
+ char re_fastmap[256]; /* Storage for fastmap */
+} regexobject;
+
+/* Regex object methods */
+
+static void
+reg_dealloc(re)
+ regexobject *re;
+{
+ XDECREF(re->re_string);
+ XDEL(re->re_patbuf.buffer);
+ XDEL(re->re_patbuf.translate);
+ DEL(re);
+}
+
+static object *
+makeresult(regs)
+ struct re_registers *regs;
+{
+ object *v = newtupleobject(RE_NREGS);
+ if (v != NULL) {
+ int i;
+ for (i = 0; i < RE_NREGS; i++) {
+ object *w, *u;
+ if ( (w = newtupleobject(2)) == NULL ||
+ (u = newintobject(regs->start[i])) == NULL ||
+ settupleitem(w, 0, u) != 0 ||
+ (u = newintobject(regs->end[i])) == NULL ||
+ settupleitem(w, 1, u) != 0) {
+ XDECREF(w);
+ DECREF(v);
+ return NULL;
+ }
+ settupleitem(v, i, w);
+ }
+ }
+ return v;
+}
+
+static object *
+reg_match(re, args)
+ regexobject *re;
+ object *args;
+{
+ object *v;
+ char *buffer;
+ int offset;
+ int result;
+ if (args != NULL && is_stringobject(args)) {
+ v = args;
+ offset = 0;
+ }
+ else if (!getstrintarg(args, &v, &offset))
+ return NULL;
+ buffer = getstringvalue(v);
+ re->re_regs_valid = 0;
+ result = re_match(&re->re_patbuf, buffer, getstringsize(v),
+ offset, &re->re_regs);
+ if (result < -1) {
+ /* Failure like stack overflow */
+ err_setstr(RegexError, "match failure");
+ return NULL;
+ }
+ re->re_regs_valid = result >= 0;
+ return newintobject((long)result); /* Length of the match or -1 */
+}
+static object *
+reg_search(re, args)
+ regexobject *re;
+ object *args;
+{
+ object *v;
+ char *buffer;
+ int size;
+ int offset;
+ int range;
+ int result;
+ if (args != NULL && is_stringobject(args)) {
+ v = args;
+ offset = 0;
+ }
+ else if (!getstrintarg(args, &v, &offset))
+ return NULL;
+ buffer = getstringvalue(v);
+ size = getstringsize(v);
+ if (offset < 0 || offset > size) {
+ err_setstr(RegexError, "search offset out of range");
+ return NULL;
+ }
+ range = size - offset + 1;
+ re->re_regs_valid = 0;
+ result = re_search(&re->re_patbuf, buffer, size, offset, range,
+ &re->re_regs);
+ if (result < -1) {
+ /* Failure like stack overflow */
+ err_setstr(RegexError, "match failure");
+ return NULL;
+ }
+ re->re_regs_valid = result >= 0;
+ return newintobject((long)result); /* Position of the match or -1 */
+}
+
+static object *
+reg_regs(re, args)
+ regexobject *re;
+ object *args;
+{
+ if (!re->re_regs_valid) {
+ err_setstr(RegexError,
+ "regs only valid after successful match/search");
+ return NULL;
+ }
+ return makeresult(&re->re_regs);
+}
+
+static struct methodlist reg_methods[] = {
+ {"match", reg_match},
+ {"search", reg_search},
+ {"regs", reg_regs},
+ {NULL, NULL} /* sentinel */
+};
+
+static object *
+reg_getattr(re, name)
+ regexobject *re;
+ char *name;
+{
+ return findmethod(reg_methods, (object *)re, name);
+}
+
+static typeobject Regextype = {
+ OB_HEAD_INIT(&Typetype)
+ 0, /*ob_size*/
+ "regex", /*tp_name*/
+ sizeof(regexobject), /*tp_size*/
+ 0, /*tp_itemsize*/
+ /* methods */
+ reg_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ reg_getattr, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+};
+
+static regexobject *
+newregexobject(string)
+ object *string;
+{
+ regexobject *re;
+ re = NEWOBJ(regexobject, &Regextype);
+ if (re != NULL) {
+ char *error;
+ INCREF(string);
+ re->re_string = string;
+ re->re_patbuf.buffer = NULL;
+ re->re_patbuf.allocated = 0;
+ re->re_patbuf.fastmap = re->re_fastmap;
+ re->re_patbuf.translate = NULL;
+ re->re_regs_valid = 0;
+ error = re_compile_pattern(getstringvalue(string),
+ getstringsize(string),
+ &re->re_patbuf);
+ if (error != NULL) {
+ err_setstr(RegexError, error);
+ DECREF(re);
+ re = NULL;
+ }
+ }
+ return re;
+}
+
+static object *
+regex_compile(self, args)
+ object *self;
+ object *args;
+{
+ object *string;
+ if (!getstrarg(args, &string))
+ return NULL;
+ return (object *)newregexobject(string);
+}
+
+static object *cache_pat;
+static object *cache_prog;
+
+static int
+update_cache(pat)
+ object *pat;
+{
+ if (pat != cache_pat) {
+ XDECREF(cache_pat);
+ cache_pat = NULL;
+ XDECREF(cache_prog);
+ cache_prog = regex_compile((object *)NULL, pat);
+ if (cache_prog == NULL)
+ return -1;
+ cache_pat = pat;
+ INCREF(cache_pat);
+ }
+ return 0;
+}
+
+static object *
+regex_match(self, args)
+ object *self;
+ object *args;
+{
+ object *pat, *string;
+ if (!getstrstrarg(args, &pat, &string))
+ return NULL;
+ if (update_cache(pat) < 0)
+ return NULL;
+ return reg_match((regexobject *)cache_prog, string);
+}
+
+static object *
+regex_search(self, args)
+ object *self;
+ object *args;
+{
+ object *pat, *string;
+ if (!getstrstrarg(args, &pat, &string))
+ return NULL;
+ if (update_cache(pat) < 0)
+ return NULL;
+ return reg_search((regexobject *)cache_prog, string);
+}
+
+static object *
+regex_set_syntax(self, args)
+ object *self, *args;
+{
+ int syntax;
+ if (!getintarg(args, &syntax))
+ return NULL;
+ syntax = re_set_syntax(syntax);
+ return newintobject((long)syntax);
+}
+
+static struct methodlist regex_global_methods[] = {
+ {"compile", regex_compile},
+ {"match", regex_match},
+ {"search", regex_search},
+ {"set_syntax", regex_set_syntax},
+ {NULL, NULL} /* sentinel */
+};
+
+initregex()
+{
+ object *m, *d;
+
+ m = initmodule("regex", regex_global_methods);
+ d = getmoduledict(m);
+
+ /* Initialize regex.error exception */
+ RegexError = newstringobject("regex.error");
+ if (RegexError == NULL || dictinsert(d, "error", RegexError) != 0)
+ fatal("can't define regex.error");
+}