summaryrefslogtreecommitdiffstats
path: root/generic/tclRegexp.h
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclRegexp.h')
-rw-r--r--generic/tclRegexp.h274
1 files changed, 247 insertions, 27 deletions
diff --git a/generic/tclRegexp.h b/generic/tclRegexp.h
index 986316b..c545590 100644
--- a/generic/tclRegexp.h
+++ b/generic/tclRegexp.h
@@ -1,40 +1,260 @@
-/*
- * Definitions etc. for regexp(3) routines.
+/*
+ * tclRegexp.h --
+ *
+ * This file contains definitions used internally by Henry
+ * Spencer's regular expression code.
*
- * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
- * not the System V one.
+ * Copyright (c) 1998 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., and Sun Microsystems Inc., none of
+ * whom are responsible for the results. The author thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Copyright (c) 1998 by Sun Microsystems, Inc.
*
- * SCCS: @(#) tclRegexp.h 1.6 96/04/02 18:43:57
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * SCCS: @(#) tclRegexp.h 1.22 98/01/28 20:44:28
+ */
+
+#ifndef _TCLREGEXP
+#define _TCLREGEXP
+
+#ifndef _TCLINT
+#include "tclInt.h"
+#endif
+
+/*
+ * The following definitions were culled from wctype.h and wchar.h.
+ * Those two header files are now gone. Eventually we should replace all
+ * instances of, e.g., iswalnum() with TclUniCharIsAlnum() in the regexp
+ * code.
+ *
+ * Every name is first removed with #undef and then defined afresh, so a
+ * macro of the same name that is already in effect is dropped first.
+ * The result: wint_t becomes int; WEOF, WCHAR_MIN and WCHAR_MAX become
+ * -1, 0x0000 and 0xffff; the four isw*() classifiers forward to the
+ * matching TclUniCharIs*() routines; wcslen and wcsncmp are renamed to
+ * TclUniCharLen and TclUniCharNcmp.
+ *
+ * NOTE(review): WEOF expands to a bare -1 with no parentheses; worth
+ * writing as (-1) if this header is ever revised.
+ */
+
+#undef wint_t
+#define wint_t int
+
+#undef WEOF
+#undef WCHAR_MIN
+#undef WCHAR_MAX
+
+#define WEOF -1
+#define WCHAR_MIN 0x0000
+#define WCHAR_MAX 0xffff
+
+#undef iswalnum
+#undef iswalpha
+#undef iswdigit
+#undef iswspace
+
+#define iswalnum(x) TclUniCharIsAlnum(x)
+#define iswalpha(x) TclUniCharIsAlpha(x)
+#define iswdigit(x) TclUniCharIsDigit(x)
+#define iswspace(x) TclUniCharIsSpace(x)
+
+#undef wcslen
+#undef wcsncmp
+
+#define wcslen TclUniCharLen
+#define wcsncmp TclUniCharNcmp
+
+/*
+ * The following definitions were added by JO to make Tcl compile
+ * under SunOS, where off_t and wchar_t aren't defined; perhaps all of
+ * the code below can be collapsed into a few simple definitions?
+ *
+ * Unless the includer has already defined them, __RE_REGOFF_T defaults
+ * to int and __RE_WCHAR_T defaults to Tcl_UniChar. Both macros are
+ * therefore always defined past this point, which means the #else
+ * branches of the regoff_t and re_wchar typedefs later in this file
+ * (the off_t and wchar_t fallbacks) are never compiled.
*/
-#ifndef _REGEXP
-#define _REGEXP 1
+#ifndef __RE_REGOFF_T
+# define __RE_REGOFF_T int
+#endif
+#ifndef __RE_WCHAR_T
+# define __RE_WCHAR_T Tcl_UniChar
+#endif
-#ifndef _TCL
-#include "tcl.h"
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t,
+ * and must be signed; it's only a guess that off_t is big enough, so we
+ * offer an override.
+ *
+ * The override is the __RE_REGOFF_T macro: when it is defined, regoff_t
+ * is a typedef for whatever type it names; otherwise regoff_t falls back
+ * to off_t.
+ */
+#ifdef __RE_REGOFF_T
+typedef __RE_REGOFF_T regoff_t; /* offset type for result reporting */
+#else
+typedef off_t regoff_t; /* fallback when no override is defined */
#endif
/*
- * NSUBEXP must be at least 10, and no greater than 117 or the parser
- * will not work properly.
+ * We offer the option of using a non-wchar_t type in the w prototypes so
+ * that <regex.h> can be included without first including (e.g.) <wchar.h>.
+ * Note that __RE_WCHAR_T must in fact be the same type as wchar_t!
+ */
+#ifdef __RE_WCHAR_T
+typedef __RE_WCHAR_T re_wchar; /* internal name for the type */
+#else
+typedef wchar_t re_wchar; /* fallback when __RE_WCHAR_T is not defined */
+#endif
+
+#define REMAGIC 0xfed7 /* NOTE(review): presumably the value expected in regex_t.re_magic -- confirm */
+
+/*
+ * other interface types
+ */
+
+/*
+ * the biggie, a compiled RE (or rather, a front end to same)
+ *
+ * The REG_U* macros nested inside the struct body are octal constants,
+ * each with a single distinct bit set, so they can be OR'ed together and
+ * tested individually. They sit directly after re_info and are presumably
+ * the values stored in that field (TODO confirm against the regexp
+ * sources). Being macros, they are visible file-wide despite the nesting.
+ */
+typedef struct {
+ int re_magic; /* magic number; NOTE(review): presumably REMAGIC -- confirm */
+ size_t re_nsub; /* number of subexpressions */
+ int re_info; /* information about RE; see the REG_U* bits below */
+# define REG_UBACKREF 000001
+# define REG_ULOOKAHEAD 000002
+# define REG_UBOUNDS 000004
+# define REG_UBRACES 000010
+# define REG_UBSALNUM 000020
+# define REG_UPBOTCH 000040
+# define REG_UBBS 000100
+# define REG_UNONPOSIX 000200
+# define REG_UUNSPEC 000400
+# define REG_UUNPORT 001000
+# define REG_ULOCALE 002000
+# define REG_UEMPTYMATCH 004000
+ int re_csize; /* sizeof(character) */
+ VOID *re_guts; /* untyped pointer, opaque to callers: none of your business :-) */
+ VOID *re_fns; /* untyped pointer, opaque to callers: none of your business :-) */
+} regex_t;
+
+/*
+ * result reporting (may acquire more fields later)
+ *
+ * One matched substring, described by a pair of regoff_t offsets.
+ * NOTE(review): this header does not say whether rm_eo is inclusive or
+ * one past the end, nor what unit the offsets count in -- confirm
+ * against the matcher before relying on either.
+ */
+typedef struct {
+ regoff_t rm_so; /* start of substring */
+ regoff_t rm_eo; /* end of substring */
+} regmatch_t;
+
+
+
+/*
+ * compilation
+ *
+ * The ^-marked lines are prototypes quoted inside this comment only; none
+ * of regcomp(), re_comp() or re_wcomp() is declared in the part of this
+ * header visible here. The declaration further down whose parameter list
+ * matches re_wcomp() is re_ucomp().
+ *
+ ^ int regcomp(regex_t *, const char *, int);
+ ^ int re_comp(regex_t *, const char *, size_t, int);
+ ^ #ifndef __RE_NOWIDE
+ ^ int re_wcomp(regex_t *, const re_wchar *, size_t, int);
+ ^ #endif
+ *
+ * The three flags defined below are octal single-bit values that the
+ * original author deliberately leaves undocumented.
+ */
+
+#define REG_DUMP 004000 /* none of your business :-) */
+#define REG_FAKE 010000 /* none of your business :-) */
+#define REG_PROGRESS 020000 /* none of your business :-) */
+
+
+
+/*
+ * execution
+ *
+ * The ^-marked lines are prototypes quoted inside this comment only; none
+ * of regexec(), re_exec() or re_wexec() is declared in the part of this
+ * header visible here. The declaration further down whose parameter list
+ * matches re_wexec() is re_uexec().
+ *
+ ^ int regexec(regex_t *, const char *, size_t, regmatch_t [], int);
+ ^ int re_exec(regex_t *, const char *, size_t, size_t, regmatch_t [], int);
+ ^ #ifndef __RE_NOWIDE
+ ^ int re_wexec(regex_t *, const re_wchar *, size_t, size_t, regmatch_t [], int);
+ ^ #endif
+ *
+ * The three flags defined below are octal single-bit values that the
+ * original author deliberately leaves undocumented.
+ */
+#define REG_FTRACE 0010 /* none of your business */
+#define REG_MTRACE 0020 /* none of your business */
+#define REG_SMALL 0040 /* none of your business */
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ *
+ * Note that there is no wchar_t variant of regerror at this time; what
+ * kind of character is used for error reports is independent of what kind
+ * is used in matching.
+ *
+ * The ordinary codes below run contiguously from 0 (REG_OKAY) to 19
+ * (REG_IMPOSS); 101 and 102 are set apart as debugging/testing specials.
+ * NOTE(review): because the regerror() table is generated from this file,
+ * the trailing comment on each #define is presumably the source of the
+ * message text -- treat those comments as data and confirm with the
+ * generator before rewording any of them.
+ *
+ ^ extern size_t regerror(int, const regex_t *, char *, size_t);
+ */
+#define REG_OKAY 0 /* no errors detected */
+#define REG_NOMATCH 1 /* regexec() failed to match */
+#define REG_BADPAT 2 /* invalid regular expression */
+#define REG_ECOLLATE 3 /* invalid collating element */
+#define REG_ECTYPE 4 /* invalid character class */
+#define REG_EESCAPE 5 /* invalid escape \ sequence */
+#define REG_ESUBREG 6 /* invalid backreference number */
+#define REG_EBRACK 7 /* brackets [] not balanced */
+#define REG_EPAREN 8 /* parentheses () not balanced */
+#define REG_EBRACE 9 /* braces {} not balanced */
+#define REG_BADBR 10 /* invalid repetition count(s) */
+#define REG_ERANGE 11 /* invalid character range */
+#define REG_ESPACE 12 /* out of memory */
+#define REG_BADRPT 13 /* quantifier operand invalid */
+#define REG_EMPTY 14 /* empty regular expression */
+#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
+#define REG_INVARG 16 /* invalid argument to regex routine */
+#define REG_MIXED 17 /* char RE applied to wchar_t string (etc.) */
+#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_IMPOSS 19 /* can never match */
+/* two specials for debugging and testing */
+#define REG_ATOI 101 /* convert error-code name to number */
+#define REG_ITOA 102 /* convert error-code number to name */
+
+
+
+/*
+ * the prototypes, as possibly munched by fwd
+ *
+ * re_ucomp() and re_uexec() have the same parameter lists as the
+ * re_wcomp()/re_wexec() prototypes quoted in the comments earlier in this
+ * file, with Tcl_UniChar written in place of re_wchar. The parameters are
+ * unnamed; NOTE(review): their meaning has to be taken from the
+ * implementation, which is not visible here.
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regex.h === */
+EXTERN int re_ucomp _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
+ size_t, int));
+EXTERN int re_uexec _ANSI_ARGS_((regex_t *, const Tcl_UniChar *,
+ size_t, size_t, regmatch_t [], int));
+EXTERN VOID regfree _ANSI_ARGS_((regex_t *));
+EXTERN size_t regerror _ANSI_ARGS_((int, const regex_t *, char *, size_t));
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/*
+ * The TclRegexp structure encapsulates a compiled regex_t,
+ * the flags that were used to compile it, the last string matched
+ * with it, and an array of regmatch_t offset pairs (not pointers) that
+ * indicate subexpressions after a call to Tcl_RegExpExec.
*/
-#define NSUBEXP 20
+typedef struct TclRegexp {
+ int flags; /* Regexp compile flags. */
+ regex_t re; /* Compiled re, includes number of
+ * subexpressions. */
+ CONST char *string; /* Last string matched with this regexp
+ * (UTF-8), so Tcl_RegExpRange() can convert
+ * the matches from character indices to UTF-8
+ * byte offsets.
+ * NOTE(review): who owns this pointer and how
+ * long it stays valid is not visible here. */
+ regmatch_t *matches; /* Array of indices into the Tcl_UniChar
+ * representation of the last string matched
+ * with this regexp to indicate the location
+ * of subexpressions. */
+} TclRegexp;
+
+/*
+ * Functions exported from the regexp package for the test package to use.
+ *
+ * TclRegError takes an interpreter, a message string and an integer
+ * status and returns nothing. NOTE(review): how the three are combined
+ * is defined by the implementation, which is not visible here --
+ * presumably status is one of the REG_* error codes; confirm.
+ */
+
+EXTERN void TclRegError _ANSI_ARGS_((Tcl_Interp *interp, char *msg,
+ int status));
+
+#endif /* _TCLREGEXP */
+
+
-typedef struct regexp {
- char *startp[NSUBEXP];
- char *endp[NSUBEXP];
- char regstart; /* Internal use only. */
- char reganch; /* Internal use only. */
- char *regmust; /* Internal use only. */
- int regmlen; /* Internal use only. */
- char program[1]; /* Unwarranted chumminess with compiler. */
-} regexp;
-EXTERN regexp *TclRegComp _ANSI_ARGS_((char *exp));
-EXTERN int TclRegExec _ANSI_ARGS_((regexp *prog, char *string, char *start));
-EXTERN void TclRegSub _ANSI_ARGS_((regexp *prog, char *source, char *dest));
-EXTERN void TclRegError _ANSI_ARGS_((char *msg));
-EXTERN char *TclGetRegError _ANSI_ARGS_((void));
-#endif /* REGEXP */