From 58132c6799fe89ed5f08da4ff5d0f5631e075b8c Mon Sep 17 00:00:00 2001
From: Guido van Rossum <guido@python.org>
Date: Wed, 17 Dec 1997 00:24:13 +0000
Subject: AMK's latest; plus three null bytes that I added for purify

---
 Modules/pcre-int.h      | 293 +++++++++++++++++++++++++++++++++++++++++++++++
 Modules/pcre-internal.h | 294 ------------------------------------------------
 Modules/pcre.h          |   8 +-
 Modules/pcremodule.c    |  42 ++++---
 Modules/pypcre.c        | 267 ++++++++++++++++++++-----------------------
 5 files changed, 447 insertions(+), 457 deletions(-)
 create mode 100644 Modules/pcre-int.h
 delete mode 100644 Modules/pcre-internal.h

diff --git a/Modules/pcre-int.h b/Modules/pcre-int.h
new file mode 100644
index 0000000..2e3e1af
--- /dev/null
+++ b/Modules/pcre-int.h
@@ -0,0 +1,293 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+
+#define PCRE_VERSION       "1.02 12-Dec-1997"
+
+
+/* This is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language. See
+the file Tech.Notes for some information on the internals.
+
+Written by: Philip Hazel <ph10@cam.ac.uk>
+
+           Copyright (c) 1997 University of Cambridge
+
+-----------------------------------------------------------------------------
+Permission is granted to anyone to use this software for any purpose on any
+computer system, and to redistribute it freely, subject to the following
+restrictions:
+
+1. This software is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+2. The origin of this software must not be misrepresented, either by
+   explicit claim or by omission.
+
+3. Altered versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+-----------------------------------------------------------------------------
+*/
+
+/* This header contains definitions that are shared between the different
+modules, but which are not relevant to the outside. */
+
+
+/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
+define a macro for memmove() if USE_BCOPY is defined. */
+
+#ifdef USE_BCOPY
+#define memmove(a, b, c) bcopy(b, a, c)
+#endif
+ 
+/* Standard C headers plus the external interface definition */
+
+#include <ctype.h>
+#include <limits.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pcre.h"
+
+/* Private options flags start at the most significant end of the two bytes.
+The public options defined in pcre.h start at the least significant end. Make
+sure they don't overlap! */
+
+#define PCRE_FIRSTSET           0x8000  /* first_char is set */
+#define PCRE_STARTLINE          0x4000  /* start after \n for multiline */
+#define PCRE_COMPILED_CASELESS  0x2000  /* like it says */
+
+/* Options for the "extra" block produced by pcre_study(). */
+
+#define PCRE_STUDY_CASELESS 0x01     /* study was caseless */
+#define PCRE_STUDY_MAPPED   0x02     /* a map of starting chars exists */
+
+/* Masks for identifying the public options: all permitted at compile time,
+only some permitted at run or study time. */
+
+#ifdef FOR_PYTHON
+#define PUBLIC_OPTIONS \
+  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
+   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_LOCALE)
+#else
+#define PUBLIC_OPTIONS \
+  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
+   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA)
+#endif
+#define PUBLIC_EXEC_OPTIONS \
+  (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
+   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY)
+
+#define PUBLIC_STUDY_OPTIONS (PCRE_CASELESS)
+
+/* Magic number to provide a small check against being handed junk. */
+
+#define MAGIC_NUMBER  0x50435245   /* 'PCRE' */
+
+/* Miscellaneous definitions */
+
+typedef int BOOL;
+
+#define FALSE   0
+#define TRUE    1
+
+/* These are escaped items that aren't just an encoding of a particular data
+value such as \n. They must have non-zero values, as check_escape() returns
+their negation. Also, they must appear in the same order as in the opcode
+definitions below, up to ESC_Z. The final one must be ESC_REF as subsequent
+values are used for \1, \2, \3, etc. There is a test in the code for an escape
+greater than ESC_b and less than ESC_X to detect the types that may be
+repeated. If any new escapes are put in-between that don't consume a character,
+that code will have to change. */
+
+enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w,
+
+                    /* These are not Perl escapes, so can't appear in the */
+       ESC_X,       /* simple table-lookup because they must be conditional */
+                    /* on PCRE_EXTRA. */
+       ESC_Z,       /* last escape whose position must mirror the opcode list */
+       ESC_REF };   /* must stay last: larger values stand for \1, \2, \3, ... */
+
+/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
+that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
+OP_EOD must correspond in order to the list of escapes immediately above. */
+
+enum {               /* Opcode values; several groups below rely on this exact order */
+  OP_END,            /* End of pattern */
+
+  /* Values corresponding to backslashed metacharacters */
+
+  OP_SOD,            /* Start of data: \A */
+  OP_NOT_WORD_BOUNDARY,  /* \B */
+  OP_WORD_BOUNDARY,      /* \b */
+  OP_NOT_DIGIT,          /* \D */
+  OP_DIGIT,              /* \d */
+  OP_NOT_WHITESPACE,     /* \S */
+  OP_WHITESPACE,         /* \s */
+  OP_NOT_WORDCHAR,       /* \W */
+  OP_WORDCHAR,           /* \w */
+  OP_CUT,            /* The analogue of Prolog's "cut" operation (extension) */
+  OP_EOD,            /* End of data: \Z. */
+
+  OP_NOT_WORD_BOUNDARY_L,  /* localized \B */
+  OP_WORD_BOUNDARY_L,      /* localized \b */
+  OP_NOT_WORDCHAR_L,       /* localized \W */
+  OP_WORDCHAR_L,           /* localized \w */
+
+  OP_CIRC,           /* Start of line - varies with multiline switch */
+  OP_DOLL,           /* End of line - varies with multiline switch */
+  OP_ANY,            /* Match any character */
+  OP_CHARS,          /* Match string of characters */
+  OP_NOT,            /* Match anything but the following char */
+
+  OP_STAR,           /* The maximizing and minimizing versions of */
+  OP_MINSTAR,        /* all these opcodes must come in pairs, with */
+  OP_PLUS,           /* the minimizing one second. */
+  OP_MINPLUS,        /* This first set applies to single characters */
+  OP_QUERY,
+  OP_MINQUERY,
+  OP_UPTO,           /* From 0 to n matches */
+  OP_MINUPTO,
+  OP_EXACT,          /* Exactly n matches */
+
+  OP_NOTSTAR,        /* The maximizing and minimizing versions of */
+  OP_NOTMINSTAR,     /* all these opcodes must come in pairs, with */
+  OP_NOTPLUS,        /* the minimizing one second. */
+  OP_NOTMINPLUS,     /* This first set applies to "not" single characters */
+  OP_NOTQUERY,
+  OP_NOTMINQUERY,
+  OP_NOTUPTO,        /* From 0 to n matches */
+  OP_NOTMINUPTO,
+  OP_NOTEXACT,       /* Exactly n matches */
+
+  OP_TYPESTAR,       /* The maximizing and minimizing versions of */
+  OP_TYPEMINSTAR,    /* all these opcodes must come in pairs, with */
+  OP_TYPEPLUS,       /* the minimizing one second. These codes must */
+  OP_TYPEMINPLUS,    /* be in exactly the same order as those above. */
+  OP_TYPEQUERY,      /* This set applies to character types such as \d */
+  OP_TYPEMINQUERY,
+  OP_TYPEUPTO,       /* From 0 to n matches */
+  OP_TYPEMINUPTO,
+  OP_TYPEEXACT,      /* Exactly n matches */
+
+  OP_CRSTAR,         /* The maximizing and minimizing versions of */
+  OP_CRMINSTAR,      /* all these opcodes must come in pairs, with */
+  OP_CRPLUS,         /* the minimizing one second. These codes must */
+  OP_CRMINPLUS,      /* be in exactly the same order as those above. */
+  OP_CRQUERY,        /* These are for character classes and back refs */
+  OP_CRMINQUERY,
+  OP_CRRANGE,        /* These are different to the three sets above. */
+  OP_CRMINRANGE,
+
+  OP_CLASS,          /* Match a character class */
+  OP_CLASS_L,        /* Presumably the localized OP_CLASS (cf. other _L codes) */
+  OP_REF,            /* Match a back reference */
+
+  OP_ALT,            /* Start of alternation */
+  OP_KET,            /* End of group that doesn't have an unbounded repeat */
+  OP_KETRMAX,        /* These two must remain together and in this */
+  OP_KETRMIN,        /* order. They are for groups that repeat for ever. */
+
+  OP_ASSERT,
+  OP_ASSERT_NOT,
+  OP_ONCE,           /* Once matched, don't back up into the subpattern */
+
+  OP_BRAZERO,        /* These two must remain together and in this */
+  OP_BRAMINZERO,     /* order. */
+
+  OP_BRA             /* This and greater values are used for brackets that
+                        extract substrings. */
+};
+
+/* The highest extraction number. This is limited by the number of opcodes
+left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */
+
+#define EXTRACT_MAX  99
+
+/* The texts of compile-time error messages are defined as macros here so that
+they can be accessed by the POSIX wrapper and converted into error codes.  Yes,
+I could have used error codes in the first place, but didn't feel like changing
+just to accommodate the POSIX wrapper. */
+
+#define ERR1  "\\ at end of pattern"
+#define ERR2  "\\c at end of pattern"
+#define ERR3  "unrecognized character follows \\"
+#define ERR4  "numbers out of order in {} quantifier"
+#define ERR5  "number too big in {} quantifier"
+#define ERR6  "missing terminating ] for character class"
+#define ERR7  "invalid escape sequence in character class"
+#define ERR8  "range out of order in character class"
+#define ERR9  "nothing to repeat"
+#define ERR10 "operand of unlimited repeat could match the empty string"
+#define ERR11 "internal error: unexpected repeat"
+#define ERR12 "unrecognized character after (?"
+#define ERR13 "too many capturing parenthesized sub-patterns"
+#define ERR14 "missing )"
+#define ERR15 "back reference to non-existent subpattern"
+#define ERR16 "erroffset passed as NULL"
+#define ERR17 "unknown option bit(s) set"
+#define ERR18 "missing ) after comment"
+#define ERR19 "too many sets of parentheses"
+#define ERR20 "regular expression too large"
+#define ERR21 "failed to get memory"
+#define ERR22 "unmatched brackets"
+#define ERR23 "internal error: code overflow"
+
+/* All character handling must be done as unsigned characters. Otherwise there
+are problems with top-bit-set characters and functions such as isspace().
+However, we leave the interface to the outside world as char *, because that
+should make things easier for callers. We define a short type for unsigned char
+to save lots of typing. I tried "uchar", but it causes problems on Digital
+Unix, where it is defined in sys/types, so use "uschar" instead. */
+
+typedef unsigned char uschar;
+
+/* The real format of the start of the pcre block; the actual code vector
+runs on as long as necessary after the end. */
+
+typedef struct real_pcre {
+  unsigned int  magic_number;     /* checked against MAGIC_NUMBER to reject junk */
+  unsigned short int options;     /* public bits at the low end, private PCRE_* flags at the top */
+  unsigned char top_bracket;      /* presumably the highest extracting bracket number - confirm */
+  unsigned char top_backref;      /* presumably the highest back reference used - confirm */
+  unsigned char first_char;       /* PCRE_FIRSTSET in options says this has been set */
+  unsigned char code[1];          /* start of the code vector, which runs on past the struct */
+} real_pcre;
+
+/* The real format of the extra block returned by pcre_study(). */
+
+typedef struct real_pcre_extra {
+  unsigned char options;          /* PCRE_STUDY_* flags */
+  unsigned char start_bits[32];   /* presumably the starting-char map that PCRE_STUDY_MAPPED announces - confirm */
+} real_pcre_extra;
+
+/* Global tables from chartables.c */
+
+extern uschar pcre_lcc[];
+extern uschar pcre_fcc[];
+extern uschar pcre_cbits[];
+extern uschar pcre_ctypes[];
+
+/* Bit definitions for entries in pcre_ctypes[]. */
+
+#define ctype_space   0x01
+#define ctype_letter  0x02
+#define ctype_digit   0x04
+#define ctype_xdigit  0x08
+#define ctype_word    0x10   /* alphameric or '_' */
+#define ctype_odigit  0x20   /* octal digit */
+#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
+
+/* Offsets for the bitmap tables */
+
+#define cbit_digit    0
+#define cbit_letter  32
+#define cbit_word    64
+#define cbit_space   96
+#define cbit_length 128      /* Length of the cbits table */
+
+/* End of pcre-int.h */
diff --git a/Modules/pcre-internal.h b/Modules/pcre-internal.h
deleted file mode 100644
index 735c02d..0000000
--- a/Modules/pcre-internal.h
+++ /dev/null
@@ -1,294 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-
-#define PCRE_VERSION       "1.01 19-Nov-1997"
-
-
-/* This is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language. See
-the file Tech.Notes for some information on the internals.
-
-Written by: Philip Hazel <ph10@cam.ac.uk>
-
-           Copyright (c) 1997 University of Cambridge
-
------------------------------------------------------------------------------
-Permission is granted to anyone to use this software for any purpose on any
-computer system, and to redistribute it freely, subject to the following
-restrictions:
-
-1. This software is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
-2. The origin of this software must not be misrepresented, either by
-   explicit claim or by omission.
-
-3. Altered versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
------------------------------------------------------------------------------
-*/
-
-/* This header contains definitions that are shared between the different
-modules, but which are not relevant to the outside. */
-
-
-/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
-define a macro for memmove() if USE_BCOPY is defined. */
-
-#ifdef USE_BCOPY
-#define memmove(a, b, c) bcopy(b, a, c)
-#endif
- 
-/* Standard C headers plus the external interface definition */
-
-#include <ctype.h>
-#include <limits.h>
-#include <setjmp.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "pcre.h"
-
-/* Private options flags start at the most significant end of the two bytes.
-The public options defined in pcre.h start at the least significant end. Make
-sure they don't overlap! */
-
-#define PCRE_FIRSTSET           0x8000  /* first_char is set */
-#define PCRE_STARTLINE          0x4000  /* start after \n for multiline */
-#define PCRE_COMPILED_CASELESS  0x2000  /* like it says */
-
-/* Options for the "extra" block produced by pcre_study(). */
-
-#define PCRE_STUDY_CASELESS 0x01     /* study was caseless */
-#define PCRE_STUDY_MAPPED   0x02     /* a map of starting chars exists */
-
-/* Masks for identifying the public options: all permitted at compile time,
-only some permitted at run or study time. */
-
-#ifdef FOR_PYTHON
-#define PUBLIC_OPTIONS \
-  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_LOCALE)
-#else
-#define PUBLIC_OPTIONS \
-  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA)
-#endif
-#define PUBLIC_EXEC_OPTIONS \
-  (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
-   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY)
-
-#define PUBLIC_STUDY_OPTIONS (PCRE_CASELESS)
-
-/* Magic number to provide a small check against being handed junk. */
-
-#define MAGIC_NUMBER  0x50435245   /* 'PCRE' */
-
-/* Miscellaneous definitions */
-
-typedef int BOOL;
-
-#define FALSE   0
-#define TRUE    1
-
-/* These are escaped items that aren't just an encoding of a particular data
-value such as \n. They must have non-zero values, as check_escape() returns
-their negation. Also, they must appear in the same order as in the opcode
-definitions below, up to ESC_Z. The final one must be ESC_REF as subsequent
-values are used for \1, \2, \3, etc. There is a test in the code for an escape
-greater than ESC_b and less than ESC_X to detect the types that may be
-repeated. If any new escapes are put in-between that don't consume a character,
-that code will have to change. */
-
-enum { ESC_A = 1, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w,
-
-                    /* These are not Perl escapes, so can't appear in the */
-       ESC_X,       /* simple table-lookup because they must be conditional */
-                    /* on PCRE_EXTRA. */
-       ESC_Z,
-       ESC_REF };
-
-/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
-that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
-OP_EOL must correspond in order to the list of escapes immediately above. */
-
-enum {
-  OP_END,            /* End of pattern */
-
-  /* Values corresponding to backslashed metacharacters */
-
-  OP_SOD,            /* Start of data: \A */
-  OP_NOT_WORD_BOUNDARY,  /* \B */
-  OP_WORD_BOUNDARY,      /* \b */
-  OP_NOT_DIGIT,          /* \D */
-  OP_DIGIT,              /* \d */
-  OP_NOT_WHITESPACE,     /* \S */
-  OP_WHITESPACE,         /* \s */
-  OP_NOT_WORDCHAR,       /* \W */
-  OP_WORDCHAR,           /* \w */
-  OP_CUT,            /* The analogue of Prolog's "cut" operation (extension) */
-  OP_EOD,            /* End of data: or \Z. This must always be the last
-                        of the backslashed meta values. */
-
-  OP_NOT_WORD_BOUNDARY_L,  /* localized \B */
-  OP_WORD_BOUNDARY_L,      /* localized \b */
-  OP_NOT_WORDCHAR_L,       /* localized \W */
-  OP_WORDCHAR_L,           /* localized \w */
-
-  OP_CIRC,           /* Start of line - varies with multiline switch */
-  OP_DOLL,           /* End of line - varies with multiline switch */
-  OP_ANY,            /* Match any character */
-  OP_CHARS,          /* Match string of characters */
-  OP_NOT,            /* Match anything but the following char */
-
-  OP_STAR,           /* The maximizing and minimizing versions of */
-  OP_MINSTAR,        /* all these opcodes must come in pairs, with */
-  OP_PLUS,           /* the minimizing one second. */
-  OP_MINPLUS,        /* This first set applies to single characters */
-  OP_QUERY,
-  OP_MINQUERY,
-  OP_UPTO,           /* From 0 to n matches */
-  OP_MINUPTO,
-  OP_EXACT,          /* Exactly n matches */
-
-  OP_NOTSTAR,        /* The maximizing and minimizing versions of */
-  OP_NOTMINSTAR,     /* all these opcodes must come in pairs, with */
-  OP_NOTPLUS,        /* the minimizing one second. */
-  OP_NOTMINPLUS,     /* This first set applies to "not" single characters */
-  OP_NOTQUERY,
-  OP_NOTMINQUERY,
-  OP_NOTUPTO,        /* From 0 to n matches */
-  OP_NOTMINUPTO,
-  OP_NOTEXACT,       /* Exactly n matches */
-
-  OP_TYPESTAR,       /* The maximizing and minimizing versions of */
-  OP_TYPEMINSTAR,    /* all these opcodes must come in pairs, with */
-  OP_TYPEPLUS,       /* the minimizing one second. These codes must */
-  OP_TYPEMINPLUS,    /* be in exactly the same order as those above. */
-  OP_TYPEQUERY,      /* This set applies to character types such as \d */
-  OP_TYPEMINQUERY,
-  OP_TYPEUPTO,       /* From 0 to n matches */
-  OP_TYPEMINUPTO,
-  OP_TYPEEXACT,      /* Exactly n matches */
-
-  OP_CRSTAR,         /* The maximizing and minimizing versions of */
-  OP_CRMINSTAR,      /* all these opcodes must come in pairs, with */
-  OP_CRPLUS,         /* the minimizing one second. These codes must */
-  OP_CRMINPLUS,      /* be in exactly the same order as those above. */
-  OP_CRQUERY,        /* These are for character classes and back refs */
-  OP_CRMINQUERY,
-  OP_CRRANGE,        /* These are different to the three seta above. */
-  OP_CRMINRANGE,
-
-  OP_CLASS,          /* Match a character class */
-  OP_CLASS_L,        /* Match a character class */
-  OP_REF,            /* Match a back reference */
-
-  OP_ALT,            /* Start of alternation */
-  OP_KET,            /* End of group that doesn't have an unbounded repeat */
-  OP_KETRMAX,        /* These two must remain together and in this */
-  OP_KETRMIN,        /* order. They are for groups the repeat for ever. */
-
-  OP_ASSERT,
-  OP_ASSERT_NOT,
-  OP_ONCE,           /* Once matched, don't back up into the subpattern */
-
-  OP_BRAZERO,        /* These two must remain together and in this */
-  OP_BRAMINZERO,     /* order. */
-
-  OP_BRA             /* This and greater values are used for brackets that
-                        extract substrings. */
-};
-
-/* The highest extraction number. This is limited by the number of opcodes
-left after OP_BRA, i.e. 255 - OP_BRA. We actually set it somewhat lower. */
-
-#define EXTRACT_MAX  99
-
-/* The texts of compile-time error messages are defined as macros here so that
-they can be accessed by the POSIX wrapper and converted into error codes.  Yes,
-I could have used error codes in the first place, but didn't feel like changing
-just to accommodate the POSIX wrapper. */
-
-#define ERR1  "\\ at end of pattern"
-#define ERR2  "\\c at end of pattern"
-#define ERR3  "unrecognized character follows \\"
-#define ERR4  "numbers out of order in {} quantifier"
-#define ERR5  "number too big in {} quantifier"
-#define ERR6  "missing terminating ] for character class"
-#define ERR7  "invalid escape sequence in character class"
-#define ERR8  "range out of order in character class"
-#define ERR9  "nothing to repeat"
-#define ERR10 "operand of unlimited repeat could match the empty string"
-#define ERR11 "internal error: unexpected repeat"
-#define ERR12 "unrecognized character after (?"
-#define ERR13 "too many capturing parenthesized sub-patterns"
-#define ERR14 "missing )"
-#define ERR15 "back reference to non-existent subpattern"
-#define ERR16 "erroffset passed as NULL"
-#define ERR17 "unknown option bit(s) set"
-#define ERR18 "missing ) after comment"
-#define ERR19 "too many sets of parentheses"
-#define ERR20 "regular expression too large"
-#define ERR21 "failed to get memory"
-#define ERR22 "unmatched brackets"
-#define ERR23 "internal error: code overflow"
-
-/* All character handling must be done as unsigned characters. Otherwise there
-are problems with top-bit-set characters and functions such as isspace().
-However, we leave the interface to the outside world as char *, because that
-should make things easier for callers. We define a short type for unsigned char
-to save lots of typing. I tried "uchar", but it causes problems on Digital
-Unix, where it is defined in sys/types, so use "uschar" instead. */
-
-typedef unsigned char uschar;
-
-/* The real format of the start of the pcre block; the actual code vector
-runs on as long as necessary after the end. */
-
-typedef struct real_pcre {
-  unsigned int  magic_number;
-  unsigned short int options;
-  unsigned char top_bracket;
-  unsigned char top_backref;
-  unsigned char first_char;
-  unsigned char code[1];
-} real_pcre;
-
-/* The real format of the extra block returned by pcre_study(). */
-
-typedef struct real_pcre_extra {
-  unsigned char options;
-  unsigned char start_bits[32];
-} real_pcre_extra;
-
-/* Global tables from chartables.c */
-
-extern uschar pcre_lcc[];
-extern uschar pcre_fcc[];
-extern uschar pcre_cbits[];
-extern uschar pcre_ctypes[];
-
-/* Bit definitions for entries in pcre_ctypes[]. */
-
-#define ctype_space   0x01
-#define ctype_letter  0x02
-#define ctype_digit   0x04
-#define ctype_xdigit  0x08
-#define ctype_word    0x10   /* alphameric or '_' */
-#define ctype_odigit  0x20   /* octal digit */
-#define ctype_meta    0x80   /* regexp meta char or zero (end pattern) */
-
-/* Offsets for the bitmap tables */
-
-#define cbit_digit    0
-#define cbit_letter  32
-#define cbit_word    64
-#define cbit_space   96
-#define cbit_length 128      /* Length of the cbits table */
-
-/* End of internal.h */
diff --git a/Modules/pcre.h b/Modules/pcre.h
index 3a215d5..4ba9d9c 100644
--- a/Modules/pcre.h
+++ b/Modules/pcre.h
@@ -55,14 +55,14 @@ extern void  (*pcre_free)(void *);
 /* Functions */
 
 #ifdef FOR_PYTHON
-extern pcre *pcre_compile(const char *, int, char **, int *, PyObject *);
+extern pcre *pcre_compile(const char *, int, const char **, int *, PyObject *);
 #else
-extern pcre *pcre_compile(const char *, int, char **, int *);
+extern pcre *pcre_compile(const char *, int, const char **, int *);
 #endif
 extern int pcre_exec(const pcre *, const pcre_extra *, const char *,
   int, int, int *, int);
 extern int pcre_info(const pcre *, int *, int *);
-extern pcre_extra *pcre_study(const pcre *, int, char **);
-extern char *pcre_version(void);
+extern pcre_extra *pcre_study(const pcre *, int, const char **);
+extern const char *pcre_version(void);
 
 #endif /* End of pcre.h */
diff --git a/Modules/pcremodule.c b/Modules/pcremodule.c
index 8701ba5..2587fa0 100644
--- a/Modules/pcremodule.c
+++ b/Modules/pcremodule.c
@@ -1,5 +1,5 @@
 /***********************************************************
-Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
+Copyright 1997 by Stichting Mathematisch Centrum, Amsterdam,
 The Netherlands.
 
                         All Rights Reserved
@@ -33,6 +33,7 @@ PERFORMANCE OF THIS SOFTWARE.
 
 #include "Python.h"
 
+#include <assert.h>
 #ifndef Py_eval_input
 /* For Python 1.4, graminit.h has to be explicitly included */
 #include "graminit.h"
@@ -44,7 +45,7 @@ PERFORMANCE OF THIS SOFTWARE.
 #endif
 
 #include "pcre.h"
-#include "pcre-internal.h"
+#include "pcre-int.h"
 
 static PyObject *ErrorObject;
 
@@ -127,7 +128,9 @@ PyPcre_exec(self, args)
 	if (count==PCRE_ERROR_NOMATCH) {Py_INCREF(Py_None); return Py_None;}
 	if (count<0)
 	{
-		PyErr_SetObject(ErrorObject, Py_BuildValue("si", "Regex error", count));
+		PyObject *errval = Py_BuildValue("si", "Regex execution error", count);
+		PyErr_SetObject(ErrorObject, errval);
+		Py_XDECREF(errval);
 		return NULL;
 	}
 	
@@ -191,7 +194,7 @@ PyPcre_compile(self, args)
 	PcreObject *rv;
 	PyObject *dictionary;
 	char *pattern, *newpattern;
-	char *error;
+	const char *error;
 	int num_zeros, i, j;
 	
 	int patternlen, options, erroroffset;
@@ -203,12 +206,13 @@ PyPcre_compile(self, args)
 		return NULL;
 
 	/* PCRE doesn't like having null bytes in its pattern, so we have to replace 
-	   any zeros in the string with the characters '\0'. */
-	num_zeros=1;
+	   any zeros in the string with the characters '\000'. This increases the size
+	   of the string by 3*num_zeros, plus 1 byte for the terminating \0.  */
+	num_zeros=1;      /* Start at 1; this will give 3 extra bytes of leeway */
 	for(i=0; i<patternlen; i++) {
 		if (pattern[i]==0) num_zeros++;
 	}
-	newpattern=malloc(patternlen+num_zeros);
+	newpattern=malloc(patternlen + num_zeros*3 + 4); 
 	if (newpattern==NULL) {
 		PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
 		return NULL;
@@ -217,10 +221,16 @@ PyPcre_compile(self, args)
 	{
 		if (pattern[i]!=0) newpattern[j]=pattern[i];
 		else {
-			newpattern[j++]='\\';
-			newpattern[j]  ='0';
+			newpattern[j++] ='\\';
+			newpattern[j++] = '0';
+			newpattern[j++] = '0';
+			newpattern[j  ] = '0';
 		}
 	}
+	/* Keep purify happy; for pcre, one null byte is enough! */
+	newpattern[j++]='\0';
+	newpattern[j++]='\0';
+	newpattern[j++]='\0';
 	newpattern[j]='\0';
 
 	rv->regex = pcre_compile((char*)newpattern, options, 
@@ -231,21 +241,27 @@ PyPcre_compile(self, args)
 		PyMem_DEL(rv);
 		if (!PyErr_Occurred())
 		{
-			PyErr_SetObject(ErrorObject, Py_BuildValue("si", error, erroroffset));
+			PyObject *errval = Py_BuildValue("si", error, erroroffset);
+			PyErr_SetObject(ErrorObject, errval);
+			Py_XDECREF(errval);
 		}
 		return NULL;
 	}
 	rv->regex_extra=pcre_study(rv->regex, 0, &error);
 	if (rv->regex_extra==NULL && error!=NULL) 
 	{
+		PyObject *errval = Py_BuildValue("si", error, 0);
 		PyMem_DEL(rv);
-		PyErr_SetObject(ErrorObject, Py_BuildValue("si", error, 0));
+		PyErr_SetObject(ErrorObject, errval);
+		Py_XDECREF(errval);
 		return NULL;
 	}
         rv->num_groups = pcre_info(rv->regex, NULL, NULL);
 	if (rv->num_groups<0) 
 	{
-		PyErr_SetObject(ErrorObject, Py_BuildValue("si", "Regex error", rv->num_groups));
+		PyObject *errval = Py_BuildValue("si", error, rv->num_groups);
+		PyErr_SetObject(ErrorObject, errval);
+		Py_XDECREF(errval);
 		PyMem_DEL(rv);
 		return NULL;
 	}
@@ -526,7 +542,7 @@ PyPcre_expand(self, args)
 				Py_DECREF(r); Py_DECREF(tuple);
 				if (result==NULL)
 				{
-					/* The group() method trigged an exception of some sort */
+					/* The group() method triggered an exception of some sort */
 					Py_DECREF(results);
 					Py_DECREF(value);
 					return NULL;
diff --git a/Modules/pypcre.c b/Modules/pypcre.c
index 699932f..55908e7 100644
--- a/Modules/pypcre.c
+++ b/Modules/pypcre.c
@@ -15,10 +15,9 @@ file by hand, or submit patches to it.
 The Python-specific PCRE distribution can be retrieved from
        http://starship.skyport.net/crew/amk/regex/
 
-The unmodified original PCRE distribution doesn't have a fixed URL
-yet; write Philip Hazel <ph10@cam.ac.uk> for the latest version.
-
-Written by:  Philip Hazel <ph10@cam.ac.uk>
+The unmodified original PCRE distribution is available at
+ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/, and was originally
+written by: Philip Hazel <ph10@cam.ac.uk>
 
 Extensively modified by the Python String-SIG: <string-sig@python.org>
 Send bug reports to:                           <string-sig@python.org>
@@ -46,7 +45,7 @@ restrictions:
 
 
 #define FOR_PYTHON
-#include "pcre-internal.h"
+#include "pcre-int.h"
 #include "Python.h"
 #include "mymalloc.h"
 #include <ctype.h>
@@ -254,13 +253,13 @@ Returns:       TRUE if table built, FALSE otherwise
 */
 
 static BOOL
-set_start_bits(uschar *code, uschar *start_bits)
+set_start_bits(const uschar *code, uschar *start_bits)
 {
 register int c;
 
 do
   {
-  uschar *tcode = code + 3;
+  const uschar *tcode = code + 3;
   BOOL try_next = TRUE;
 
   while (try_next)
@@ -466,12 +465,12 @@ Returns:    pointer to a pcre_extra block,
 */
 
 pcre_extra *
-pcre_study(const pcre *external_re, int options, char **errorptr)
+pcre_study(const pcre *external_re, int options, const char **errorptr)
 {
 BOOL caseless;
 uschar start_bits[32];
 real_pcre_extra *extra;
-real_pcre *re = (real_pcre *)external_re;
+const real_pcre *re = (const real_pcre *)external_re;
 
 *errorptr = NULL;
 
@@ -592,7 +591,8 @@ static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
 /* Text forms of OP_ values and things, for debugging */
 
 #ifdef DEBUG
-static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",
+static const char *OP_names[] = { 
+  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", 
   "localized \\B", "localized \\b", "localized \\W", "localized \\w",
   "^", "$", "Any", "chars",
@@ -627,8 +627,8 @@ static short int escapes[] = {
 
 /* Definition to allow mutual recursion */
 
-static BOOL compile_regex(int, int *, uschar **, uschar **, 
-			   char **, PyObject *);
+static BOOL compile_regex(int, int *, uschar **, const uschar **, 
+			   const char **, PyObject *);
 
 /* Structure for passing "static" information around between the functions
 doing the matching, so that they are thread-safe. */
@@ -645,10 +645,10 @@ typedef struct match_data {
   BOOL   noteol;                /* NOTEOL flag */
   BOOL   dotall;                /* Dot matches any char */
   BOOL   endonly;               /* Dollar not before final \n */
-  uschar *start_subject;        /* Start of the subject string */
-  uschar *end_subject;          /* End of the subject string */
+  const uschar *start_subject;  /* Start of the subject string */
+  const uschar *end_subject;    /* End of the subject string */
   jmp_buf fail_env;             /* Environment for longjump() break out */
-  uschar *end_match_ptr;        /* Subject position at end match */
+  const uschar *end_match_ptr;  /* Subject position at end match */
   int     end_offset_top;       /* Highwater mark at end of match */
   jmp_buf error_env;          /* For longjmp() if an error occurs deep inside a 
 				   matching operation */
@@ -656,7 +656,7 @@ typedef struct match_data {
   int    point;                 /* Point to add next item pushed onto stacks */
   /* Pointers to the 6 stacks */
   int *off_num, *offset_top, *r1, *r2; 
-  uschar **eptr, **ecode; 
+  const uschar **eptr, **ecode; 
 } match_data;
 
 
@@ -680,7 +680,7 @@ void  (*pcre_free)(void *) = free;
 *          Return version string                 *
 *************************************************/
 
-char *
+const char *
 pcre_version(void)
 {
 return PCRE_VERSION;
@@ -710,7 +710,7 @@ Returns:        number of identifying extraction brackets
 int
 pcre_info(const pcre *external_re, int *optptr, int *first_char)
 {
-real_pcre *re = (real_pcre *)external_re;
+const real_pcre *re = (real_pcre *)external_re;
 if (re == NULL) return PCRE_ERROR_NULL;
 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
 if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
@@ -906,9 +906,9 @@ Arguments:
 */
   
 static int 
-get_group_id(uschar *ptr, char finalchar, char **errorptr)
+get_group_id(const uschar *ptr, char finalchar, const char **errorptr)
 {
-  uschar *start = ptr;
+  const uschar *start = ptr;
 
   /* If the first character is not in \w, or is in \w but is a digit,
      report an error */
@@ -960,10 +960,10 @@ Returns:     zero or positive => a data character
 */
 
 static int
-check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,
-  BOOL isclass)
+check_escape(const uschar **ptrptr, const char **errorptr, int bracount, 
+	     int options, BOOL isclass)
 {
-uschar *ptr = *ptrptr;
+const uschar *ptr = *ptrptr;
 int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
 int i;
 
@@ -1092,7 +1092,7 @@ Returns:    TRUE or FALSE
 */
 
 static BOOL
-is_counted_repeat(uschar *p)
+is_counted_repeat(const uschar *p)
 {
 if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
 while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
@@ -1127,8 +1127,8 @@ Returns:     pointer to '}' on success;
              current ptr on error, with errorptr set
 */
 
-static uschar *
-read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)
+static const uschar *
+read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
 {
 int min = 0;
 int max = -1;
@@ -1183,16 +1183,16 @@ Returns:     TRUE on success
 
 static BOOL
 compile_branch(int options, int *brackets, uschar **codeptr,
-	       uschar **ptrptr, char **errorptr, PyObject *dictionary)
+	       const uschar **ptrptr, const char **errorptr, PyObject *dictionary)
 {
 int repeat_type, op_type;
 int repeat_min, repeat_max;
 int bravalue, length;
 register int c;
 register uschar *code = *codeptr;
-uschar *ptr = *ptrptr;
+const uschar *ptr = *ptrptr;
+const uschar *oldptr;
 uschar *previous = NULL;
-uschar *oldptr;
 uschar class[32];
 uschar *class_flag;  /* Pointer to the single-byte flag for OP_CLASS_L */
 
@@ -1299,7 +1299,7 @@ for (;; ptr++)
       /* Backslash may introduce a single character, or it may introduce one
       of the specials, which just set a flag. Escaped items are checked for
       validity in the pre-compiling pass. The sequence \b is a special case.
-      Inside a class (and only there) it is treated as backslash. Elsewhere
+      Inside a class (and only there) it is treated as backspace. Elsewhere
       it marks a word boundary. Other escapes have preset maps ready to
       or into the one we are building. We assume they have more than one
       character in them, so set class_count bigger than one. */
@@ -1314,22 +1314,12 @@ for (;; ptr++)
           switch (-c)
             {
             case ESC_d:
-	    if (options & PCRE_LOCALE)
-	      {
-		*class_flag |= 4;
-	      }
-	    else
 	      {
 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];
 	      }
             continue;
 
             case ESC_D:
-	    if (options & PCRE_LOCALE)
-	      {
-		*class_flag |= 8;
-	      }
-	    else
 	      {
 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];
 	      }
@@ -1360,22 +1350,12 @@ for (;; ptr++)
             continue;
 
             case ESC_s:
-	    if (options & PCRE_LOCALE)
-	      {
-		*class_flag |= 32;
-	      }
-	    else
 	      {
 		for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];
 	      }
             continue;
 
             case ESC_S:
-	    if (options & PCRE_LOCALE)
-	      {
-		*class_flag |= 32;
-	      }
-	    else
 	      {
 		for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];
 	      }
@@ -1795,7 +1775,7 @@ for (;; ptr++)
 	      }
 	      string = PyString_FromStringAndSize((char*)ptr, idlen);
 	      intobj = PyInt_FromLong( brackets[0] + 1 );
-	      if (intobj == NULL || string==NULL)
+	      if (intobj == NULL || string == NULL)
 		{
 		  Py_XDECREF(string);
 		  Py_XDECREF(intobj);
@@ -1803,7 +1783,7 @@ for (;; ptr++)
 		  goto FAILED;
 		}
 	      PyDict_SetItem(dictionary, string, intobj);
-	      Py_DECREF(string); Py_DECREF(intobj);
+	      Py_DECREF(string); Py_DECREF(intobj); /* PyDict_SetItem does not steal references; release ours */
 	      ptr += idlen+1;  /* Point to rest of expression */
 	      goto do_grouping_bracket;
 	    }
@@ -1820,7 +1800,6 @@ for (;; ptr++)
 	      }
 	      string = PyString_FromStringAndSize((char *)ptr, idlen);
 	      if (string==NULL)	{
-		  Py_XDECREF(string);
 		  *errorptr = "exception raised";
 		  goto FAILED;
 		}
@@ -1833,6 +1812,10 @@ for (;; ptr++)
 
 	      refnum = PyInt_AsLong(intobj);
 	      Py_DECREF(string); 
+	      /* The caller doesn't own the reference to the value
+		 returned from PyDict_GetItem, so intobj is not
+		 DECREF'ed. */
+
 	      *code++ = OP_REF;
 	      *code++ = refnum;
 	      /* The continue will cause the top-level for() loop to
@@ -1943,7 +1926,7 @@ for (;; ptr++)
       continue;
       }
 
-    /* Reset and fall through */
+    /* Data character: Reset and fall through */
 
     ptr = oldptr;
     c = '\\';
@@ -2035,9 +2018,9 @@ Returns:    TRUE on success
 
 static BOOL
 compile_regex(int options, int *brackets, uschar **codeptr,
-  uschar **ptrptr, char **errorptr, PyObject *dictionary)
+  const uschar **ptrptr, const char **errorptr, PyObject *dictionary)
 {
-uschar *ptr = *ptrptr;
+const uschar *ptr = *ptrptr;
 uschar *code = *codeptr;
 uschar *start_bracket = code;
 
@@ -2103,7 +2086,7 @@ Returns:   TRUE or FALSE
 */
 
 static BOOL
-is_anchored(register uschar *code, BOOL multiline)
+is_anchored(register const uschar *code, BOOL multiline)
 {
 do {
    int op = (int)code[3];
@@ -2132,7 +2115,7 @@ Returns:   TRUE or FALSE
 */
 
 static BOOL
-is_startline(uschar *code)
+is_startline(const uschar *code)
 {
 do {
    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
@@ -2217,7 +2200,7 @@ Returns:       pointer to compiled data block, or NULL on error,
 */
 
 pcre *
-pcre_compile(const char *pattern, int options, char **errorptr, 
+pcre_compile(const char *pattern, int options, const char **errorptr, 
 	     int *erroroffset, PyObject *dictionary)
 {
 real_pcre *re;
@@ -2227,9 +2210,10 @@ int runlength;
 int c, size;
 int bracount = 0;
 int brastack[200];
-int brastackptr = 0;
 int top_backref = 0;
-uschar *code, *ptr;
+unsigned int brastackptr = 0;
+uschar *code;
+const uschar *ptr;
 
 #ifdef DEBUG
 uschar *code_base, *code_end;
@@ -2268,7 +2252,7 @@ internal flag settings. Make an attempt to correct for any counted white space
 if an "extended" flag setting appears late in the pattern. We can't be so
 clever for #-comments. */
 
-ptr = (uschar *)(pattern - 1);
+ptr = (const uschar *)(pattern - 1);
 while ((c = *(++ptr)) != 0)
   {
   int min, max;
@@ -2295,7 +2279,7 @@ while ((c = *(++ptr)) != 0)
 
     case '\\':
       {
-      uschar *save_ptr = ptr;
+      const uschar *save_ptr = ptr;
       c = check_escape(&ptr, errorptr, bracount, options, FALSE);
       if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
       if (c >= 0)
@@ -2585,7 +2569,7 @@ while ((c = *(++ptr)) != 0)
 
       if (c == '\\')
         {
-        uschar *saveptr = ptr;
+        const uschar *saveptr = ptr;
         c = check_escape(&ptr, errorptr, bracount, options, FALSE);
         if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
         if (c < 0) { ptr = saveptr; break; }
@@ -2633,7 +2617,7 @@ re->options = options;
 error, *errorptr will be set non-NULL, so we don't need to look at the result
 of the function here. */
 
-ptr = (uschar *)pattern;
+ptr = (const uschar *)pattern;
 code = re->code;
 *code = OP_BRA;
 bracount = 0;
@@ -2661,7 +2645,7 @@ if (*errorptr != NULL)
   {
   (pcre_free)(re);
   PCRE_ERROR_RETURN:
-  *erroroffset = ptr - (uschar *)pattern;
+  *erroroffset = ptr - (const uschar *)pattern;
   return NULL;
   }
 
@@ -2947,8 +2931,8 @@ switch(type)
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;
-  case OP_NOT_WORDCHAR_L: return (c!='_' && !isalpha(c));
-  case OP_WORDCHAR_L:     return (c=='_' || isalpha(c));
+  case OP_NOT_WORDCHAR_L: return (c!='_' && !isalnum(c));
+  case OP_WORDCHAR_L:     return (c=='_' || isalnum(c));
   }
 return FALSE;
 }
@@ -2971,9 +2955,9 @@ Returns:      TRUE if matched
 */
 
 static BOOL
-match_ref(int number, register uschar *eptr, int length, match_data *md)
+match_ref(int number, register const uschar *eptr, int length, match_data *md)
 {
-uschar *p = md->start_subject + md->offset_vector[number];
+const uschar *p = md->start_subject + md->offset_vector[number];
 
 #ifdef DEBUG
 if (eptr >= md->end_subject)
@@ -2992,7 +2976,7 @@ printf("\n");
 
 if (length > md->end_subject - p) return FALSE;
 
-/* Separate the caselesss case for speed */
+/* Separate the caseless case for speed */
 
 if (md->caseless)
   { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
@@ -3027,8 +3011,8 @@ static int grow_stack(match_data *md)
       else {md->length = 80;}
     }
   PyMem_RESIZE(md->offset_top, int, md->length);
-  PyMem_RESIZE(md->eptr, uschar *, md->length);
-  PyMem_RESIZE(md->ecode, uschar *, md->length);
+  PyMem_RESIZE(md->eptr, const uschar *, md->length);
+  PyMem_RESIZE(md->ecode, const uschar *, md->length);
   PyMem_RESIZE(md->off_num, int, md->length);
   PyMem_RESIZE(md->r1, int, md->length);
   PyMem_RESIZE(md->r2, int, md->length);
@@ -3058,7 +3042,7 @@ Returns:       TRUE if matched
 */
 
 static BOOL
-match(register uschar *eptr, register uschar *ecode, int offset_top,
+match(register const uschar *eptr, register const uschar *ecode, int offset_top,
   match_data *md)
 {
   int save_stack_position = md->point;
@@ -3072,7 +3056,7 @@ for (;;)
   int min, max, ctype;
   register int i;
   register int c;
-  BOOL minimize;
+  BOOL minimize = FALSE;
 
   /* Opening bracket. Check the alternative branches in turn, failing if none
   match. We have to set the start offset if required and there is space
@@ -3085,7 +3069,7 @@ for (;;)
   if ((int)*ecode >= OP_BRA)
     {
     int number = (*ecode - OP_BRA) << 1;
-    int save_offset1, save_offset2;
+    int save_offset1 = 0, save_offset2 = 0;
 
 #ifdef DEBUG
     printf("start bracket %d\n", number/2);
@@ -3212,7 +3196,7 @@ for (;;)
 
     case OP_BRAZERO:
       {
-      uschar *next = ecode+1;
+      const uschar *next = ecode+1;
       if (match(eptr, next, offset_top, md)) SUCCEED;
       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
       ecode = next + 3;
@@ -3221,7 +3205,7 @@ for (;;)
 
     case OP_BRAMINZERO:
       {
-      uschar *next = ecode+1;
+      const uschar *next = ecode+1;
       do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
       if (match(eptr, next+3, offset_top, md)) SUCCEED;
       ecode++;
@@ -3237,7 +3221,7 @@ for (;;)
     case OP_KETRMAX:
       {
       int number;
-      uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
+      const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
 
       if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
         {
@@ -3279,11 +3263,10 @@ for (;;)
 
       if (*ecode == OP_KETRMIN)
         {
-	uschar *ptr;
+	const uschar *ptr;
 	if (match(eptr, ecode+3, offset_top, md)) goto succeed;
 	/* Handle alternation inside the BRA...KET; push the additional
-	   alternatives onto the stack 
-	   XXX this tries the alternatives backwards! */
+	   alternatives onto the stack */
 	ptr=prev;
 	do {
 	  ptr += (ptr[1]<<8)+ ptr[2];
@@ -3306,8 +3289,8 @@ for (;;)
         }
       else  /* OP_KETRMAX */
         {
-	uschar *ptr;
-	int points_pushed=0;
+	const uschar *ptr;
+	/*int points_pushed=0;*/
 
 	/* Push one failure point, that will resume matching at the code after 
 	   the KETRMAX opcode. */
@@ -3325,8 +3308,7 @@ for (;;)
 
 	md->offset_vector[number] = eptr - md->start_subject;
 	/* Handle alternation inside the BRA...KET; push each of the
-	   additional alternatives onto the stack 
-	   XXX this tries the alternatives backwards! */
+	   additional alternatives onto the stack */
 	ptr=prev;
 	do {
 	  ptr += (ptr[1]<<8)+ ptr[2];
@@ -3344,15 +3326,15 @@ for (;;)
 	      md->r2[md->point]         = 0; 
 	      md->off_num[md->point]    = 0; 
 	      md->point++;	      
-	      points_pushed++;
+	      /*points_pushed++;*/
 	    }
 	} while (*ptr==OP_ALT);
 	/* Jump to the first (or only) alternative and resume trying to match */
 	ecode=prev+3; goto match_loop;
         }
       }
-    FAIL;
-
+    break;
+    
     /* Start of subject unless notbol, or after internal newline if multiline */
 
     case OP_CIRC:
@@ -3419,9 +3401,9 @@ for (;;)
     case OP_WORD_BOUNDARY_L:
       {
 	BOOL prev_is_word = (eptr != md->start_subject) &&
-	  (isalpha(eptr[-1]) || eptr[-1]=='_');
+	  (isalnum(eptr[-1]) || eptr[-1]=='_');
 	BOOL cur_is_word = (eptr < md->end_subject) &&
-	  (isalpha(eptr[-1]) || eptr[-1]=='_');
+	  (isalnum(*eptr) || *eptr=='_');
 	if ((*ecode++ == OP_WORD_BOUNDARY_L)?
 	    cur_is_word == prev_is_word : cur_is_word != prev_is_word)
 	  FAIL;
@@ -3474,14 +3456,14 @@ for (;;)
     break;
 
     case OP_NOT_WORDCHAR_L:
-    if (eptr >= md->end_subject || (*eptr=='_' || isalpha(*eptr) ))
+    if (eptr >= md->end_subject || (*eptr=='_' || isalnum(*eptr) ))
       return FALSE;
     eptr++;
     ecode++;
     break;
 
     case OP_WORDCHAR_L:
-    if (eptr >= md->end_subject || (*eptr!='_' && !isalpha(*eptr) ))
+    if (eptr >= md->end_subject || (*eptr!='_' && !isalnum(*eptr) ))
       return FALSE;
     eptr++;
     ecode++;
@@ -3577,7 +3559,7 @@ for (;;)
 
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; i++)
           {
           if (!match_ref(number, eptr, length, md)) break;
@@ -3601,8 +3583,8 @@ for (;;)
 
     case OP_CLASS:
       {
-      uschar *data = ecode + 1;  /* Save for matching */
-      ecode += 33;               /* Advance past the item */
+      const uschar *data = ecode + 1;  /* Save for matching */
+      ecode += 33;                     /* Advance past the item */
 
       switch (*ecode)
         {
@@ -3685,7 +3667,7 @@ for (;;)
 
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; eptr++, i++)
           {
           if (eptr >= md->end_subject) break;
@@ -3710,8 +3692,8 @@ for (;;)
 
    case OP_CLASS_L:
      {
-     uschar *data = ecode + 1;  /* Save for matching */
-      uschar locale_flag = *data;
+      const uschar *data = ecode + 1;  /* Save for matching */
+      const uschar locale_flag = *data;
       ecode++; data++;		/* The localization support adds an extra byte */
 
       ecode += 33;               /* Advance past the item */
@@ -3744,8 +3726,8 @@ for (;;)
         if (eptr >= md->end_subject) FAIL;
         c = *eptr++;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */
-	if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
 #if 0
 	if ( (locale_flag &  4) && isdigit(c) ) continue;    /* Locale \d */
 	if ( (locale_flag &  8) && !isdigit(c) ) continue;   /* Locale \D */
@@ -3758,8 +3740,8 @@ for (;;)
           c = pcre_fcc[c];
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */
 
-	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
           }
         FAIL;
         }
@@ -3771,15 +3753,15 @@ for (;;)
         if (eptr >= md->end_subject) FAIL;
         c = *eptr++;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
 
         if (md->runtime_caseless)
           {
           c = pcre_fcc[c];
           if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
           }
         FAIL;
         }
@@ -3800,15 +3782,15 @@ for (;;)
           if (i >= max || eptr >= md->end_subject) FAIL;
           c = *eptr++;
           if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
 
           if (md->runtime_caseless)
             {
             c = pcre_fcc[c];
             if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	    if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	    if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	    if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	    if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
             }
           FAIL;
           }
@@ -3819,20 +3801,20 @@ for (;;)
 
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; eptr++, i++)
           {
           if (eptr >= md->end_subject) break;
           c = *eptr;
           if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	  if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	  if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	  if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	  if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
           if (md->runtime_caseless)
             {
             c = pcre_fcc[c];
             if ((data[c/8] & (1 << (c&7))) != 0) continue;
-	    if ( (locale_flag &  1) && (isalpha(c) || c=='_') ) continue;   /* Locale \w */
-	    if ( (locale_flag &  2) && (!isalpha(c) && c!='_') ) continue;   /* Locale \W */
+	    if ( (locale_flag &  1) && (isalnum(c) || c=='_') ) continue;   /* Locale \w */
+	    if ( (locale_flag &  2) && (!isalnum(c) && c!='_') ) continue;   /* Locale \W */
             }
           break;
           }
@@ -3941,7 +3923,7 @@ for (;;)
         }
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; i++)
           {
           if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
@@ -3971,7 +3953,7 @@ for (;;)
         }
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; i++)
           {
           if (eptr >= md->end_subject || c != *eptr) break;
@@ -4068,7 +4050,7 @@ for (;;)
         }
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; i++)
           {
           if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
@@ -4098,7 +4080,7 @@ for (;;)
         }
       else
         {
-        uschar *pp = eptr;
+        const uschar *pp = eptr;
         for (i = min; i < max; i++)
           {
           if (eptr >= md->end_subject || c == *eptr) break;
@@ -4191,12 +4173,12 @@ for (;;)
       break;
 
       case OP_NOT_WORDCHAR_L:
-      for (i = 1; i <= min; i++, eptr++) if (*eptr=='_' || isalpha(*eptr))
+      for (i = 1; i <= min; i++, eptr++) if (*eptr=='_' || isalnum(*eptr))
         return FALSE;
       break;
 
       case OP_WORDCHAR_L:
-      for (i = 1; i <= min; i++, eptr++) if (*eptr!='_' && !isalpha(*eptr))
+      for (i = 1; i <= min; i++, eptr++) if (*eptr!='_' && !isalnum(*eptr))
         return FALSE;
       break;
       }
@@ -4225,7 +4207,7 @@ for (;;)
 
     else
       {
-      uschar *pp = eptr;
+      const uschar *pp = eptr;
       switch(ctype)
         {
         case OP_ANY:
@@ -4301,7 +4283,7 @@ for (;;)
 	case OP_NOT_WORDCHAR_L:
 	  for (i = min; i < max; i++)
          {
-         if (eptr >= md->end_subject || (*eptr=='_' || isalpha(*eptr) ) )
+         if (eptr >= md->end_subject || (*eptr=='_' || isalnum(*eptr) ) )
            break;
          eptr++;
          }
@@ -4310,7 +4292,7 @@ for (;;)
        case OP_WORDCHAR_L:
        for (i = min; i < max; i++)
          {
-         if (eptr >= md->end_subject || (*eptr!='_' && !isalpha(*eptr) ) )
+         if (eptr >= md->end_subject || (*eptr!='_' && !isalnum(*eptr) ) )
              break;
           eptr++;
           }
@@ -4399,17 +4381,20 @@ int
 pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
   const char *subject, int length, int options, int *offsets, int offsetcount)
 {
-int resetcount;
-int ocount = offsetcount;
-int first_char = -1;
+  /* The "volatile" qualifiers stop gcc -Wall from warning that these locals
+     may be clobbered by longjmp(); hopefully they cost little performance.
+     NOTE(review): on start_bits it qualifies the pointee, not the pointer. */
+volatile int resetcount;
+volatile int ocount = offsetcount;
+volatile int first_char = -1;
 match_data match_block;
-uschar *start_bits = NULL;
-uschar *start_match = (uschar *)subject;
-uschar *end_subject;
-real_pcre *re = (real_pcre *)external_re;
-real_pcre_extra *extra = (real_pcre_extra *)external_extra;
-BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
-BOOL startline = (re->options & PCRE_STARTLINE) != 0;
+volatile const uschar *start_bits = NULL;
+const uschar *start_match = (uschar *)subject;
+const uschar *end_subject;
+const real_pcre *re = (const real_pcre *)external_re;
+const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
+volatile BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+volatile BOOL startline = (re->options & PCRE_STARTLINE) != 0;
 
 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
 
@@ -4417,7 +4402,7 @@ if (re == NULL || subject == NULL ||
    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
 
-match_block.start_subject = (uschar *)subject;
+match_block.start_subject = (const uschar *)subject;
 match_block.end_subject = match_block.start_subject + length;
 end_subject = match_block.end_subject;
 
@@ -4626,13 +4611,3 @@ return match_block.errorcode;
 }
 
 /* End of pcre.c */
-
-
-
-
-
-
-
-
-
-
-- 
cgit v0.12