From 8094611eb8abe9f9d1e1498f36324eebabaa0a09 Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Thu, 29 Jun 2000 18:03:25 +0000 Subject: - fixed another split problem (those semantics are weird...) - got rid of $Id$'s (for the moment, at least). in other words, there should be no more "empty" checkins. - internal: some minor cleanups. --- Lib/sre.py | 8 ++++---- Lib/sre_compile.py | 1 - Lib/sre_constants.py | 1 - Lib/sre_parse.py | 1 - Modules/_sre.c | 23 +++++++++++++++++------ Modules/sre.h | 3 +-- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/Lib/sre.py b/Lib/sre.py index e0a51e3..49e3140 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -1,6 +1,5 @@ # # Secret Labs' Regular Expression Engine -# $Id$ # # re-compatible interface for the sre matching engine # @@ -135,13 +134,14 @@ def _split(pattern, string, maxsplit=0): if not m: break b, e = m.span() - if e == i: + if b == e: + if i >= len(string): + break continue append(string[i:b]) if g and b != e: extend(m.groups()) i = e n = n + 1 - if i < len(string): - append(string[i:]) + append(string[i:]) return s diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index a51531b..c042375 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -1,6 +1,5 @@ # # Secret Labs' Regular Expression Engine -# $Id$ # # convert template to internal format # diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index 1c9810f..f5e7894 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -1,6 +1,5 @@ # # Secret Labs' Regular Expression Engine -# $Id$ # # various symbols used by the regular expression engine. # run this script to update the _sre include files! diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 8ab36c8..93a7b5d 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -1,6 +1,5 @@ # # Secret Labs' Regular Expression Engine -# $Id$ # # convert re-style regular expression to sre pattern # diff --git a/Modules/_sre.c b/Modules/_sre.c index dba2afd..206e8d0 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1,7 +1,6 @@ /* -*- Mode: C; tab-width: 4 -*- * * Secret Labs' Regular Expression Engine - * $Id$ * * regular expression matching engine * @@ -31,7 +30,7 @@ #ifndef SRE_RECURSIVE static char -copyright[] = " SRE 0.9.1 Copyright (c) 1997-2000 by Secret Labs AB "; +copyright[] = " SRE 0.9.2 Copyright (c) 1997-2000 by Secret Labs AB "; #include "Python.h" @@ -56,7 +55,7 @@ copyright[] = " SRE 0.9.1 Copyright (c) 1997-2000 by Secret Labs AB "; #define HAVE_UNICODE #endif -#if defined(WIN32) /* FIXME: don't assume Windows == MSVC */ +#if defined(_MSC_VER) #pragma optimize("agtw", on) /* doesn't seem to make much difference... */ /* fastest possible local call under MSVC */ #define LOCAL(type) static __inline type __fastcall @@ -298,16 +297,21 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) int this, that; switch (at) { + case SRE_AT_BEGINNING: return ((void*) ptr == state->beginning); + case SRE_AT_BEGINNING_LINE: return ((void*) ptr == state->beginning || SRE_IS_LINEBREAK((int) ptr[-1])); + case SRE_AT_END: return ((void*) ptr == state->end); + case SRE_AT_END_LINE: return ((void*) ptr == state->end || SRE_IS_LINEBREAK((int) ptr[0])); + case SRE_AT_BOUNDARY: if (state->beginning == state->end) return 0; @@ -316,6 +320,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) this = ((void*) ptr < state->end) ? SRE_IS_WORD((int) ptr[0]) : 0; return this != that; + case SRE_AT_NON_BOUNDARY: if (state->beginning == state->end) return 0; @@ -365,7 +370,8 @@ SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch) break; default: - /* FIXME: internal error */ + /* internal error -- there's not much we can do about it + here, so let's just pretend it didn't match... */ return 0; } } @@ -910,14 +916,19 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern) SRE_CHAR* ptr = state->start; SRE_CHAR* end = state->end; int status = 0; + int prefix_len = 0; + SRE_CODE* prefix = NULL; if (pattern[0] == SRE_OP_INFO) { - /* don't look too far */ + /* args: */ end -= pattern[2]; + prefix_len = pattern[4]; + prefix = pattern + 5; pattern += pattern[1]; - /* FIXME: add support for fast scan */ } + /* if (prefix_len > 0) ... */ + if (pattern[0] == SRE_OP_LITERAL) { /* pattern starts with a literal */ SRE_CHAR chr = (SRE_CHAR) pattern[1]; diff --git a/Modules/sre.h b/Modules/sre.h index 722f890..274f085 100644 --- a/Modules/sre.h +++ b/Modules/sre.h @@ -1,8 +1,7 @@ /* * Secret Labs' Regular Expression Engine - * $Id$ * - * simple regular expression matching engine + * regular expression matching engine * * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. * -- cgit v0.12