diff options
-rw-r--r-- | Misc/NEWS | 2 | ||||
-rw-r--r-- | Modules/_sre.c | 26 | ||||
-rw-r--r-- | Modules/sre_lib.h | 29 |
3 files changed, 43 insertions, 14 deletions
@@ -17,6 +17,8 @@ Core and Builtins Library ------- +- Issue #18684: Fixed reading out of the buffer in the re module. + - Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment. diff --git a/Modules/_sre.c b/Modules/_sre.c index 4f47393..957ccbc 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -994,7 +994,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, } if (state.start == state.ptr) { - if (last == state.end) + if (last == state.end || state.ptr == state.end) break; /* skip one character */ state.start = (void*) ((char*) state.ptr + state.charsize); @@ -1191,6 +1191,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string, next: /* move on */ + if (state.ptr == state.end) + break; if (state.ptr == state.start) state.start = (void*) ((char*) state.ptr + state.charsize); else @@ -2564,6 +2566,9 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self) PyObject* match; Py_ssize_t status; + if (state->start == NULL) + Py_RETURN_NONE; + state_reset(state); state->ptr = state->start; @@ -2575,10 +2580,14 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self) match = pattern_new_match((PatternObject*) self->pattern, state, status); - if (status == 0 || state->ptr == state->start) + if (status == 0) + state->start = NULL; + else if (state->ptr != state->start) + state->start = state->ptr; + else if (state->ptr != state->end) state->start = (void*) ((char*) state->ptr + state->charsize); else - state->start = state->ptr; + state->start = NULL; return match; } @@ -2597,6 +2606,9 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self) PyObject* match; Py_ssize_t status; + if (state->start == NULL) + Py_RETURN_NONE; + state_reset(state); state->ptr = state->start; @@ -2608,10 +2620,14 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self) match = pattern_new_match((PatternObject*) self->pattern, state, status); - if (status == 0 || state->ptr == state->start) + if (status == 0) + state->start = NULL; + else if (state->ptr != state->start) + state->start = state->ptr; + else if (state->ptr != state->end) state->start = (void*) ((char*) state->ptr + state->charsize); else - state->start = state->ptr; + state->start = NULL; return match; } diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h index 463a908..128c71e 100644 --- a/Modules/sre_lib.h +++ b/Modules/sre_lib.h @@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) SRE_IS_LINEBREAK((int) ptr[-1])); case SRE_AT_END: - return (((void*) (ptr+1) == state->end && + return (((SRE_CHAR *)state->end - ptr == 1 && SRE_IS_LINEBREAK((int) ptr[0])) || ((void*) ptr == state->end)); @@ -1109,9 +1109,9 @@ entrance: /* <ASSERT> <skip> <back> <pattern> */ TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, ctx->ptr, ctx->pattern[1])); - state->ptr = ctx->ptr - ctx->pattern[1]; - if (state->ptr < state->beginning) + if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) RETURN_FAILURE; + state->ptr = ctx->ptr - ctx->pattern[1]; DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); RETURN_ON_FAILURE(ret); ctx->pattern += ctx->pattern[0]; @@ -1122,8 +1122,8 @@ entrance: /* <ASSERT_NOT> <skip> <back> <pattern> */ TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, ctx->ptr, ctx->pattern[1])); - state->ptr = ctx->ptr - ctx->pattern[1]; - if (state->ptr >= state->beginning) { + if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { + state->ptr = ctx->ptr - ctx->pattern[1]; DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); if (ret) { RETURN_ON_ERROR(ret); @@ -1215,12 +1215,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) SRE_CODE* overlap = NULL; int flags = 0; + if (ptr > end) + return 0; + if (pattern[0] == SRE_OP_INFO) { /* optimization info block */ /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ flags = pattern[2]; + if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { + TRACE(("reject (got %u chars, need %u)\n", + (unsigned int)(end - ptr), pattern[3])); + return 0; + } if (pattern[3] > 1) { /* adjust end point (but make sure we leave at least one character in there, so literal search will work) */ @@ -1338,15 +1346,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern) break; ptr++; } - } else + } else { /* general case */ - while (ptr <= end) { + assert(ptr <= end); + while (1) { TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); - state->start = state->ptr = ptr++; + state->start = state->ptr = ptr; status = SRE(match)(state, pattern, 0); - if (status != 0) + if (status != 0 || ptr >= end) break; + ptr++; } + } return status; } |