summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMa Lin <animalize@users.noreply.github.com>2022-04-18 13:50:40 (GMT)
committerGitHub <noreply@github.com>2022-04-18 13:50:40 (GMT)
commita29f858124bc698f6604716b73306c65b63b5054 (patch)
tree4ad90bf4967c3f0dd3e0981a65f31089a8920f0a
parent1c2fcebf3c5e2ab41d376bb481834445617c8f3c (diff)
downloadcpython-a29f858124bc698f6604716b73306c65b63b5054.zip
cpython-a29f858124bc698f6604716b73306c65b63b5054.tar.gz
cpython-a29f858124bc698f6604716b73306c65b63b5054.tar.bz2
bpo-47256: Increasing the depth of backtracking in RE (GH-32411)
Limit the maximum capturing group to 2**30-1 on 64-bit platforms (it was 2**31-1). No change on 32-bit platforms (2**28-1). It allows to reduce the size of SRE(match_context): - On 32 bit platform: 36 bytes, no change. (msvc2022) - On 64 bit platform: 72 bytes -> 56 bytes. (msvc2022/gcc9.4) which leads to increasing the depth of backtracking.
-rw-r--r--Misc/NEWS.d/next/Library/2022-04-16-11-39-59.bpo-47256.1cygyd.rst2
-rw-r--r--Modules/_sre/sre.h12
-rw-r--r--Modules/_sre/sre_lib.h76
3 files changed, 46 insertions, 44 deletions
diff --git a/Misc/NEWS.d/next/Library/2022-04-16-11-39-59.bpo-47256.1cygyd.rst b/Misc/NEWS.d/next/Library/2022-04-16-11-39-59.bpo-47256.1cygyd.rst
new file mode 100644
index 0000000..ac4c52b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-16-11-39-59.bpo-47256.1cygyd.rst
@@ -0,0 +1,2 @@
+:mod:`re` module, limit the maximum capturing group to 1,073,741,823 in
+64-bit build, this increases the depth of backtracking.
diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h
index 129f559..aff064d 100644
--- a/Modules/_sre/sre.h
+++ b/Modules/_sre/sre.h
@@ -18,10 +18,10 @@
#define SRE_CODE Py_UCS4
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
-# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
+# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
-# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
+# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
#endif
typedef struct {
@@ -73,12 +73,12 @@ typedef struct {
Py_ssize_t pos, endpos;
int isbytes;
int charsize; /* character size */
- /* registers */
- Py_ssize_t lastindex;
- Py_ssize_t lastmark;
- const void** mark;
int match_all;
int must_advance;
+ /* marks */
+ int lastmark;
+ int lastindex;
+ const void** mark;
/* dynamically allocated stuff */
char* data_stack;
size_t data_stack_size;
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index 3472e65..db624aa 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -450,20 +450,23 @@ do { \
#define MARK_PUSH(lastmark) \
do if (lastmark >= 0) { \
- i = lastmark; /* ctx->lastmark may change if reallocated */ \
- DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
+ size_t _marks_size = (lastmark+1) * sizeof(void*); \
+ DATA_STACK_PUSH(state, state->mark, _marks_size); \
} while (0)
#define MARK_POP(lastmark) \
do if (lastmark >= 0) { \
- DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
+ size_t _marks_size = (lastmark+1) * sizeof(void*); \
+ DATA_STACK_POP(state, state->mark, _marks_size, 1); \
} while (0)
#define MARK_POP_KEEP(lastmark) \
do if (lastmark >= 0) { \
- DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
+ size_t _marks_size = (lastmark+1) * sizeof(void*); \
+ DATA_STACK_POP(state, state->mark, _marks_size, 0); \
} while (0)
#define MARK_POP_DISCARD(lastmark) \
do if (lastmark >= 0) { \
- DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
+ size_t _marks_size = (lastmark+1) * sizeof(void*); \
+ DATA_STACK_POP_DISCARD(state, _marks_size); \
} while (0)
#define JUMP_NONE 0
@@ -488,10 +491,10 @@ do { \
ctx->pattern = pattern; \
ctx->ptr = ptr; \
DATA_ALLOC(SRE(match_context), nextctx); \
- nextctx->last_ctx_pos = ctx_pos; \
- nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
nextctx->toplevel = toplevel_; \
+ nextctx->jump = jumpvalue; \
+ nextctx->last_ctx_pos = ctx_pos; \
pattern = nextpattern; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
@@ -507,18 +510,18 @@ do { \
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
typedef struct {
- Py_ssize_t last_ctx_pos;
- Py_ssize_t jump;
- const SRE_CHAR* ptr;
- const SRE_CODE* pattern;
Py_ssize_t count;
- Py_ssize_t lastmark;
- Py_ssize_t lastindex;
union {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
+ int lastmark;
+ int lastindex;
+ const SRE_CODE* pattern;
+ const SRE_CHAR* ptr;
int toplevel;
+ int jump;
+ Py_ssize_t last_ctx_pos;
} SRE(match_context);
#define MAYBE_CHECK_SIGNALS \
@@ -558,8 +561,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
{
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
Py_ssize_t alloc_pos, ctx_pos = -1;
- Py_ssize_t i, ret = 0;
- Py_ssize_t jump;
+ Py_ssize_t ret = 0;
+ int jump;
unsigned int sigcount=0;
SRE(match_context)* ctx;
@@ -607,20 +610,22 @@ dispatch:
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", pattern,
ptr, pattern[0]));
- i = pattern[0];
- if (i & 1)
- state->lastindex = i/2 + 1;
- if (i > state->lastmark) {
- /* state->lastmark is the highest valid index in the
- state->mark array. If it is increased by more than 1,
- the intervening marks must be set to NULL to signal
- that these marks have not been encountered. */
- Py_ssize_t j = state->lastmark + 1;
- while (j < i)
- state->mark[j++] = NULL;
- state->lastmark = i;
+ {
+ int i = pattern[0];
+ if (i & 1)
+ state->lastindex = i/2 + 1;
+ if (i > state->lastmark) {
+ /* state->lastmark is the highest valid index in the
+ state->mark array. If it is increased by more than 1,
+ the intervening marks must be set to NULL to signal
+ that these marks have not been encountered. */
+ int j = state->lastmark + 1;
+ while (j < i)
+ state->mark[j++] = NULL;
+ state->lastmark = i;
+ }
+ state->mark[i] = ptr;
}
- state->mark[i] = ptr;
pattern++;
DISPATCH;
@@ -1373,9 +1378,8 @@ dispatch:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
ptr, pattern[0]));
- i = pattern[0];
{
- Py_ssize_t groupref = i+i;
+ int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
@@ -1398,9 +1402,8 @@ dispatch:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
ptr, pattern[0]));
- i = pattern[0];
{
- Py_ssize_t groupref = i+i;
+ int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
@@ -1424,9 +1427,8 @@ dispatch:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
ptr, pattern[0]));
- i = pattern[0];
{
- Py_ssize_t groupref = i+i;
+ int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
@@ -1450,9 +1452,8 @@ dispatch:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
ptr, pattern[0]));
- i = pattern[0];
{
- Py_ssize_t groupref = i+i;
+ int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
@@ -1476,9 +1477,8 @@ dispatch:
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
ptr, pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
- i = pattern[0];
{
- Py_ssize_t groupref = i+i;
+ int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
pattern += pattern[1];
DISPATCH;