diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-05-05 05:53:40 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-05 05:53:40 (GMT) |
commit | 898ff03e1e7925ecde3da66327d3cdc7e07625ba (patch) | |
tree | 977fc4b98c0e85816348cebd3b12026407c368b6 /Modules/sre_lib.h | |
parent | 647c3d381e67490e82cdbbe6c96e46d5e1628ce2 (diff) | |
download | cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.zip cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.tar.gz cpython-898ff03e1e7925ecde3da66327d3cdc7e07625ba.tar.bz2 |
bpo-30215: Make re.compile() locale agnostic. (#1361)
Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time. Only the locale at matching
time affects the result of matching.
Diffstat (limited to 'Modules/sre_lib.h')
-rw-r--r-- | Modules/sre_lib.h | 69 |
1 file changed, 67 insertions, 2 deletions
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h
index 0865fc6..b540d21 100644
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -101,6 +101,14 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
 }
 
 LOCAL(int)
+SRE(char_loc_ignore)(SRE_STATE* state, SRE_CODE pattern, SRE_CODE ch)
+{
+    return ch == pattern
+        || (SRE_CODE) state->lower(ch) == pattern
+        || (SRE_CODE) state->upper(ch) == pattern;
+}
+
+LOCAL(int)
 SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
 {
     /* check if character is a member of the given set */
@@ -187,6 +195,18 @@ SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
     }
 }
 
+LOCAL(int)
+SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
+{
+    SRE_CODE lo, up;
+    lo = state->lower(ch);
+    if (SRE(charset)(state, set, lo))
+        return 1;
+
+    up = state->upper(ch);
+    return up != lo && SRE(charset)(state, set, up);
+}
+
 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
 
 LOCAL(Py_ssize_t)
@@ -247,6 +267,14 @@ SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
             ptr++;
         break;
 
+    case SRE_OP_LITERAL_LOC_IGNORE:
+        /* repeated literal */
+        chr = pattern[1];
+        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
+        while (ptr < end && SRE(char_loc_ignore)(state, chr, *ptr))
+            ptr++;
+        break;
+
     case SRE_OP_NOT_LITERAL:
         /* repeated non-literal */
         chr = pattern[1];
@@ -269,6 +297,14 @@ SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
             ptr++;
         break;
 
+    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
+        /* repeated non-literal */
+        chr = pattern[1];
+        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
+        while (ptr < end && !SRE(char_loc_ignore)(state, chr, *ptr))
+            ptr++;
+        break;
+
     default:
         /* repeated single character pattern */
         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
@@ -651,7 +687,17 @@ entrance:
             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
             if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
+                state->lower(*ctx->ptr) != *ctx->pattern)
+                RETURN_FAILURE;
+            ctx->pattern++;
+            ctx->ptr++;
+            break;
+
+        case SRE_OP_LITERAL_LOC_IGNORE:
+            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
+                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
+            if (ctx->ptr >= end
+                || !SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr))
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -661,7 +707,17 @@ entrance:
             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, *ctx->pattern));
             if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
+                state->lower(*ctx->ptr) == *ctx->pattern)
+                RETURN_FAILURE;
+            ctx->pattern++;
+            ctx->ptr++;
+            break;
+
+        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
+            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
+                   ctx->pattern, ctx->ptr, *ctx->pattern));
+            if (ctx->ptr >= end
+                || SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr))
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -677,6 +733,15 @@ entrance:
             ctx->ptr++;
             break;
 
+        case SRE_OP_IN_LOC_IGNORE:
+            TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
+            if (ctx->ptr >= end
+                || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
+                RETURN_FAILURE;
+            ctx->pattern += ctx->pattern[0];
+            ctx->ptr++;
+            break;
+
         case SRE_OP_JUMP:
         case SRE_OP_INFO:
             /* jump forward */