summary refs log tree commit diff stats
path: root/Lib/re.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- Lib/re.py 31
1 files changed, 23 insertions, 8 deletions
diff --git a/Lib/re.py b/Lib/re.py
index 951f239..63a95fd 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -44,7 +44,7 @@ The special characters are:
"|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string.
- (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
+ (?aiLmsux) Set the A, I, L, M, S, U, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name.
@@ -64,11 +64,18 @@ resulting RE will match the second character.
\Z Matches only at the end of the string.
\b Matches the empty string, but only at the start or end of a word.
\B Matches the empty string, but not at the start or end of a word.
- \d Matches any decimal digit; equivalent to the set [0-9].
- \D Matches any non-digit character; equivalent to the set [^0-9].
+ \d Matches any decimal digit; equivalent to the set [0-9] in
+ bytes patterns or string patterns with the ASCII flag.
+ In string patterns without the ASCII flag, it will match the whole
+ range of Unicode digits.
+ \D Matches any non-digit character; equivalent to [^\d].
\s Matches any whitespace character; equivalent to [ \t\n\r\f\v].
\S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
- \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
+ \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]
+ in bytes patterns or string patterns with the ASCII flag.
+ In string patterns without the ASCII flag, it will match the
+ range of Unicode alphanumeric characters (letters plus digits
+ plus underscore).
With LOCALE, it will match the set [0-9_] plus characters defined
as letters for the current locale.
\W Matches the complement of \w.
@@ -87,6 +94,12 @@ This module exports the following functions:
escape Backslash all non-alphanumerics in a string.
Some of the functions in this module take flags as optional parameters:
+ A ASCII For string patterns, make \w, \W, \b, \B, \d, \D
+ match the corresponding ASCII character categories
+ (rather than the whole Unicode categories, which is the
+ default).
+ For bytes patterns, this flag is the only available
+ behaviour and needn't be specified.
I IGNORECASE Perform case-insensitive matching.
L LOCALE Make \w, \W, \b, \B dependent on the current locale.
M MULTILINE "^" matches the beginning of lines (after a newline)
@@ -95,7 +108,8 @@ Some of the functions in this module takes flags as optional parameters:
as the end of the string.
S DOTALL "." matches any character at all, including the newline.
X VERBOSE Ignore whitespace and comments for nicer looking RE's.
- U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale.
+ U UNICODE For compatibility only. Ignored for string patterns (it
+ is the default), and forbidden for bytes patterns.
This module also defines an exception 'error'.
@@ -107,16 +121,17 @@ import sre_parse
# public symbols
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
- "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
- "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
+ "compile", "purge", "template", "escape", "A", "I", "L", "M", "S", "X",
+ "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "error" ]
__version__ = "2.2.1"
# flags
+A = ASCII = sre_compile.SRE_FLAG_ASCII # assume ascii "locale"
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
+U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode "locale"
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments