diff options
Diffstat (limited to 'Lib/re.py')
-rw-r--r-- | Lib/re.py | 31 |
1 files changed, 23 insertions, 8 deletions
@@ -44,7 +44,7 @@ The special characters are: "|" A|B, creates an RE that will match either A or B. (...) Matches the RE inside the parentheses. The contents can be retrieved or matched later in the string. - (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below). + (?aiLmsux) Set the A, I, L, M, S, U, or X flag for the RE (see below). (?:...) Non-grouping version of regular parentheses. (?P<name>...) The substring matched by the group is accessible by name. (?P=name) Matches the text matched earlier by the group named name. @@ -64,11 +64,18 @@ resulting RE will match the second character. \Z Matches only at the end of the string. \b Matches the empty string, but only at the start or end of a word. \B Matches the empty string, but not at the start or end of a word. - \d Matches any decimal digit; equivalent to the set [0-9]. - \D Matches any non-digit character; equivalent to the set [^0-9]. + \d Matches any decimal digit; equivalent to the set [0-9] in + bytes patterns or string patterns with the ASCII flag. + In string patterns without the ASCII flag, it will match the whole + range of Unicode digits. + \D Matches any non-digit character; equivalent to [^\d]. \s Matches any whitespace character; equivalent to [ \t\n\r\f\v]. \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. - \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]. + \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_] + in bytes patterns or string patterns with the ASCII flag. + In string patterns without the ASCII flag, it will match the + range of Unicode alphanumeric characters (letters plus digits + plus underscore). With LOCALE, it will match the set [0-9_] plus characters defined as letters for the current locale. \W Matches the complement of \w. @@ -87,6 +94,12 @@ This module exports the following functions: escape Backslash all non-alphanumerics in a string. 
Some of the functions in this module take flags as optional parameters: + A ASCII For string patterns, make \w, \W, \b, \B, \d, \D + match the corresponding ASCII character categories + (rather than the whole Unicode categories, which is the + default). + For bytes patterns, this flag is the only available + behaviour and needn't be specified. I IGNORECASE Perform case-insensitive matching. L LOCALE Make \w, \W, \b, \B, dependent on the current locale. M MULTILINE "^" matches the beginning of lines (after a newline) @@ -95,7 +108,8 @@ Some of the functions in this module take flags as optional parameters: as the end of the string. S DOTALL "." matches any character at all, including the newline. X VERBOSE Ignore whitespace and comments for nicer looking RE's. - U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. + U UNICODE For compatibility only. Ignored for string patterns (it + is the default), and forbidden for bytes patterns. This module also defines an exception 'error'. 
@@ -107,16 +121,17 @@ import sre_parse # public symbols __all__ = [ "match", "search", "sub", "subn", "split", "findall", - "compile", "purge", "template", "escape", "I", "L", "M", "S", "X", - "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", + "compile", "purge", "template", "escape", "A", "I", "L", "M", "S", "X", + "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", "error" ] __version__ = "2.2.1" # flags +A = ASCII = sre_compile.SRE_FLAG_ASCII # assume ascii "locale" I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale -U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale +U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode "locale" M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments |