From 0b5e61ddca73ad4fe597fb15065115b0285c8849 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 4 Oct 2017 20:09:49 +0300 Subject: bpo-30397: Add re.Pattern and re.Match. (#1646) --- Doc/howto/regex.rst | 16 ++-- Doc/library/fnmatch.rst | 2 +- Doc/library/re.rst | 104 ++++++++++----------- Lib/idlelib/idle_test/test_calltips.py | 2 +- Lib/re.py | 35 +++---- Lib/sre_constants.py | 2 + Lib/telnetlib.py | 4 +- Lib/test/test_optparse.py | 4 +- Lib/test/test_re.py | 4 +- Lib/unittest/case.py | 4 +- .../2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst | 3 + Modules/_sre.c | 41 ++++---- Modules/clinic/_sre.c.h | 6 +- 13 files changed, 120 insertions(+), 107 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index d9b7c90..3121a9f 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -402,7 +402,7 @@ should store the result in a variable for later use. :: >>> m = p.match('tempo') >>> m #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(0, 5), match='tempo'> + Now you can query the :ref:`match object ` for information about the matching string. :ref:`match object ` instances @@ -441,7 +441,7 @@ case. :: >>> print(p.match('::: message')) None >>> m = p.search('::: message'); print(m) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(4, 11), match='message'> + >>> m.group() 'message' >>> m.span() @@ -493,7 +493,7 @@ the RE string added as the first argument, and still return either ``None`` or a >>> print(re.match(r'From\s+', 'Fromage amk')) None >>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(0, 5), match='From '> + Under the hood, these functions simply create a pattern object for you and call the appropriate method on it. They also store the compiled @@ -685,7 +685,7 @@ given location, they can obviously be matched an infinite number of times. line, the RE to use is ``^From``. 
:: >>> print(re.search('^From', 'From Here to Eternity')) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(0, 4), match='From'> + >>> print(re.search('^From', 'Reciting From Memory')) None @@ -697,11 +697,11 @@ given location, they can obviously be matched an infinite number of times. or any location followed by a newline character. :: >>> print(re.search('}$', '{block}')) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(6, 7), match='}'> + >>> print(re.search('}$', '{block} ')) None >>> print(re.search('}$', '{block}\n')) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(6, 7), match='}'> + To match a literal ``'$'``, use ``\$`` or enclose it inside a character class, as in ``[$]``. @@ -726,7 +726,7 @@ given location, they can obviously be matched an infinite number of times. >>> p = re.compile(r'\bclass\b') >>> print(p.search('no class at all')) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(3, 8), match='class'> + >>> print(p.search('the declassified algorithm')) None >>> print(p.search('one subclass is')) @@ -744,7 +744,7 @@ given location, they can obviously be matched an infinite number of times. >>> print(p.search('no class at all')) None >>> print(p.search('\b' + 'class' + '\b')) #doctest: +ELLIPSIS - <_sre.SRE_Match object; span=(0, 7), match='\x08class\x08'> + Second, inside a character class, where there's no use for this assertion, ``\b`` represents the backspace character, for compatibility with Python's diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst index 634c26e..abf9523 100644 --- a/Doc/library/fnmatch.rst +++ b/Doc/library/fnmatch.rst @@ -86,7 +86,7 @@ patterns. '(?s:.*\\.txt)\\Z' >>> reobj = re.compile(regex) >>> reobj.match('foobar.txt') - <_sre.SRE_Match object; span=(0, 10), match='foobar.txt'> + .. seealso:: diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 138e7d8..7efdd5d 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -492,7 +492,7 @@ form. 
Compile a regular expression pattern into a :ref:`regular expression object `, which can be used for matching using its - :func:`~regex.match`, :func:`~regex.search` and other methods, described + :func:`~Pattern.match`, :func:`~Pattern.search` and other methods, described below. The expression's behaviour can be modified by specifying a *flags* value. @@ -747,7 +747,7 @@ form. >>> re.sub(r'\sAND\s', ' & ', 'Baked Beans And Spam', flags=re.IGNORECASE) 'Baked Beans & Spam' - The pattern may be a string or an RE object. + The pattern may be a string or a :class:`Pattern` object. The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all @@ -861,7 +861,7 @@ Regular Expression Objects Compiled regular expression objects support the following methods and attributes: -.. method:: regex.search(string[, pos[, endpos]]) +.. method:: Pattern.search(string[, pos[, endpos]]) Scan through *string* looking for the first location where this regular expression produces a match, and return a corresponding :ref:`match object @@ -884,11 +884,11 @@ attributes: >>> pattern = re.compile("d") >>> pattern.search("dog") # Match at index 0 - <_sre.SRE_Match object; span=(0, 1), match='d'> + >>> pattern.search("dog", 1) # No match; search doesn't include the "d" -.. method:: regex.match(string[, pos[, endpos]]) +.. method:: Pattern.match(string[, pos[, endpos]]) If zero or more characters at the *beginning* of *string* match this regular expression, return a corresponding :ref:`match object `. @@ -896,86 +896,86 @@ attributes: different from a zero-length match. The optional *pos* and *endpos* parameters have the same meaning as for the - :meth:`~regex.search` method. + :meth:`~Pattern.search` method. >>> pattern = re.compile("o") >>> pattern.match("dog") # No match as "o" is not at the start of "dog". >>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog". 
- <_sre.SRE_Match object; span=(1, 2), match='o'> + If you want to locate a match anywhere in *string*, use - :meth:`~regex.search` instead (see also :ref:`search-vs-match`). + :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`). -.. method:: regex.fullmatch(string[, pos[, endpos]]) +.. method:: Pattern.fullmatch(string[, pos[, endpos]]) If the whole *string* matches this regular expression, return a corresponding :ref:`match object `. Return ``None`` if the string does not match the pattern; note that this is different from a zero-length match. The optional *pos* and *endpos* parameters have the same meaning as for the - :meth:`~regex.search` method. + :meth:`~Pattern.search` method. >>> pattern = re.compile("o[gh]") >>> pattern.fullmatch("dog") # No match as "o" is not at the start of "dog". >>> pattern.fullmatch("ogre") # No match as not the full string matches. >>> pattern.fullmatch("doggie", 1, 3) # Matches within given limits. - <_sre.SRE_Match object; span=(1, 3), match='og'> + .. versionadded:: 3.4 -.. method:: regex.split(string, maxsplit=0) +.. method:: Pattern.split(string, maxsplit=0) Identical to the :func:`split` function, using the compiled pattern. -.. method:: regex.findall(string[, pos[, endpos]]) +.. method:: Pattern.findall(string[, pos[, endpos]]) Similar to the :func:`findall` function, using the compiled pattern, but also accepts optional *pos* and *endpos* parameters that limit the search region like for :meth:`match`. -.. method:: regex.finditer(string[, pos[, endpos]]) +.. method:: Pattern.finditer(string[, pos[, endpos]]) Similar to the :func:`finditer` function, using the compiled pattern, but also accepts optional *pos* and *endpos* parameters that limit the search region like for :meth:`match`. -.. method:: regex.sub(repl, string, count=0) +.. method:: Pattern.sub(repl, string, count=0) Identical to the :func:`sub` function, using the compiled pattern. -.. method:: regex.subn(repl, string, count=0) +.. 
method:: Pattern.subn(repl, string, count=0) Identical to the :func:`subn` function, using the compiled pattern. -.. attribute:: regex.flags +.. attribute:: Pattern.flags The regex matching flags. This is a combination of the flags given to :func:`.compile`, any ``(?...)`` inline flags in the pattern, and implicit flags such as :data:`UNICODE` if the pattern is a Unicode string. -.. attribute:: regex.groups +.. attribute:: Pattern.groups The number of capturing groups in the pattern. -.. attribute:: regex.groupindex +.. attribute:: Pattern.groupindex A dictionary mapping any symbolic group names defined by ``(?P)`` to group numbers. The dictionary is empty if no symbolic groups were used in the pattern. -.. attribute:: regex.pattern +.. attribute:: Pattern.pattern - The pattern string from which the RE object was compiled. + The pattern string from which the pattern object was compiled. .. versionchanged:: 3.7 @@ -989,7 +989,7 @@ Match Objects ------------- Match objects always have a boolean value of ``True``. -Since :meth:`~regex.match` and :meth:`~regex.search` return ``None`` +Since :meth:`~Pattern.match` and :meth:`~Pattern.search` return ``None`` when there is no match, you can test whether there was a match with a simple ``if`` statement:: @@ -1000,10 +1000,10 @@ when there is no match, you can test whether there was a match with a simple Match objects support the following methods and attributes: -.. method:: match.expand(template) +.. method:: Match.expand(template) Return the string obtained by doing backslash substitution on the template - string *template*, as done by the :meth:`~regex.sub` method. + string *template*, as done by the :meth:`~Pattern.sub` method. Escapes such as ``\n`` are converted to the appropriate characters, and numeric backreferences (``\1``, ``\2``) and named backreferences (``\g<1>``, ``\g``) are replaced by the contents of the @@ -1012,7 +1012,7 @@ Match objects support the following methods and attributes: .. 
versionchanged:: 3.5 Unmatched groups are replaced with an empty string. -.. method:: match.group([group1, ...]) +.. method:: Match.group([group1, ...]) Returns one or more subgroups of the match. If there is a single argument, the result is a single string; if there are multiple arguments, the result is a @@ -1063,7 +1063,7 @@ Match objects support the following methods and attributes: 'c3' -.. method:: match.__getitem__(g) +.. method:: Match.__getitem__(g) This is identical to ``m.group(g)``. This allows easier access to an individual group from a match: @@ -1079,7 +1079,7 @@ Match objects support the following methods and attributes: .. versionadded:: 3.6 -.. method:: match.groups(default=None) +.. method:: Match.groups(default=None) Return a tuple containing all the subgroups of the match, from 1 up to however many groups are in the pattern. The *default* argument is used for groups that @@ -1102,7 +1102,7 @@ Match objects support the following methods and attributes: ('24', '0') -.. method:: match.groupdict(default=None) +.. method:: Match.groupdict(default=None) Return a dictionary containing all the *named* subgroups of the match, keyed by the subgroup name. The *default* argument is used for groups that did not @@ -1113,8 +1113,8 @@ Match objects support the following methods and attributes: {'first_name': 'Malcolm', 'last_name': 'Reynolds'} -.. method:: match.start([group]) - match.end([group]) +.. method:: Match.start([group]) + Match.end([group]) Return the indices of the start and end of the substring matched by *group*; *group* defaults to zero (meaning the whole matched substring). Return ``-1`` if @@ -1137,28 +1137,28 @@ Match objects support the following methods and attributes: 'tony@tiger.net' -.. method:: match.span([group]) +.. method:: Match.span([group]) For a match *m*, return the 2-tuple ``(m.start(group), m.end(group))``. Note that if *group* did not contribute to the match, this is ``(-1, -1)``. *group* defaults to zero, the entire match. 
-.. attribute:: match.pos +.. attribute:: Match.pos - The value of *pos* which was passed to the :meth:`~regex.search` or - :meth:`~regex.match` method of a :ref:`regex object `. This is + The value of *pos* which was passed to the :meth:`~Pattern.search` or + :meth:`~Pattern.match` method of a :ref:`regex object `. This is the index into the string at which the RE engine started looking for a match. -.. attribute:: match.endpos +.. attribute:: Match.endpos - The value of *endpos* which was passed to the :meth:`~regex.search` or - :meth:`~regex.match` method of a :ref:`regex object `. This is + The value of *endpos* which was passed to the :meth:`~Pattern.search` or + :meth:`~Pattern.match` method of a :ref:`regex object `. This is the index into the string beyond which the RE engine will not go. -.. attribute:: match.lastindex +.. attribute:: Match.lastindex The integer index of the last matched capturing group, or ``None`` if no group was matched at all. For example, the expressions ``(a)b``, ``((a)(b))``, and @@ -1167,21 +1167,21 @@ Match objects support the following methods and attributes: string. -.. attribute:: match.lastgroup +.. attribute:: Match.lastgroup The name of the last matched capturing group, or ``None`` if the group didn't have a name, or if no group was matched at all. -.. attribute:: match.re +.. attribute:: Match.re - The regular expression object whose :meth:`~regex.match` or - :meth:`~regex.search` method produced this match instance. + The regular expression object whose :meth:`~Pattern.match` or + :meth:`~Pattern.search` method produced this match instance. -.. attribute:: match.string +.. attribute:: Match.string - The string passed to :meth:`~regex.match` or :meth:`~regex.search`. + The string passed to :meth:`~Pattern.match` or :meth:`~Pattern.search`. .. 
versionchanged:: 3.7 @@ -1234,7 +1234,7 @@ To match this with a regular expression, one could use backreferences as such: "" To find out what card the pair consists of, one could use the -:meth:`~match.group` method of the match object in the following manner: +:meth:`~Match.group` method of the match object in the following manner: .. doctest:: @@ -1314,7 +1314,7 @@ For example:: >>> re.match("c", "abcdef") # No match >>> re.search("c", "abcdef") # Match - <_sre.SRE_Match object; span=(2, 3), match='c'> + Regular expressions beginning with ``'^'`` can be used with :func:`search` to restrict the match at the beginning of the string:: @@ -1322,7 +1322,7 @@ restrict the match at the beginning of the string:: >>> re.match("c", "abcdef") # No match >>> re.search("^c", "abcdef") # No match >>> re.search("^a", "abcdef") # Match - <_sre.SRE_Match object; span=(0, 1), match='a'> + Note however that in :const:`MULTILINE` mode :func:`match` only matches at the beginning of the string, whereas using :func:`search` with a regular expression @@ -1330,7 +1330,7 @@ beginning with ``'^'`` will match at the beginning of each line. >>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match >>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match - <_sre.SRE_Match object; span=(4, 5), match='X'> + Making a Phonebook @@ -1449,9 +1449,9 @@ another one to escape it. For example, the two following lines of code are functionally identical: >>> re.match(r"\W(.)\1\W", " ff ") - <_sre.SRE_Match object; span=(0, 4), match=' ff '> + >>> re.match("\\W(.)\\1\\W", " ff ") - <_sre.SRE_Match object; span=(0, 4), match=' ff '> + When one wants to match a literal backslash, it must be escaped in the regular expression. With raw string notation, this means ``r"\\"``. 
Without raw string @@ -1459,9 +1459,9 @@ notation, one must use ``"\\\\"``, making the following lines of code functionally identical: >>> re.match(r"\\", r"\\") - <_sre.SRE_Match object; span=(0, 1), match='\\'> + <re.Match object; span=(0, 1), match='\\'> >>> re.match("\\\\", r"\\") - <_sre.SRE_Match object; span=(0, 1), match='\\'> + <re.Match object; span=(0, 1), match='\\'> Writing a Tokenizer diff --git a/Lib/idlelib/idle_test/test_calltips.py b/Lib/idlelib/idle_test/test_calltips.py index fa92ece..a58229d 100644 --- a/Lib/idlelib/idle_test/test_calltips.py +++ b/Lib/idlelib/idle_test/test_calltips.py @@ -74,7 +74,7 @@ class Get_signatureTest(unittest.TestCase): non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is -a callable, it's passed the match object and must return''') +a callable, it's passed the Match object and must return''') gtest(p.sub, '''(repl, string, count=0)\nReturn the string obtained by replacing the leftmost non-overlapping occurrences o...''') def test_signature_wrap(self): diff --git a/Lib/re.py b/Lib/re.py index c194dba..d772979 100644 --- a/Lib/re.py +++ b/Lib/re.py @@ -92,8 +92,8 @@ This module exports the following functions: subn Same as sub, but also return the number of substitutions made. split Split a string by the occurrences of a pattern. findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a match object for each match. - compile Compile a pattern into a RegexObject. + finditer Return an iterator yielding a Match object for each match. + compile Compile a pattern into a Pattern object. purge Clear the regular expression cache. escape Backslash all non-alphanumerics in a string. 
@@ -139,7 +139,7 @@ except ImportError: __all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall", "finditer", "compile", "purge", "template", "escape", - "error", "A", "I", "L", "M", "S", "X", "U", + "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", ] @@ -175,17 +175,17 @@ error = sre_compile.error def match(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning - a match object, or None if no match was found.""" + a Match object, or None if no match was found.""" return _compile(pattern, flags).match(string) def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning - a match object, or None if no match was found.""" + a Match object, or None if no match was found.""" return _compile(pattern, flags).fullmatch(string) def search(pattern, string, flags=0): """Scan through string looking for a match to the pattern, returning - a match object, or None if no match was found.""" + a Match object, or None if no match was found.""" return _compile(pattern, flags).search(string) def sub(pattern, repl, string, count=0, flags=0): @@ -193,7 +193,7 @@ def sub(pattern, repl, string, count=0, flags=0): non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. If it is - a callable, it's passed the match object and must return + a callable, it's passed the Match object and must return a replacement string to be used.""" return _compile(pattern, flags).sub(repl, string, count) @@ -204,7 +204,7 @@ def subn(pattern, repl, string, count=0, flags=0): string by the replacement repl. number is the number of substitutions that were made. repl can be either a string or a callable; if a string, backslash escapes in it are processed. 
- If it is a callable, it's passed the match object and must + If it is a callable, it's passed the Match object and must return a replacement string to be used.""" return _compile(pattern, flags).subn(repl, string, count) @@ -230,13 +230,13 @@ def findall(pattern, string, flags=0): def finditer(pattern, string, flags=0): """Return an iterator over all non-overlapping matches in the - string. For each match, the iterator returns a match object. + string. For each match, the iterator returns a Match object. Empty matches are included in the result.""" return _compile(pattern, flags).finditer(string) def compile(pattern, flags=0): - "Compile a regular expression pattern, returning a pattern object." + "Compile a regular expression pattern, returning a Pattern object." return _compile(pattern, flags) def purge(): @@ -245,7 +245,7 @@ def purge(): _compile_repl.cache_clear() def template(pattern, flags=0): - "Compile a template pattern, returning a pattern object" + "Compile a template pattern, returning a Pattern object" return _compile(pattern, flags|T) # SPECIAL_CHARS @@ -264,13 +264,14 @@ def escape(pattern): pattern = str(pattern, 'latin1') return pattern.translate(_special_chars_map).encode('latin1') +Pattern = type(sre_compile.compile('', 0)) +Match = type(sre_compile.compile('', 0).match('')) + # -------------------------------------------------------------------- # internals _cache = OrderedDict() -_pattern_type = type(sre_compile.compile("", 0)) - _MAXCACHE = 512 def _compile(pattern, flags): # internal: compile pattern @@ -278,7 +279,7 @@ def _compile(pattern, flags): return _cache[type(pattern), pattern, flags] except KeyError: pass - if isinstance(pattern, _pattern_type): + if isinstance(pattern, Pattern): if flags: raise ValueError( "cannot process flags argument with a compiled pattern") @@ -301,12 +302,12 @@ def _compile_repl(repl, pattern): return sre_parse.parse_template(repl, pattern) def _expand(pattern, match, template): - # internal: match.expand 
implementation hook + # internal: Match.expand implementation hook template = sre_parse.parse_template(template, pattern) return sre_parse.expand_template(template, match) def _subx(pattern, template): - # internal: pattern.sub/subn implementation helper + # internal: Pattern.sub/subn implementation helper template = _compile_repl(template, pattern) if not template[0] and len(template[1]) == 1: # literal replacement @@ -322,7 +323,7 @@ import copyreg def _pickle(p): return _compile, (p.pattern, p.flags) -copyreg.pickle(_pattern_type, _pickle, _compile) +copyreg.pickle(Pattern, _pickle, _compile) # -------------------------------------------------------------------- # experimental stuff (see python-dev discussions for details) diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index 0261e9e..1daa7bd 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -32,6 +32,8 @@ class error(Exception): colno: The column corresponding to pos (may be None) """ + __module__ = 're' + def __init__(self, msg, pattern=None, pos=None): self.msg = msg self.pattern = pattern diff --git a/Lib/telnetlib.py b/Lib/telnetlib.py index b0863b1..b9d45b4 100644 --- a/Lib/telnetlib.py +++ b/Lib/telnetlib.py @@ -585,12 +585,12 @@ class Telnet: """Read until one from a list of a regular expressions matches. The first argument is a list of regular expressions, either - compiled (re.RegexObject instances) or uncompiled (strings). + compiled (re.Pattern instances) or uncompiled (strings). The optional second argument is a timeout, in seconds; default is no timeout. Return a tuple of three items: the index in the list of the - first regular expression that matches; the match object + first regular expression that matches; the re.Match object returned; and the text read up till and including the match. If EOF is read and no text was read, raise EOFError. 
diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py index 91a0319..437fdd2 100644 --- a/Lib/test/test_optparse.py +++ b/Lib/test/test_optparse.py @@ -24,8 +24,6 @@ from optparse import make_option, Option, \ from optparse import _match_abbrev from optparse import _parse_num -retype = type(re.compile('')) - class InterceptedError(Exception): def __init__(self, error_message=None, @@ -107,7 +105,7 @@ Args were %(args)s.""" % locals ()) func(*args, **kwargs) except expected_exception as err: actual_message = str(err) - if isinstance(expected_message, retype): + if isinstance(expected_message, re.Pattern): self.assertTrue(expected_message.search(actual_message), """\ expected exception message pattern: diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index e9c07a0..9cb426a 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1596,9 +1596,9 @@ class ReTests(unittest.TestCase): def test_compile(self): # Test return value when given string and pattern as parameter pattern = re.compile('random pattern') - self.assertIsInstance(pattern, re._pattern_type) + self.assertIsInstance(pattern, re.Pattern) same_pattern = re.compile(pattern) - self.assertIsInstance(same_pattern, re._pattern_type) + self.assertIsInstance(same_pattern, re.Pattern) self.assertIs(same_pattern, pattern) # Test behaviour when not given a string or pattern as parameter self.assertRaises(TypeError, re.compile, 0) diff --git a/Lib/unittest/case.py b/Lib/unittest/case.py index f19afef..c48a63c 100644 --- a/Lib/unittest/case.py +++ b/Lib/unittest/case.py @@ -1273,7 +1273,7 @@ class TestCase(object): Args: expected_exception: Exception class expected to be raised. - expected_regex: Regex (re pattern object or string) expected + expected_regex: Regex (re.Pattern object or string) expected to be found in error message. args: Function to be called and extra positional args. kwargs: Extra kwargs. 
@@ -1292,7 +1292,7 @@ class TestCase(object): Args: expected_warning: Warning class expected to be triggered. - expected_regex: Regex (re pattern object or string) expected + expected_regex: Regex (re.Pattern object or string) expected to be found in error message. args: Function to be called and extra positional args. kwargs: Extra kwargs. diff --git a/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst b/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst new file mode 100644 index 0000000..2f1f762 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-03-22-45-50.bpo-30397.e4F7Kr.rst @@ -0,0 +1,3 @@ +The types of compiled regular expression objects and match objects are now exposed as +`re.Pattern` and `re.Match`. This adds information in pydoc output for the +re module. diff --git a/Modules/_sre.c b/Modules/_sre.c index 6873f1d..c42ab26 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -630,13 +630,13 @@ _sre.SRE_Pattern.fullmatch pos: Py_ssize_t = 0 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize -Matches against all of the string +Matches against all of the string. [clinic start generated code]*/ static PyObject * _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_t endpos) -/*[clinic end generated code: output=5833c47782a35f4a input=a6f640614aaefceb]*/ +/*[clinic end generated code: output=5833c47782a35f4a input=d9fb03a7625b5828]*/ { SRE_STATE state; Py_ssize_t status; @@ -1341,7 +1341,7 @@ done: return result; } -PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects"); +PyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); /* PatternObject's 'groupindex' method. */ static PyObject * @@ -2221,12 +2221,12 @@ _sre.SRE_Match.span group: object(c_default="NULL") = 0 / -For MatchObject m, return the 2-tuple (m.start(group), m.end(group)). +For match object m, return the 2-tuple (m.start(group), m.end(group)). 
[clinic start generated code]*/ static PyObject * _sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) -/*[clinic end generated code: output=f02ae40594d14fe6 input=49092b6008d176d3]*/ +/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ { Py_ssize_t index = match_getindex(self, group); @@ -2625,15 +2625,18 @@ static PyGetSetDef pattern_getset[] = { #define PAT_OFF(x) offsetof(PatternObject, x) static PyMemberDef pattern_members[] = { - {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY}, - {"flags", T_INT, PAT_OFF(flags), READONLY}, - {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY}, + {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY, + "The pattern string from which the RE object was compiled."}, + {"flags", T_INT, PAT_OFF(flags), READONLY, + "The regex matching flags."}, + {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY, + "The number of capturing groups in the pattern."}, {NULL} /* Sentinel */ }; static PyTypeObject Pattern_Type = { PyVarObject_HEAD_INIT(NULL, 0) - "_" SRE_MODULE ".SRE_Pattern", + "re.Pattern", sizeof(PatternObject), sizeof(SRE_CODE), (destructor)pattern_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -2685,18 +2688,24 @@ static PyMethodDef match_methods[] = { }; static PyGetSetDef match_getset[] = { - {"lastindex", (getter)match_lastindex_get, (setter)NULL}, - {"lastgroup", (getter)match_lastgroup_get, (setter)NULL}, + {"lastindex", (getter)match_lastindex_get, (setter)NULL, + "The integer index of the last matched capturing group."}, + {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, + "The name of the last matched capturing group."}, {"regs", (getter)match_regs_get, (setter)NULL}, {NULL} }; #define MATCH_OFF(x) offsetof(MatchObject, x) static PyMemberDef match_members[] = { - {"string", T_OBJECT, MATCH_OFF(string), READONLY}, - {"re", T_OBJECT, MATCH_OFF(pattern), READONLY}, - {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY}, - {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY}, + {"string", T_OBJECT, 
MATCH_OFF(string), READONLY, + "The string passed to match() or search()."}, + {"re", T_OBJECT, MATCH_OFF(pattern), READONLY, + "The regular expression object."}, + {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY, + "The index into the string at which the RE engine started looking for a match."}, + {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY, + "The index into the string beyond which the RE engine will not go."}, {NULL} }; @@ -2705,7 +2714,7 @@ static PyMemberDef match_members[] = { static PyTypeObject Match_Type = { PyVarObject_HEAD_INIT(NULL,0) - "_" SRE_MODULE ".SRE_Match", + "re.Match", sizeof(MatchObject), sizeof(Py_ssize_t), (destructor)match_dealloc, /* tp_dealloc */ 0, /* tp_print */ diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h index 8ed288e..6eea0c8 100644 --- a/Modules/clinic/_sre.c.h +++ b/Modules/clinic/_sre.c.h @@ -190,7 +190,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__, "fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" "--\n" "\n" -"Matches against all of the string"); +"Matches against all of the string."); #define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF \ {"fullmatch", (PyCFunction)_sre_SRE_Pattern_fullmatch, METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__}, @@ -682,7 +682,7 @@ PyDoc_STRVAR(_sre_SRE_Match_span__doc__, "span($self, group=0, /)\n" "--\n" "\n" -"For MatchObject m, return the 2-tuple (m.start(group), m.end(group))."); +"For match object m, return the 2-tuple (m.start(group), m.end(group))."); #define _SRE_SRE_MATCH_SPAN_METHODDEF \ {"span", (PyCFunction)_sre_SRE_Match_span, METH_FASTCALL, _sre_SRE_Match_span__doc__}, @@ -765,4 +765,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored)) { return _sre_SRE_Scanner_search_impl(self); } -/*[clinic end generated code: output=6e3fb17fef1be436 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1e6a1be31302df09 input=a9049054013a1b77]*/ -- cgit v0.12