diff options
author | Georg Brandl <georg@python.org> | 2014-10-29 07:07:50 (GMT) |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-10-29 07:07:50 (GMT) |
commit | d51c1a195322e24e86a3c10eaf444a97dbc35b4a (patch) | |
tree | 0c04ce2b6e42305626ccf260072eb4e6391d125e /Doc/tools/extensions | |
parent | fa7a2953cc2260b6f2ffe94955fcddfd83d3595d (diff) | |
parent | e03902910c70900954572d72997f339a3c1f1154 (diff) | |
download | cpython-d51c1a195322e24e86a3c10eaf444a97dbc35b4a.zip cpython-d51c1a195322e24e86a3c10eaf444a97dbc35b4a.tar.gz cpython-d51c1a195322e24e86a3c10eaf444a97dbc35b4a.tar.bz2 |
merge with 3.4
Diffstat (limited to 'Doc/tools/extensions')
-rw-r--r-- | Doc/tools/extensions/c_annotations.py | 121 | ||||
-rw-r--r-- | Doc/tools/extensions/patchlevel.py | 69 | ||||
-rw-r--r-- | Doc/tools/extensions/pyspecific.py | 348 | ||||
-rw-r--r-- | Doc/tools/extensions/suspicious.py | 274 |
4 files changed, 812 insertions, 0 deletions
diff --git a/Doc/tools/extensions/c_annotations.py b/Doc/tools/extensions/c_annotations.py new file mode 100644 index 0000000..baa39f3 --- /dev/null +++ b/Doc/tools/extensions/c_annotations.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" + c_annotations.py + ~~~~~~~~~~~~~~~~ + + Supports annotations for C API elements: + + * reference count annotations for C API functions. Based on + refcount.py and anno-api.py in the old Python documentation tools. + + * stable API annotations + + Usage: Set the `refcount_file` config value to the path to the reference + count data file. + + :copyright: Copyright 2007-2014 by Georg Brandl. + :license: Python license. +""" + +from os import path +from docutils import nodes +from docutils.parsers.rst import directives + +from sphinx import addnodes +from sphinx.domains.c import CObject + + +class RCEntry: + def __init__(self, name): + self.name = name + self.args = [] + self.result_type = '' + self.result_refs = None + + +class Annotations(dict): + @classmethod + def fromfile(cls, filename): + d = cls() + fp = open(filename, 'r') + try: + for line in fp: + line = line.strip() + if line[:1] in ("", "#"): + # blank lines and comments + continue + parts = line.split(":", 4) + if len(parts) != 5: + raise ValueError("Wrong field count in %r" % line) + function, type, arg, refcount, comment = parts + # Get the entry, creating it if needed: + try: + entry = d[function] + except KeyError: + entry = d[function] = RCEntry(function) + if not refcount or refcount == "null": + refcount = None + else: + refcount = int(refcount) + # Update the entry with the new parameter or the result + # information. + if arg: + entry.args.append((arg, type, refcount)) + else: + entry.result_type = type + entry.result_refs = refcount + finally: + fp.close() + return d + + def add_annotations(self, app, doctree): + for node in doctree.traverse(addnodes.desc_content): + par = node.parent + if par['domain'] != 'c': + continue + if par['stableabi']: + node.insert(0, nodes.emphasis(' Part of the stable ABI.', + ' Part of the stable ABI.', + classes=['stableabi'])) + if par['objtype'] != 'function': + continue + if not par[0].has_key('names') or not par[0]['names']: + continue + name = par[0]['names'][0] + if name.startswith("c."): + name = name[2:] + entry = self.get(name) + if not entry: + continue + elif entry.result_type not in ("PyObject*", "PyVarObject*"): + continue + if entry.result_refs is None: + rc = 'Return value: Always NULL.' + elif entry.result_refs: + rc = 'Return value: New reference.' + else: + rc = 'Return value: Borrowed reference.' + node.insert(0, nodes.emphasis(rc, rc, classes=['refcount'])) + + +def init_annotations(app): + refcounts = Annotations.fromfile( + path.join(app.srcdir, app.config.refcount_file)) + app.connect('doctree-read', refcounts.add_annotations) + + +def setup(app): + app.add_config_value('refcount_file', '', True) + app.connect('builder-inited', init_annotations) + + # monkey-patch C object... + CObject.option_spec = { + 'noindex': directives.flag, + 'stableabi': directives.flag, + } + old_handle_signature = CObject.handle_signature + def new_handle_signature(self, sig, signode): + signode.parent['stableabi'] = 'stableabi' in self.options + return old_handle_signature(self, sig, signode) + CObject.handle_signature = new_handle_signature + return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/extensions/patchlevel.py b/Doc/tools/extensions/patchlevel.py new file mode 100644 index 0000000..bca2eb8 --- /dev/null +++ b/Doc/tools/extensions/patchlevel.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +""" + patchlevel.py + ~~~~~~~~~~~~~ + + Extract version info from Include/patchlevel.h. + Adapted from Doc/tools/getversioninfo. + + :copyright: 2007-2008 by Georg Brandl. + :license: Python license. +""" + +import os +import re +import sys + +def get_header_version_info(srcdir): + patchlevel_h = os.path.join(srcdir, '..', 'Include', 'patchlevel.h') + + # This won't pick out all #defines, but it will pick up the ones we + # care about. + rx = re.compile(r'\s*#define\s+([a-zA-Z][a-zA-Z_0-9]*)\s+([a-zA-Z_0-9]+)') + + d = {} + f = open(patchlevel_h) + try: + for line in f: + m = rx.match(line) + if m is not None: + name, value = m.group(1, 2) + d[name] = value + finally: + f.close() + + release = version = '%s.%s' % (d['PY_MAJOR_VERSION'], d['PY_MINOR_VERSION']) + micro = int(d['PY_MICRO_VERSION']) + release += '.' + str(micro) + + level = d['PY_RELEASE_LEVEL'] + suffixes = { + 'PY_RELEASE_LEVEL_ALPHA': 'a', + 'PY_RELEASE_LEVEL_BETA': 'b', + 'PY_RELEASE_LEVEL_GAMMA': 'rc', + } + if level != 'PY_RELEASE_LEVEL_FINAL': + release += suffixes[level] + str(int(d['PY_RELEASE_SERIAL'])) + return version, release + + +def get_sys_version_info(): + major, minor, micro, level, serial = sys.version_info + release = version = '%s.%s' % (major, minor) + release += '.%s' % micro + if level != 'final': + release += '%s%s' % (level[0], serial) + return version, release + + +def get_version_info(): + try: + return get_header_version_info('.') + except (IOError, OSError): + version, release = get_sys_version_info() + print >>sys.stderr, 'Can\'t get version info from Include/patchlevel.h, ' \ + 'using version of this interpreter (%s).' % release + return version, release + +if __name__ == '__main__': + print(get_header_version_info('.')[1]) diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py new file mode 100644 index 0000000..76f6a9f --- /dev/null +++ b/Doc/tools/extensions/pyspecific.py @@ -0,0 +1,348 @@ +# -*- coding: utf-8 -*- +""" + pyspecific.py + ~~~~~~~~~~~~~ + + Sphinx extension with Python doc-specific markup. + + :copyright: 2008-2014 by Georg Brandl. + :license: Python license. +""" + +ISSUE_URI = 'https://bugs.python.org/issue%s' +SOURCE_URI = 'https://hg.python.org/cpython/file/default/%s' + +from docutils import nodes, utils + +from sphinx.util.nodes import split_explicit_title +from sphinx.util.compat import Directive +from sphinx.writers.html import HTMLTranslator +from sphinx.writers.latex import LaTeXTranslator + +# monkey-patch reST parser to disable alphabetic and roman enumerated lists +from docutils.parsers.rst.states import Body +Body.enum.converters['loweralpha'] = \ + Body.enum.converters['upperalpha'] = \ + Body.enum.converters['lowerroman'] = \ + Body.enum.converters['upperroman'] = lambda x: None + +# monkey-patch HTML and LaTeX translators to keep doctest blocks in the +# doctest docs themselves +orig_visit_literal_block = HTMLTranslator.visit_literal_block +def new_visit_literal_block(self, node): + meta = self.builder.env.metadata[self.builder.current_docname] + old_trim_doctest_flags = self.highlighter.trim_doctest_flags + if 'keepdoctest' in meta: + self.highlighter.trim_doctest_flags = False + try: + orig_visit_literal_block(self, node) + finally: + self.highlighter.trim_doctest_flags = old_trim_doctest_flags + +HTMLTranslator.visit_literal_block = new_visit_literal_block + +orig_depart_literal_block = LaTeXTranslator.depart_literal_block +def new_depart_literal_block(self, node): + meta = self.builder.env.metadata[self.curfilestack[-1]] + old_trim_doctest_flags = self.highlighter.trim_doctest_flags + if 'keepdoctest' in meta: + self.highlighter.trim_doctest_flags = False + try: + orig_depart_literal_block(self, node) + finally: + self.highlighter.trim_doctest_flags = old_trim_doctest_flags + +LaTeXTranslator.depart_literal_block = new_depart_literal_block + +# Support for marking up and linking to bugs.python.org issues + +def issue_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + issue = utils.unescape(text) + text = 'issue ' + issue + refnode = nodes.reference(text, text, refuri=ISSUE_URI % issue) + return [refnode], [] + + +# Support for linking to Python source files easily + +def source_role(typ, rawtext, text, lineno, inliner, options={}, content=[]): + has_t, title, target = split_explicit_title(text) + title = utils.unescape(title) + target = utils.unescape(target) + refnode = nodes.reference(title, title, refuri=SOURCE_URI % target) + return [refnode], [] + + +# Support for marking up implementation details + +class ImplementationDetail(Directive): + + has_content = True + required_arguments = 0 + optional_arguments = 1 + final_argument_whitespace = True + + def run(self): + pnode = nodes.compound(classes=['impl-detail']) + content = self.content + add_text = nodes.strong('CPython implementation detail:', + 'CPython implementation detail:') + if self.arguments: + n, m = self.state.inline_text(self.arguments[0], self.lineno) + pnode.append(nodes.paragraph('', '', *(n + m))) + self.state.nested_parse(content, self.content_offset, pnode) + if pnode.children and isinstance(pnode[0], nodes.paragraph): + pnode[0].insert(0, add_text) + pnode[0].insert(1, nodes.Text(' ')) + else: + pnode.insert(0, nodes.paragraph('', '', add_text)) + return [pnode] + + +# Support for documenting decorators + +from sphinx import addnodes +from sphinx.domains.python import PyModulelevel, PyClassmember + +class PyDecoratorMixin(object): + def handle_signature(self, sig, signode): + ret = super(PyDecoratorMixin, self).handle_signature(sig, signode) + signode.insert(0, addnodes.desc_addname('@', '@')) + return ret + + def needs_arglist(self): + return False + +class PyDecoratorFunction(PyDecoratorMixin, PyModulelevel): + def run(self): + # a decorator function is a function after all + self.name = 'py:function' + return PyModulelevel.run(self) + +class PyDecoratorMethod(PyDecoratorMixin, PyClassmember): + def run(self): + self.name = 'py:method' + return PyClassmember.run(self) + + +# Support for documenting version of removal in deprecations + +class DeprecatedRemoved(Directive): + has_content = True + required_arguments = 2 + optional_arguments = 1 + final_argument_whitespace = True + option_spec = {} + + _label = 'Deprecated since version %s, will be removed in version %s' + + def run(self): + node = addnodes.versionmodified() + node.document = self.state.document + node['type'] = 'deprecated-removed' + version = (self.arguments[0], self.arguments[1]) + node['version'] = version + text = self._label % version + if len(self.arguments) == 3: + inodes, messages = self.state.inline_text(self.arguments[2], + self.lineno+1) + para = nodes.paragraph(self.arguments[2], '', *inodes) + node.append(para) + else: + messages = [] + if self.content: + self.state.nested_parse(self.content, self.content_offset, node) + if isinstance(node[0], nodes.paragraph) and node[0].rawsource: + content = nodes.inline(node[0].rawsource, translatable=True) + content.source = node[0].source + content.line = node[0].line + content += node[0].children + node[0].replace_self(nodes.paragraph('', '', content)) + node[0].insert(0, nodes.inline('', '%s: ' % text, + classes=['versionmodified'])) + else: + para = nodes.paragraph('', '', + nodes.inline('', '%s.' % text, classes=['versionmodified'])) + if len(node): + node.insert(0, para) + else: + node.append(para) + env = self.state.document.settings.env + env.note_versionchange('deprecated', version[0], node, self.lineno) + return [node] + messages + + +# Support for including Misc/NEWS + +import re +import codecs + +issue_re = re.compile('([Ii])ssue #([0-9]+)') +whatsnew_re = re.compile(r"(?im)^what's new in (.*?)\??$") + +class MiscNews(Directive): + has_content = False + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = False + option_spec = {} + + def run(self): + fname = self.arguments[0] + source = self.state_machine.input_lines.source( + self.lineno - self.state_machine.input_offset - 1) + source_dir = path.dirname(path.abspath(source)) + fpath = path.join(source_dir, fname) + self.state.document.settings.record_dependencies.add(fpath) + try: + fp = codecs.open(fpath, encoding='utf-8') + try: + content = fp.read() + finally: + fp.close() + except Exception: + text = 'The NEWS file is not available.' + node = nodes.strong(text, text) + return [node] + content = issue_re.sub(r'`\1ssue #\2 <https://bugs.python.org/\2>`__', + content) + content = whatsnew_re.sub(r'\1', content) + # remove first 3 lines as they are the main heading + lines = ['.. default-role:: obj', ''] + content.splitlines()[3:] + self.state_machine.insert_input(lines, fname) + return [] + + +# Support for building "topic help" for pydoc + +pydoc_topic_labels = [ + 'assert', 'assignment', 'atom-identifiers', 'atom-literals', + 'attribute-access', 'attribute-references', 'augassign', 'binary', + 'bitwise', 'bltin-code-objects', 'bltin-ellipsis-object', + 'bltin-null-object', 'bltin-type-objects', 'booleans', + 'break', 'callable-types', 'calls', 'class', 'comparisons', 'compound', + 'context-managers', 'continue', 'conversions', 'customization', 'debugger', + 'del', 'dict', 'dynamic-features', 'else', 'exceptions', 'execmodel', + 'exprlists', 'floating', 'for', 'formatstrings', 'function', 'global', + 'id-classes', 'identifiers', 'if', 'imaginary', 'import', 'in', 'integers', + 'lambda', 'lists', 'naming', 'nonlocal', 'numbers', 'numeric-types', + 'objects', 'operator-summary', 'pass', 'power', 'raise', 'return', + 'sequence-types', 'shifting', 'slicings', 'specialattrs', 'specialnames', + 'string-methods', 'strings', 'subscriptions', 'truth', 'try', 'types', + 'typesfunctions', 'typesmapping', 'typesmethods', 'typesmodules', + 'typesseq', 'typesseq-mutable', 'unary', 'while', 'with', 'yield' +] + +from os import path +from time import asctime +from pprint import pformat +from docutils.io import StringOutput +from docutils.utils import new_document + +from sphinx.builders import Builder +from sphinx.writers.text import TextWriter + + +class PydocTopicsBuilder(Builder): + name = 'pydoc-topics' + + def init(self): + self.topics = {} + + def get_outdated_docs(self): + return 'all pydoc topics' + + def get_target_uri(self, docname, typ=None): + return '' # no URIs + + def write(self, *ignored): + writer = TextWriter(self) + for label in self.status_iterator(pydoc_topic_labels, + 'building topics... ', + length=len(pydoc_topic_labels)): + if label not in self.env.domaindata['std']['labels']: + self.warn('label %r not in documentation' % label) + continue + docname, labelid, sectname = self.env.domaindata['std']['labels'][label] + doctree = self.env.get_and_resolve_doctree(docname, self) + document = new_document('<section node>') + document.append(doctree.ids[labelid]) + destination = StringOutput(encoding='utf-8') + writer.write(document, destination) + self.topics[label] = writer.output + + def finish(self): + f = open(path.join(self.outdir, 'topics.py'), 'wb') + try: + f.write('# -*- coding: utf-8 -*-\n'.encode('utf-8')) + f.write(('# Autogenerated by Sphinx on %s\n' % asctime()).encode('utf-8')) + f.write(('topics = ' + pformat(self.topics) + '\n').encode('utf-8')) + finally: + f.close() + + +# Support for checking for suspicious markup + +import suspicious + + +# Support for documenting Opcodes + +import re + +opcode_sig_re = re.compile(r'(\w+(?:\+\d)?)(?:\s*\((.*)\))?') + +def parse_opcode_signature(env, sig, signode): + """Transform an opcode signature into RST nodes.""" + m = opcode_sig_re.match(sig) + if m is None: + raise ValueError + opname, arglist = m.groups() + signode += addnodes.desc_name(opname, opname) + if arglist is not None: + paramlist = addnodes.desc_parameterlist() + signode += paramlist + paramlist += addnodes.desc_parameter(arglist, arglist) + return opname.strip() + + +# Support for documenting pdb commands + +pdbcmd_sig_re = re.compile(r'([a-z()!]+)\s*(.*)') + +# later... +#pdbargs_tokens_re = re.compile(r'''[a-zA-Z]+ | # identifiers +# [.,:]+ | # punctuation +# [\[\]()] | # parens +# \s+ # whitespace +# ''', re.X) + +def parse_pdb_command(env, sig, signode): + """Transform a pdb command signature into RST nodes.""" + m = pdbcmd_sig_re.match(sig) + if m is None: + raise ValueError + name, args = m.groups() + fullname = name.replace('(', '').replace(')', '') + signode += addnodes.desc_name(name, name) + if args: + signode += addnodes.desc_addname(' '+args, ' '+args) + return fullname + + +def setup(app): + app.add_role('issue', issue_role) + app.add_role('source', source_role) + app.add_directive('impl-detail', ImplementationDetail) + app.add_directive('deprecated-removed', DeprecatedRemoved) + app.add_builder(PydocTopicsBuilder) + app.add_builder(suspicious.CheckSuspiciousMarkupBuilder) + app.add_description_unit('opcode', 'opcode', '%s (opcode)', + parse_opcode_signature) + app.add_description_unit('pdbcommand', 'pdbcmd', '%s (pdb command)', + parse_pdb_command) + app.add_description_unit('2to3fixer', '2to3fixer', '%s (2to3 fixer)') + app.add_directive_to_domain('py', 'decorator', PyDecoratorFunction) + app.add_directive_to_domain('py', 'decoratormethod', PyDecoratorMethod) + app.add_directive('miscnews', MiscNews) + return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/extensions/suspicious.py b/Doc/tools/extensions/suspicious.py new file mode 100644 index 0000000..d3ed849 --- /dev/null +++ b/Doc/tools/extensions/suspicious.py @@ -0,0 +1,274 @@ +""" +Try to detect suspicious constructs, resembling markup +that has leaked into the final output. + +Suspicious lines are reported in a comma-separated-file, +``suspicious.csv``, located in the output directory. + +The file is utf-8 encoded, and each line contains four fields: + + * document name (normalized) + * line number in the source document + * problematic text + * complete line showing the problematic text in context + +It is common to find many false positives. To avoid reporting them +again and again, they may be added to the ``ignored.csv`` file +(located in the configuration directory). The file has the same +format as ``suspicious.csv`` with a few differences: + + - each line defines a rule; if the rule matches, the issue + is ignored. + - line number may be empty (that is, nothing between the + commas: ",,"). In this case, line numbers are ignored (the + rule matches anywhere in the file). + - the last field does not have to be a complete line; some + surrounding text (never more than a line) is enough for + context. + +Rules are processed sequentially. A rule matches when: + + * document names are the same + * problematic texts are the same + * line numbers are close to each other (5 lines up or down) + * the rule text is completely contained into the source line + +The simplest way to create the ignored.csv file is by copying +undesired entries from suspicious.csv (possibly trimming the last +field.) + +Copyright 2009 Gabriel A. Genellina + +""" + +import os +import re +import csv +import sys + +from docutils import nodes +from sphinx.builders import Builder + +detect_all = re.compile(r''' + ::(?=[^=])| # two :: (but NOT ::=) + :[a-zA-Z][a-zA-Z0-9]+| # :foo + `| # ` (seldom used by itself) + (?<!\.)\.\.[ \t]*\w+: # .. foo: (but NOT ... else:) + ''', re.UNICODE | re.VERBOSE).finditer + +py3 = sys.version_info >= (3, 0) + + +class Rule: + def __init__(self, docname, lineno, issue, line): + """A rule for ignoring issues""" + self.docname = docname # document to which this rule applies + self.lineno = lineno # line number in the original source; + # this rule matches only near that. + # None -> don't care + self.issue = issue # the markup fragment that triggered this rule + self.line = line # text of the container element (single line only) + self.used = False + + def __repr__(self): + return '{0.docname},,{0.issue},{0.line}'.format(self) + + + +class dialect(csv.excel): + """Our dialect: uses only linefeed as newline.""" + lineterminator = '\n' + + +class CheckSuspiciousMarkupBuilder(Builder): + """ + Checks for possibly invalid markup that may leak into the output. + """ + name = 'suspicious' + + def init(self): + # create output file + self.log_file_name = os.path.join(self.outdir, 'suspicious.csv') + open(self.log_file_name, 'w').close() + # load database of previously ignored issues + self.load_rules(os.path.join(os.path.dirname(__file__), '..', + 'susp-ignored.csv')) + + def get_outdated_docs(self): + return self.env.found_docs + + def get_target_uri(self, docname, typ=None): + return '' + + def prepare_writing(self, docnames): + pass + + def write_doc(self, docname, doctree): + # set when any issue is encountered in this document + self.any_issue = False + self.docname = docname + visitor = SuspiciousVisitor(doctree, self) + doctree.walk(visitor) + + def finish(self): + unused_rules = [rule for rule in self.rules if not rule.used] + if unused_rules: + self.warn('Found %s/%s unused rules:' % + (len(unused_rules), len(self.rules))) + for rule in unused_rules: + self.info(repr(rule)) + return + + def check_issue(self, line, lineno, issue): + if not self.is_ignored(line, lineno, issue): + self.report_issue(line, lineno, issue) + + def is_ignored(self, line, lineno, issue): + """Determine whether this issue should be ignored.""" + docname = self.docname + for rule in self.rules: + if rule.docname != docname: continue + if rule.issue != issue: continue + # Both lines must match *exactly*. This is rather strict, + # and probably should be improved. + # Doing fuzzy matches with levenshtein distance could work, + # but that means bringing other libraries... + # Ok, relax that requirement: just check if the rule fragment + # is contained in the document line + if rule.line not in line: continue + # Check both line numbers. If they're "near" + # this rule matches. (lineno=None means "don't care") + if (rule.lineno is not None) and \ + abs(rule.lineno - lineno) > 5: continue + # if it came this far, the rule matched + rule.used = True + return True + return False + + def report_issue(self, text, lineno, issue): + if not self.any_issue: self.info() + self.any_issue = True + self.write_log_entry(lineno, issue, text) + if py3: + self.warn('[%s:%d] "%s" found in "%-.120s"' % + (self.docname, lineno, issue, text)) + else: + self.warn('[%s:%d] "%s" found in "%-.120s"' % ( + self.docname.encode(sys.getdefaultencoding(),'replace'), + lineno, + issue.encode(sys.getdefaultencoding(),'replace'), + text.strip().encode(sys.getdefaultencoding(),'replace'))) + self.app.statuscode = 1 + + def write_log_entry(self, lineno, issue, text): + if py3: + f = open(self.log_file_name, 'a') + writer = csv.writer(f, dialect) + writer.writerow([self.docname, lineno, issue, text.strip()]) + f.close() + else: + f = open(self.log_file_name, 'ab') + writer = csv.writer(f, dialect) + writer.writerow([self.docname.encode('utf-8'), + lineno, + issue.encode('utf-8'), + text.strip().encode('utf-8')]) + f.close() + + def load_rules(self, filename): + """Load database of previously ignored issues. + + A csv file, with exactly the same format as suspicious.csv + Fields: document name (normalized), line number, issue, surrounding text + """ + self.info("loading ignore rules... ", nonl=1) + self.rules = rules = [] + try: + if py3: + f = open(filename, 'r') + else: + f = open(filename, 'rb') + except IOError: + return + for i, row in enumerate(csv.reader(f)): + if len(row) != 4: + raise ValueError( + "wrong format in %s, line %d: %s" % (filename, i+1, row)) + docname, lineno, issue, text = row + if lineno: + lineno = int(lineno) + else: + lineno = None + if not py3: + docname = docname.decode('utf-8') + issue = issue.decode('utf-8') + text = text.decode('utf-8') + rule = Rule(docname, lineno, issue, text) + rules.append(rule) + f.close() + self.info('done, %d rules loaded' % len(self.rules)) + + +def get_lineno(node): + """Obtain line number information for a node.""" + lineno = None + while lineno is None and node: + node = node.parent + lineno = node.line + return lineno + + +def extract_line(text, index): + """text may be a multiline string; extract + only the line containing the given character index. + + >>> extract_line("abc\ndefgh\ni", 6) + >>> 'defgh' + >>> for i in (0, 2, 3, 4, 10): + ... print extract_line("abc\ndefgh\ni", i) + abc + abc + abc + defgh + defgh + i + """ + p = text.rfind('\n', 0, index) + 1 + q = text.find('\n', index) + if q < 0: + q = len(text) + return text[p:q] + + +class SuspiciousVisitor(nodes.GenericNodeVisitor): + + lastlineno = 0 + + def __init__(self, document, builder): + nodes.GenericNodeVisitor.__init__(self, document) + self.builder = builder + + def default_visit(self, node): + if isinstance(node, (nodes.Text, nodes.image)): # direct text containers + text = node.astext() + # lineno seems to go backwards sometimes (?) + self.lastlineno = lineno = max(get_lineno(node) or 0, self.lastlineno) + seen = set() # don't report the same issue more than only once per line + for match in detect_all(text): + issue = match.group() + line = extract_line(text, match.start()) + if (issue, line) not in seen: + self.builder.check_issue(line, lineno, issue) + seen.add((issue, line)) + + unknown_visit = default_visit + + def visit_document(self, node): + self.lastlineno = 0 + + def visit_comment(self, node): + # ignore comments -- too much false positives. + # (although doing this could miss some errors; + # there were two sections "commented-out" by mistake + # in the Python docs that would not be catched) + raise nodes.SkipNode |