diff options
author | Benjamin Peterson <benjamin@python.org> | 2009-01-09 03:03:23 (GMT) |
---|---|---|
committer | Benjamin Peterson <benjamin@python.org> | 2009-01-09 03:03:23 (GMT) |
commit | 28d88b4813ad1ee17d0603f3e607789916ef6547 (patch) | |
tree | 9750ddc979e28258938b0a993f8c1fc8e39ed83f /Doc/tools/sphinxext | |
parent | 13f7c3b6cad28ee57390c7650e1bf644f0b3943f (diff) | |
download | cpython-28d88b4813ad1ee17d0603f3e607789916ef6547.zip cpython-28d88b4813ad1ee17d0603f3e607789916ef6547.tar.gz cpython-28d88b4813ad1ee17d0603f3e607789916ef6547.tar.bz2 |
Merged revisions 68288-68291,68325-68326,68338,68388,68393,68423 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
................
r68288 | benjamin.peterson | 2009-01-03 18:39:07 -0600 (Sat, 03 Jan 2009) | 1 line
only check the actual compile() call for a SyntaxError
................
r68289 | georg.brandl | 2009-01-04 02:26:10 -0600 (Sun, 04 Jan 2009) | 2 lines
Test commit.
................
r68290 | georg.brandl | 2009-01-04 04:23:49 -0600 (Sun, 04 Jan 2009) | 4 lines
Add "suspicious" builder which finds leftover markup in the HTML files.
Patch by Gabriel Genellina.
................
r68291 | georg.brandl | 2009-01-04 04:24:09 -0600 (Sun, 04 Jan 2009) | 2 lines
Fix two issues found by the suspicious builder.
................
r68325 | benjamin.peterson | 2009-01-04 16:00:18 -0600 (Sun, 04 Jan 2009) | 1 line
use Jinja 2.1.1
................
r68326 | georg.brandl | 2009-01-04 16:03:10 -0600 (Sun, 04 Jan 2009) | 2 lines
Update make.bat.
................
r68338 | neal.norwitz | 2009-01-04 21:57:25 -0600 (Sun, 04 Jan 2009) | 1 line
Make sure to checkout any new packages
................
r68388 | benjamin.peterson | 2009-01-07 21:39:46 -0600 (Wed, 07 Jan 2009) | 1 line
string exceptions are gone
................
r68393 | benjamin.peterson | 2009-01-07 22:01:00 -0600 (Wed, 07 Jan 2009) | 1 line
use new sphinx modules
................
r68423 | benjamin.peterson | 2009-01-08 20:13:34 -0600 (Thu, 08 Jan 2009) | 29 lines
Merged revisions 68306-68308,68340,68368,68422 via svnmerge from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
........
r68306 | benjamin.peterson | 2009-01-04 12:27:19 -0600 (Sun, 04 Jan 2009) | 1 line
fix_urllib: add mappings for the url parsing functions
........
r68307 | benjamin.peterson | 2009-01-04 12:30:01 -0600 (Sun, 04 Jan 2009) | 1 line
remove duplicated function
........
r68308 | benjamin.peterson | 2009-01-04 12:50:34 -0600 (Sun, 04 Jan 2009) | 1 line
turtle is no longer renamed
........
r68340 | georg.brandl | 2009-01-05 02:11:39 -0600 (Mon, 05 Jan 2009) | 2 lines
Fix undefined locals in parse_tokens().
........
r68368 | benjamin.peterson | 2009-01-06 17:56:10 -0600 (Tue, 06 Jan 2009) | 1 line
fix typo (thanks to Robert Lehmann)
........
r68422 | benjamin.peterson | 2009-01-08 20:01:03 -0600 (Thu, 08 Jan 2009) | 1 line
run the imports fixers after fix_import, so fix_import doesn't try to make stdlib renames into relative imports #4876
........
................
Diffstat (limited to 'Doc/tools/sphinxext')
-rw-r--r-- | Doc/tools/sphinxext/pyspecific.py | 14 | ||||
-rw-r--r-- | Doc/tools/sphinxext/susp-ignored.csv | 164 | ||||
-rw-r--r-- | Doc/tools/sphinxext/suspicious.py | 237 |
3 files changed, 407 insertions, 8 deletions
diff --git a/Doc/tools/sphinxext/pyspecific.py b/Doc/tools/sphinxext/pyspecific.py index e4727e3..66a0afc 100644 --- a/Doc/tools/sphinxext/pyspecific.py +++ b/Doc/tools/sphinxext/pyspecific.py @@ -46,15 +46,9 @@ from pprint import pformat from docutils.io import StringOutput from docutils.utils import new_document -try: - from sphinx.builders import Builder -except ImportError: - from sphinx.builder import Builder +from sphinx.builders import Builder +from sphinx.writers.text import TextWriter -try: - from sphinx.writers.text import TextWriter -except ImportError: - from sphinx.textwriter import TextWriter class PydocTopicsBuilder(Builder): name = 'pydoc-topics' @@ -90,6 +84,9 @@ class PydocTopicsBuilder(Builder): finally: f.close() +# Support for checking for suspicious markup + +import suspicious # Support for documenting Opcodes @@ -114,5 +111,6 @@ def parse_opcode_signature(env, sig, signode): def setup(app): app.add_role('issue', issue_role) app.add_builder(PydocTopicsBuilder) + app.add_builder(suspicious.CheckSuspiciousMarkupBuilder) app.add_description_unit('opcode', 'opcode', '%s (opcode)', parse_opcode_signature) diff --git a/Doc/tools/sphinxext/susp-ignored.csv b/Doc/tools/sphinxext/susp-ignored.csv new file mode 100644 index 0000000..7e1a289 --- /dev/null +++ b/Doc/tools/sphinxext/susp-ignored.csv @@ -0,0 +1,164 @@ +c-api/arg,,:ref,"PyArg_ParseTuple(args, ""O|O:ref"", &object, &callback)" +c-api/list,,:high,list[low:high] +c-api/list,,:high,list[low:high] = itemlist +c-api/sequence,,:i2,o[i1:i2] +c-api/sequence,,:i2,o[i1:i2] = v +c-api/sequence,,:i2,del o[i1:i2] +c-api/unicode,,:end,str[start:end] +distutils/apiref,,:action,http://pypi.python.org/pypi?:action=list_classifiers +distutils/setupscript,,::, +extending/embedding,,:numargs,"if(!PyArg_ParseTuple(args, "":numargs""))" +extending/extending,,:set,"if (PyArg_ParseTuple(args, ""O:set_callback"", &temp)) {" +extending/extending,,:myfunction,"PyArg_ParseTuple(args, ""D:myfunction"", &c);" 
+extending/newtypes,,:call,"if (!PyArg_ParseTuple(args, ""sss:call"", &arg1, &arg2, &arg3)) {" +extending/windows,,:initspam,/export:initspam +howto/cporting,,:add,"if (!PyArg_ParseTuple(args, ""ii:add_ints"", &one, &two))" +howto/cporting,,:encode,"if (!PyArg_ParseTuple(args, ""O:encode_object"", &myobj))" +howto/cporting,,:say,"if (!PyArg_ParseTuple(args, ""U:say_hello"", &name))" +howto/curses,,:black,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:blue,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:cyan,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:green,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:magenta,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:red,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/curses,,:white,"7:white." +howto/curses,,:yellow,"They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and" +howto/regex,,::, +howto/regex,,:foo,(?:foo) +howto/urllib2,,:example,"for example ""joe@password:example.com""" +howto/webservers,,.. image:,.. 
image:: http.png +library/audioop,,:ipos,"# factor = audioop.findfactor(in_test[ipos*2:ipos*2+len(out_test)]," +library/datetime,,:MM, +library/datetime,,:SS, +library/decimal,,:optional,"trailneg:optional trailing minus indicator" +library/difflib,,:ahi,a[alo:ahi] +library/difflib,,:bhi,b[blo:bhi] +library/difflib,,:i2, +library/difflib,,:j2, +library/difflib,,:i1, +library/dis,,:TOS, +library/dis,,`,TOS = `TOS` +library/doctest,,`,``factorial`` from the ``example`` module: +library/doctest,,`,The ``example`` module +library/doctest,,`,Using ``factorial`` +library/functions,,:step,a[start:stop:step] +library/functions,,:stop,"a[start:stop, i]" +library/functions,,:stop,a[start:stop:step] +library/hotshot,,:lineno,"ncalls tottime percall cumtime percall filename:lineno(function)" +library/httplib,,:port,host:port +library/imaplib,,:MM,"""DD-Mmm-YYYY HH:MM:SS +HHMM""" +library/imaplib,,:SS,"""DD-Mmm-YYYY HH:MM:SS +HHMM""" +library/linecache,,:sys,"sys:x:3:3:sys:/dev:/bin/sh" +library/logging,,:And, +library/logging,,:package1, +library/logging,,:package2, +library/logging,,:root, +library/logging,,:This, +library/logging,,:port,host:port +library/mmap,,:i2,obj[i1:i2] +library/multiprocessing,,:queue,">>> QueueManager.register('get_queue', callable=lambda:queue)" +library/multiprocessing,,`,">>> l._callmethod('__getitem__', (20,)) # equiv to `l[20]`" +library/multiprocessing,,`,">>> l._callmethod('__getslice__', (2, 7)) # equiv to `l[2:7]`" +library/multiprocessing,,`,# `BaseManager`. +library/multiprocessing,,`,# `Pool.imap()` (which will save on the amount of code needed anyway). 
+library/multiprocessing,,`,# A test file for the `multiprocessing` package +library/multiprocessing,,`,# A test of `multiprocessing.Pool` class +library/multiprocessing,,`,# Add more tasks using `put()` +library/multiprocessing,,`,# create server for a `HostManager` object +library/multiprocessing,,`,# Depends on `multiprocessing` package -- tested with `processing-0.60` +library/multiprocessing,,`,# in the original order then consider using `Pool.map()` or +library/multiprocessing,,`,# Not sure if we should synchronize access to `socket.accept()` method by +library/multiprocessing,,`,# object. (We import `multiprocessing.reduction` to enable this pickling.) +library/multiprocessing,,`,# register the Foo class; make `f()` and `g()` accessible via proxy +library/multiprocessing,,`,# register the Foo class; make `g()` and `_h()` accessible via proxy +library/multiprocessing,,`,# register the generator function baz; use `GeneratorProxy` to make proxies +library/multiprocessing,,`,`Cluster` is a subclass of `SyncManager` so it allows creation of +library/multiprocessing,,`,`hostname` gives the name of the host. If hostname is not +library/multiprocessing,,`,`slots` is used to specify the number of slots for processes on +library/optparse,,:len,"del parser.rargs[:len(value)]" +library/os.path,,:foo,c:foo +library/parser,,`,"""Make a function that raises an argument to the exponent `exp`.""" +library/posix,,`,"CFLAGS=""`getconf LFS_CFLAGS`"" OPT=""-g -O2 $CFLAGS""" +library/profile,,:lineno,ncalls tottime percall cumtime percall filename:lineno(function) +library/profile,,:lineno,filename:lineno(function) +library/pyexpat,,:elem1,<py:elem1 /> +library/pyexpat,,:py,"xmlns:py = ""http://www.python.org/ns/"">" +library/repr,,`,"return `obj`" +library/smtplib,,:port,"as well as a regular host:port server." 
+library/socket,,::,'5aef:2b::8' +library/sqlite3,,:memory, +library/sqlite3,,:age,"select name_last, age from people where name_last=:who and age=:age" +library/sqlite3,,:who,"select name_last, age from people where name_last=:who and age=:age" +library/ssl,,:My,"Organization Name (eg, company) [Internet Widgits Pty Ltd]:My Organization, Inc." +library/ssl,,:My,"Organizational Unit Name (eg, section) []:My Group" +library/ssl,,:myserver,"Common Name (eg, YOUR name) []:myserver.mygroup.myorganization.com" +library/ssl,,:MyState,State or Province Name (full name) [Some-State]:MyState +library/ssl,,:ops,Email Address []:ops@myserver.mygroup.myorganization.com +library/ssl,,:Some,"Locality Name (eg, city) []:Some City" +library/ssl,,:US,Country Name (2 letter code) [AU]:US +library/stdtypes,,:len,s[len(s):len(s)] +library/stdtypes,,:len,s[len(s):len(s)] +library/string,,:end,s[start:end] +library/string,,:end,s[start:end] +library/subprocess,,`,"output=`mycmd myarg`" +library/subprocess,,`,"output=`dmesg | grep hda`" +library/tarfile,,:compression,filemode[:compression] +library/tarfile,,:gz, +library/tarfile,,:bz2, +library/time,,:mm, +library/time,,:ss, +library/turtle,,::,Example:: +library/urllib,,:port,:port +library/urllib2,,:password,"""joe:password@python.org""" +library/uuid,,:uuid,urn:uuid:12345678-1234-5678-1234-567812345678 +library/xmlrpclib,,:pass,http://user:pass@host:port/path +library/xmlrpclib,,:pass,user:pass +library/xmlrpclib,,:port,http://user:pass@host:port/path +license,,`,THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND +license,,:zooko,mailto:zooko@zooko.com +license,,`,THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +reference/datamodel,,:step,a[i:j:step] +reference/datamodel,,:max, +reference/expressions,,:index,x[index:index] +reference/expressions,,:datum,{key:datum...} +reference/expressions,,`,`expressions...` +reference/grammar,,:output,#diagram:output 
+reference/grammar,,:rules,#diagram:rules +reference/grammar,,:token,#diagram:token +reference/grammar,,`,'`' testlist1 '`' +reference/lexical_analysis,,:fileencoding,# vim:fileencoding=<encoding-name> +reference/lexical_analysis,,`,", : . ` = ;" +tutorial/datastructures,,:value,key:value pairs within the braces adds initial key:value pairs +tutorial/datastructures,,:value,It is also possible to delete a key:value +tutorial/stdlib2,,:start,"fields = struct.unpack('<IIIHH', data[start:start+16])" +tutorial/stdlib2,,:start,extra = data[start:start+extra_size] +tutorial/stdlib2,,:start,filename = data[start:start+filenamesize] +tutorial/stdlib2,,:config,"logging.warning('Warning:config file %s not found', 'server.conf')" +tutorial/stdlib2,,:config,WARNING:root:Warning:config file server.conf not found +tutorial/stdlib2,,:Critical,CRITICAL:root:Critical error -- shutting down +tutorial/stdlib2,,:Error,ERROR:root:Error occurred +tutorial/stdlib2,,:root,CRITICAL:root:Critical error -- shutting down +tutorial/stdlib2,,:root,ERROR:root:Error occurred +tutorial/stdlib2,,:root,WARNING:root:Warning:config file server.conf not found +tutorial/stdlib2,,:Warning,WARNING:root:Warning:config file server.conf not found +using/cmdline,,:line,file:line: category: message +using/cmdline,,:category,action:message:category:module:line +using/cmdline,,:line,action:message:category:module:line +using/cmdline,,:message,action:message:category:module:line +using/cmdline,,:module,action:message:category:module:line +using/cmdline,,:errorhandler,:errorhandler +using/windows,162,`,`` this fixes syntax highlighting errors in some editors due to the \\\\ hackery +using/windows,170,`,`` +whatsnew/2.0,418,:len, +whatsnew/2.3,,::, +whatsnew/2.3,,:config, +whatsnew/2.3,,:Critical, +whatsnew/2.3,,:Error, +whatsnew/2.3,,:Problem, +whatsnew/2.3,,:root, +whatsnew/2.3,,:Warning, +whatsnew/2.4,,::, +whatsnew/2.4,,:System, +whatsnew/2.5,,:memory,:memory: +whatsnew/2.5,,:step,[start:stop:step] 
+whatsnew/2.5,,:stop,[start:stop:step] diff --git a/Doc/tools/sphinxext/suspicious.py b/Doc/tools/sphinxext/suspicious.py new file mode 100644 index 0000000..ae11793 --- /dev/null +++ b/Doc/tools/sphinxext/suspicious.py @@ -0,0 +1,237 @@ +""" +Try to detect suspicious constructs, resembling markup +that has leaked into the final output. + +Suspicious lines are reported in a comma-separated file, +``suspicious.csv``, located in the output directory. + +The file is utf-8 encoded, and each line contains four fields: + + * document name (normalized) + * line number in the source document + * problematic text + * complete line showing the problematic text in context + +It is common to find many false positives. To avoid reporting them +again and again, they may be added to the ``susp-ignored.csv`` file +(located in the same directory as this module). The file has the same +format as ``suspicious.csv`` with a few differences: + + - each line defines a rule; if the rule matches, the issue + is ignored. + - line number may be empty (that is, nothing between the + commas: ",,"). In this case, line numbers are ignored (the + rule matches anywhere in the file). + - the last field does not have to be a complete line; some + surrounding text (never more than a line) is enough for + context. + +Rules are processed sequentially. A rule matches when: + + * document names are the same + * problematic texts are the same + * line numbers are close to each other (5 lines up or down) + * the rule text is completely contained in the source line + +The simplest way to create the susp-ignored.csv file is by copying +undesired entries from suspicious.csv (possibly trimming the last +field). + +Copyright 2009 Gabriel A. Genellina + +""" + +import os, sys +import csv +import re +from docutils import nodes +from sphinx.builders import Builder + +detect_all = re.compile(ur''' + ::(?=[^=])| # two :: (but NOT ::=) + :[a-zA-Z][a-zA-Z0-9]+| # :foo + `| # ` (seldom used by itself) + (?<!\.)\.\.[ \t]*\w+: # .. 
foo: (but NOT ... else:) + ''', re.UNICODE | re.VERBOSE).finditer + +class Rule: + def __init__(self, docname, lineno, issue, line): + "A rule for ignoring issues" + self.docname = docname # document to which this rule applies + self.lineno = lineno # line number in the original source; + # this rule matches only near that. + # None -> don't care + self.issue = issue # the markup fragment that triggered this rule + self.line = line # text of the container element (single line only) + + +class CheckSuspiciousMarkupBuilder(Builder): + """ + Checks for possibly invalid markup that may leak into the output + """ + name = 'suspicious' + + def init(self): + # create output file + self.log_file_name = os.path.join(self.outdir, 'suspicious.csv') + open(self.log_file_name, 'w').close() + # load database of previously ignored issues + self.load_rules(os.path.join(os.path.dirname(__file__), 'susp-ignored.csv')) + + def get_outdated_docs(self): + return self.env.found_docs + + def get_target_uri(self, docname, typ=None): + return '' + + def prepare_writing(self, docnames): + ### PYTHON PROJECT SPECIFIC ### + for name in set(docnames): + if name.split('/', 1)[0] == 'documenting': + docnames.remove(name) + ### PYTHON PROJECT SPECIFIC ### + + def write_doc(self, docname, doctree): + self.any_issue = False # set when any issue is encountered in this document + self.docname = docname + visitor = SuspiciousVisitor(doctree, self) + doctree.walk(visitor) + + def finish(self): + return + + def check_issue(self, line, lineno, issue): + if not self.is_ignored(line, lineno, issue): + self.report_issue(line, lineno, issue) + + def is_ignored(self, line, lineno, issue): + """Determine whether this issue should be ignored. + """ + docname = self.docname + for rule in self.rules: + if rule.docname != docname: continue + if rule.issue != issue: continue + # Both lines must match *exactly*. This is rather strict, + # and probably should be improved. 
+ # Doing fuzzy matches with Levenshtein distance could work, + # but that means bringing other libraries... + # Ok, relax that requirement: just check if the rule fragment + # is contained in the document line + if rule.line not in line: continue + # Check both line numbers. If they're "near" + # this rule matches. (lineno=None means "don't care") + if (rule.lineno is not None) and \ + abs(rule.lineno - lineno) > 5: continue + # if it came this far, the rule matched + return True + return False + + def report_issue(self, text, lineno, issue): + if not self.any_issue: self.info() + self.any_issue = True + self.write_log_entry(lineno, issue, text) + self.warn('[%s:%d] "%s" found in "%-.120s"' % ( + self.docname.encode(sys.getdefaultencoding(),'replace'), + lineno, + issue.encode(sys.getdefaultencoding(),'replace'), + text.strip().encode(sys.getdefaultencoding(),'replace'))) + self.app.statuscode = 1 + + def write_log_entry(self, lineno, issue, text): + f = open(self.log_file_name, 'ab') + writer = csv.writer(f) + writer.writerow([self.docname.encode('utf-8'), + lineno, + issue.encode('utf-8'), + text.strip().encode('utf-8')]) + del writer + f.close() + + def load_rules(self, filename): + """Load database of previously ignored issues. + + A csv file, with exactly the same format as suspicious.csv + Fields: document name (normalized), line number, issue, surrounding text + """ + self.info("loading ignore rules... 
", nonl=1) + self.rules = rules = [] + try: f = open(filename, 'rb') + except IOError: return + for i, row in enumerate(csv.reader(f)): + if len(row) != 4: + raise ValueError, "wrong format in %s, line %d: %s" % (filename, i+1, row) + docname, lineno, issue, text = row + docname = docname.decode('utf-8') + if lineno: lineno = int(lineno) + else: lineno = None + issue = issue.decode('utf-8') + text = text.decode('utf-8') + rule = Rule(docname, lineno, issue, text) + rules.append(rule) + f.close() + self.info('done, %d rules loaded' % len(self.rules)) + + +def get_lineno(node): + "Obtain line number information for a node" + lineno = None + while lineno is None and node: + node = node.parent + lineno = node.line + return lineno + + +def extract_line(text, index): + """text may be a multiline string; extract + only the line containing the given character index. + + >>> extract_line("abc\ndefgh\ni", 6) + 'defgh' + >>> for i in (0, 2, 3, 4, 10): + ... print extract_line("abc\ndefgh\ni", i) + abc + abc + abc + defgh + i + """ + p = text.rfind('\n', 0, index) + 1 + q = text.find('\n', index) + if q<0: q = len(text) + return text[p:q] + + +class SuspiciousVisitor(nodes.GenericNodeVisitor): + + lastlineno = 0 + + def __init__(self, document, builder): + nodes.GenericNodeVisitor.__init__(self, document) + self.builder = builder + + def default_visit(self, node): + if isinstance(node, (nodes.Text, nodes.image)): # direct text containers + text = node.astext() + # lineno seems to go backwards sometimes (?) 
+ self.lastlineno = lineno = max(get_lineno(node) or 0, self.lastlineno) + seen = set() # don't report the same issue more than once per line + for match in detect_all(text): + #import pdb; pdb.set_trace() + issue = match.group() + line = extract_line(text, match.start()) + if (issue, line) not in seen: + self.builder.check_issue(line, lineno, issue) + seen.add((issue, line)) + + unknown_visit = default_visit + + def visit_document(self, node): + self.lastlineno = 0 + + def visit_comment(self, node): + # ignore comments -- too many false positives. + # (although doing this could miss some errors; + # there were two sections "commented-out" by mistake + # in the Python docs that would not be caught) + raise nodes.SkipNode |