diff options
-rw-r--r-- | Tools/scripts/texcheck.py | 233 |
1 files changed, 0 insertions, 233 deletions
diff --git a/Tools/scripts/texcheck.py b/Tools/scripts/texcheck.py deleted file mode 100644 index 6db0f66..0000000 --- a/Tools/scripts/texcheck.py +++ /dev/null @@ -1,233 +0,0 @@ -""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents. - - Written by Raymond D. Hettinger <python at rcn.com> - Copyright (c) 2003 Python Software Foundation. All rights reserved. - -Designed to catch common markup errors including: -* Unbalanced or mismatched parenthesis, brackets, and braces. -* Unbalanced or mismatched \\begin and \\end blocks. -* Misspelled or invalid LaTeX commands. -* Use of forward slashes instead of backslashes for commands. -* Table line size mismatches. - -Sample command line usage: - python texcheck.py -k chapterheading -m lib/librandomtex *.tex - -Options: - -m Munge parenthesis and brackets. [0,n) would normally mismatch. - -k keyword: Keyword is a valid LaTeX command. Do not include the backslash. - -d: Delimiter check only (useful for non-LaTeX files). - -h: Help - -s lineno: Start at lineno (useful for skipping complex sections). - -v: Verbose. Trace the matching of //begin and //end blocks. -""" - -import re -import sys -import getopt -from itertools import izip, count, islice -import glob - -cmdstr = r""" - \section \module \declaremodule \modulesynopsis \moduleauthor - \sectionauthor \versionadded \code \class \method \begin - \optional \var \ref \end \subsection \lineiii \hline \label - \indexii \textrm \ldots \keyword \stindex \index \item \note - \withsubitem \ttindex \footnote \citetitle \samp \opindex - \noindent \exception \strong \dfn \ctype \obindex \character - \indexiii \function \bifuncindex \refmodule \refbimodindex - \subsubsection \nodename \member \chapter \emph \ASCII \UNIX - \regexp \program \production \token \productioncont \term - \grammartoken \lineii \seemodule \file \EOF \documentclass - \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp - \tableofcontents \kbd \programopt \envvar \refstmodindex - \cfunction \constant \NULL \moreargs \cfuncline \cdata - \textasciicircum \n \ABC \setindexsubitem \versionchanged - \deprecated \seetext \newcommand \POSIX \pep \warning \rfc - \verbatiminput \methodline \textgreater \seetitle \lineiv - \funclineni \ulink \manpage \funcline \dataline \unspecified - \textbackslash \mimetype \mailheader \seepep \textunderscore - \longprogramopt \infinity \plusminus \shortversion \version - \refmodindex \seerfc \makeindex \makemodindex \renewcommand - \indexname \appendix \protect \indexiv \mbox \textasciitilde - \platform \seeurl \leftmargin \labelwidth \localmoduletable - \LaTeX \copyright \memberline \backslash \pi \centerline - \caption \vspace \textwidth \menuselection \textless - \makevar \csimplemacro \menuselection \bfcode \sub \release - \email \kwindex \refexmodindex \filenq \e \menuselection - \exindex \linev \newsgroup \verbatim \setshortversion - \author \authoraddress \paragraph \subparagraph \cmemberline - \textbar \C \seelink -""" - -def matchclose(c_lineno, c_symbol, openers, pairmap): - "Verify that closing delimiter matches most recent opening delimiter" - try: - o_lineno, o_symbol = openers.pop() - except IndexError: - print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol)) - return - if o_symbol in pairmap.get(c_symbol, [c_symbol]): return - print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno)) - return - -def checkit(source, opts, morecmds=[]): - """Check the LaTeX formatting in a sequence of lines. - - Opts is a mapping of options to option values if any: - -m munge parenthesis and brackets - -d delimiters only checking - -v verbose trace of delimiter matching - -s lineno: linenumber to start scan (default is 1). - - Morecmds is a sequence of LaTeX commands (without backslashes) that - are to be considered valid in the scan. - """ - - texcmd = re.compile(r'\\[A-Za-z]+') - falsetexcmd = re.compile(r'\/([A-Za-z]+)') # Mismarked with forward slash - - validcmds = set(cmdstr.split()) - for cmd in morecmds: - validcmds.add('\\' + cmd) - - if '-m' in opts: - pairmap = {']':'[(', ')':'(['} # Munged openers - else: - pairmap = {']':'[', ')':'('} # Normal opener for a given closer - openpunct = set('([') # Set of valid openers - - delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])') - braces = re.compile(r'({)|(})') - doubledwords = re.compile(r'(\b[A-za-z]+\b) \b\1\b') - spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s') - - openers = [] # Stack of pending open delimiters - bracestack = [] # Stack of pending open braces - - tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}') - tableline = re.compile(r'\\line([iv]+){') - tableend = re.compile(r'\\end{(?:long)?table([iv]+)}') - tablelevel = '' - tablestartline = 0 - - startline = int(opts.get('-s', '1')) - lineno = 0 - - for lineno, line in izip(count(startline), islice(source, startline-1, None)): - line = line.rstrip() - - # Check balancing of open/close parenthesis, brackets, and begin/end blocks - for begend, name, punct in delimiters.findall(line): - if '-v' in opts: - print(lineno, '|', begend, name, punct, end=' ') - if begend == 'begin' and '-d' not in opts: - openers.append((lineno, name)) - elif punct in openpunct: - openers.append((lineno, punct)) - elif begend == 'end' and '-d' not in opts: - matchclose(lineno, name, openers, pairmap) - elif punct in pairmap: - matchclose(lineno, punct, openers, pairmap) - if '-v' in opts: - print(' --> ', openers) - - # Balance opening and closing braces - for open, close in braces.findall(line): - if open == '{': - bracestack.append(lineno) - if close == '}': - try: - bracestack.pop() - except IndexError: - print(r'Warning, unmatched } on line %s.' % (lineno,)) - - # Optionally, skip LaTeX specific checks - if '-d' in opts: - continue - - # Warn whenever forward slashes encountered with a LaTeX command - for cmd in falsetexcmd.findall(line): - if '822' in line or '.html' in line: - continue # Ignore false positives for urls and for /rfc822 - if '\\' + cmd in validcmds: - print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd)) - - # Check for markup requiring {} for correct spacing - for cmd in spacingmarkup.findall(line): - print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno)) - - # Validate commands - nc = line.find(r'\newcommand') - if nc != -1: - start = line.find('{', nc) - end = line.find('}', start) - validcmds.add(line[start+1:end]) - for cmd in texcmd.findall(line): - if cmd not in validcmds: - print(r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd)) - - # Check table levels (make sure lineii only inside tableii) - m = tablestart.search(line) - if m: - tablelevel = m.group(1) - tablestartline = lineno - m = tableline.search(line) - if m and m.group(1) != tablelevel: - print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline)) - if tableend.search(line): - tablelevel = '' - - # Style guide warnings - if 'e.g.' in line or 'i.e.' in line: - print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,)) - - for dw in doubledwords.findall(line): - print(r'Doubled word warning. "%s" on line %d' % (dw, lineno)) - - lastline = lineno - for lineno, symbol in openers: - print("Unmatched open delimiter '%s' on line %d" % (symbol, lineno)) - for lineno in bracestack: - print("Unmatched { on line %d" % (lineno,)) - print('Done checking %d lines.' % (lastline,)) - return 0 - -def main(args=None): - if args is None: - args = sys.argv[1:] - optitems, arglist = getopt.getopt(args, "k:mdhs:v") - opts = dict(optitems) - if '-h' in opts or args==[]: - print(__doc__) - return 0 - - if len(arglist) < 1: - print('Please specify a file to be checked') - return 1 - - for i, filespec in enumerate(arglist): - if '*' in filespec or '?' in filespec: - arglist[i:i+1] = glob.glob(filespec) - - morecmds = [v for k,v in optitems if k=='-k'] - err = [] - - for filename in arglist: - print('=' * 30) - print("Checking", filename) - try: - f = open(filename) - except IOError: - print('Cannot open file %s.' % arglist[0]) - return 2 - - try: - err.append(checkit(f, opts, morecmds)) - finally: - f.close() - - return max(err) - -if __name__ == '__main__': - sys.exit(main()) |