#!/usr/bin/env python3 'Convert Python source code to HTML with colorized markup' __all__ = ['colorize', 'build_page', 'default_css', 'default_html'] __author__ = 'Raymond Hettinger' import keyword, tokenize, cgi, functools def is_builtin(s): 'Return True if s is the name of a builtin' return s in vars(__builtins__) def combine_range(lines, start, end): 'Join content from a range of lines between start and end' (srow, scol), (erow, ecol) = start, end if srow == erow: rows = [lines[srow-1][scol:ecol]] else: rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]] return ''.join(rows), end def isolate_tokens(source): 'Generate chunks of source and indentify chunks to be highlighted' lines = source.splitlines(True) lines.append('') readline = functools.partial(next, iter(lines), '') kind = tok_str = '' tok_type = tokenize.COMMENT written = (1, 0) for tok in tokenize.generate_tokens(readline): prev_tok_type, prev_tok_str = tok_type, tok_str tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok kind = '' if tok_type == tokenize.COMMENT: kind = 'comment' elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;': kind = 'operator' elif tok_type == tokenize.STRING: kind = 'string' if prev_tok_type == tokenize.INDENT or scol==0: kind = 'docstring' elif tok_type == tokenize.NAME: if tok_str in ('def', 'class', 'import', 'from'): kind = 'definition' elif prev_tok_str in ('def', 'class'): kind = 'defname' elif keyword.iskeyword(tok_str): kind = 'keyword' elif is_builtin(tok_str) and prev_tok_str != '.': kind = 'builtin' line_upto_token, written = combine_range(lines, written, (srow, scol)) line_thru_token, written = combine_range(lines, written, (erow, ecol)) yield kind, line_upto_token, line_thru_token def colorize(source): 'Convert Python source code to an HTML fragment with colorized markup' result = ['
\n']
for kind, line_upto_token, line_thru_token in isolate_tokens(source):
if kind:
result += [cgi.escape(line_upto_token),
'' % kind,
cgi.escape(line_thru_token),
'']
else:
result += [cgi.escape(line_upto_token),
cgi.escape(line_thru_token)]
result += ['
\n']
return ''.join(result)
# Default stylesheet rules, keyed by CSS class selector.  Each selector is
# '.' followed by one of the token kinds assigned in isolate_tokens()
# ('comment', 'string', 'docstring', 'keyword', 'builtin', 'definition',
# 'defname', 'operator'); each value is the declaration block for it.
# NOTE(review): how these rules are rendered into a page is not visible in
# this part of the file -- confirm before changing the key or value format.
default_css = {
    '.comment': '{color: crimson;}',
    '.string': '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}
default_html = '''\