diff options
author | Raymond Hettinger <python@rcn.com> | 2012-07-03 20:13:52 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2012-07-03 20:13:52 (GMT) |
commit | 5da6039765bce8f36c0e598f65a28013d6734006 (patch) | |
tree | 599e8d61b8a83a7856072becfca45136a546474b /Tools/scripts | |
parent | 3575f910d964a18d06ec3b2a989763801892916e (diff) | |
download | cpython-5da6039765bce8f36c0e598f65a28013d6734006.zip cpython-5da6039765bce8f36c0e598f65a28013d6734006.tar.gz cpython-5da6039765bce8f36c0e598f65a28013d6734006.tar.bz2 |
Refactor to isolate HTML encoding step from the parsing step.
Diffstat (limited to 'Tools/scripts')
-rwxr-xr-x | Tools/scripts/highlight.py | 37 |
1 file changed, 21 insertions, 16 deletions
diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py index a0d3d45..b2cef3d 100755 --- a/Tools/scripts/highlight.py +++ b/Tools/scripts/highlight.py @@ -10,24 +10,23 @@ def is_builtin(s): 'Return True if s is the name of a builtin' return s in vars(__builtins__) -def escape_range(lines, start, end): - 'Return escaped content from a range of lines between start and end' +def combine_range(lines, start, end): + 'Join content from a range of lines between start and end' (srow, scol), (erow, ecol) = start, end if srow == erow: rows = [lines[srow-1][scol:ecol]] else: rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]] - return cgi.escape(''.join(rows)), end + return ''.join(rows), end -def colorize(source): - 'Convert Python source code to an HTML fragment with colorized markup' +def isolate_tokens(source): + 'Generate chunks of source and indentify chunks to be highlighted' lines = source.splitlines(True) lines.append('') readline = functools.partial(next, iter(lines), '') kind = tok_str = '' tok_type = tokenize.COMMENT written = (1, 0) - result = [] for tok in tokenize.generate_tokens(readline): prev_tok_type, prev_tok_str = tok_type, tok_str tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok @@ -49,23 +48,29 @@ def colorize(source): kind = 'keyword' elif is_builtin(tok_str) and prev_tok_str != '.': kind = 'builtin' + line_upto_token, written = combine_range(lines, written, (srow, scol)) + line_thru_token, written = combine_range(lines, written, (erow, ecol)) + yield kind, line_upto_token, line_thru_token + +def colorize(source): + 'Convert Python source code to an HTML fragment with colorized markup' + result = ['<pre class="python">\n'] + for kind, line_upto_token, line_thru_token in isolate_tokens(source): if kind: - line_upto_token, written = escape_range(lines, written, (srow, scol)) - line_thru_token, written = escape_range(lines, written, (erow, ecol)) - result += [line_upto_token, '<span class="%s">' % 
kind, - line_thru_token, '</span>'] + result += [cgi.escape(line_upto_token), + '<span class="%s">' % kind, + cgi.escape(line_thru_token), + '</span>'] else: - line_thru_token, written = escape_range(lines, written, (erow, ecol)) - result += [line_thru_token] - - result.insert(0, '<pre class="python">\n') - result.append('</pre>\n') + result += [cgi.escape(line_upto_token), + cgi.escape(line_thru_token)] + result += ['</pre>\n'] return ''.join(result) default_css = { '.comment': '{color: crimson;}', '.string': '{color: forestgreen;}', - '.docstring': '{color: forestgreen; font-style:italic}', + '.docstring': '{color: forestgreen; font-style:italic;}', '.keyword': '{color: darkorange;}', '.builtin': '{color: purple;}', '.definition': '{color: darkorange; font-weight:bold;}', |