diff options
author | Raymond Hettinger <python@rcn.com> | 2012-07-02 20:29:57 (GMT) |
---|---|---|
committer | Raymond Hettinger <python@rcn.com> | 2012-07-02 20:29:57 (GMT) |
commit | f2cc352afdde98df388a55a45f4d1fa066c2b904 (patch) | |
tree | 35fd71b2e6c8ffeaaf5a278dde28ef60c2130438 /Tools | |
parent | 98979b85e7fa164b365635b99b83d91f30773850 (diff) | |
download | cpython-f2cc352afdde98df388a55a45f4d1fa066c2b904.zip cpython-f2cc352afdde98df388a55a45f4d1fa066c2b904.tar.gz cpython-f2cc352afdde98df388a55a45f4d1fa066c2b904.tar.bz2 |
Do HTML escaping after the tokenization step.
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/scripts/pycolorize.py | 37 |
1 files changed, 23 insertions, 14 deletions
diff --git a/Tools/scripts/pycolorize.py b/Tools/scripts/pycolorize.py index 576d473..2278e16 100755 --- a/Tools/scripts/pycolorize.py +++ b/Tools/scripts/pycolorize.py @@ -6,22 +6,29 @@ __author__ = 'Raymond Hettinger' import keyword, tokenize, cgi, functools -def insert(s, i, text): - 'Insert text at position i in string s' - return s[:i] + text + s[i:] - def is_builtin(s): 'Return True if s is the name of a builtin' return s in vars(__builtins__) +def escape_range(lines, start, end): + 'Return escaped content from a range of lines between start and end' + (srow, scol), (erow, ecol) = start, end + if srow == erow: + rows = [lines[srow-1][scol:ecol]] + else: + rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]] + return cgi.escape(''.join(rows)), end + def colorize(source): 'Convert Python source code to an HTML fragment with colorized markup' - text = cgi.escape(source) - lines = text.splitlines(True) + lines = source.splitlines(True) + lines.append('') readline = functools.partial(next, iter(lines), '') actions = [] kind = tok_str = '' tok_type = tokenize.COMMENT + written = (1, 0) + result = [] for tok in tokenize.generate_tokens(readline): prev_tok_type, prev_tok_str = tok_type, tok_str tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok @@ -44,15 +51,17 @@ def colorize(source): elif is_builtin(tok_str) and prev_tok_str != '.': kind = 'builtin' if kind: - actions.append(((srow, scol), (erow, ecol), kind)) - - for (srow, scol), (erow, ecol), kind in reversed(actions): - lines[erow-1] = insert(lines[erow-1], ecol, '</span>') - lines[srow-1] = insert(lines[srow-1], scol, '<span class="%s">' % kind) + line_upto_token, written = escape_range(lines, written, (srow, scol)) + line_thru_token, written = escape_range(lines, written, (erow, ecol)) + result += [line_upto_token, '<span class="%s">' % kind, + line_thru_token, '</span>'] + else: + line_thru_token, written = escape_range(lines, written, (erow, ecol)) + result += [line_thru_token] - lines.insert(0, '<pre class="python">\n') - lines.append('</pre>\n') - return ''.join(lines) + result.insert(0, '<pre class="python">\n') + result.append('</pre>\n') + return ''.join(result) default_css = { '.comment': '{color: crimson;}', |