summary refs log tree commit diff stats
path: root/Tools/scripts
diff options
context:
space:
mode:
author Raymond Hettinger <python@rcn.com> 2012-07-03 20:13:52 (GMT)
committer Raymond Hettinger <python@rcn.com> 2012-07-03 20:13:52 (GMT)
commit 5da6039765bce8f36c0e598f65a28013d6734006 (patch)
tree 599e8d61b8a83a7856072becfca45136a546474b /Tools/scripts
parent 3575f910d964a18d06ec3b2a989763801892916e (diff)
download cpython-5da6039765bce8f36c0e598f65a28013d6734006.zip
cpython-5da6039765bce8f36c0e598f65a28013d6734006.tar.gz
cpython-5da6039765bce8f36c0e598f65a28013d6734006.tar.bz2
Refactor to isolate HTML encoding step from the parsing step.
Diffstat (limited to 'Tools/scripts')
-rwxr-xr-x Tools/scripts/highlight.py 37
1 files changed, 21 insertions, 16 deletions
diff --git a/Tools/scripts/highlight.py b/Tools/scripts/highlight.py
index a0d3d45..b2cef3d 100755
--- a/Tools/scripts/highlight.py
+++ b/Tools/scripts/highlight.py
@@ -10,24 +10,23 @@ def is_builtin(s):
'Return True if s is the name of a builtin'
return s in vars(__builtins__)
-def escape_range(lines, start, end):
- 'Return escaped content from a range of lines between start and end'
+def combine_range(lines, start, end):
+ 'Join content from a range of lines between start and end'
(srow, scol), (erow, ecol) = start, end
if srow == erow:
rows = [lines[srow-1][scol:ecol]]
else:
rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
- return cgi.escape(''.join(rows)), end
+ return ''.join(rows), end
-def colorize(source):
- 'Convert Python source code to an HTML fragment with colorized markup'
+def isolate_tokens(source):
+ 'Generate chunks of source and identify chunks to be highlighted'
lines = source.splitlines(True)
lines.append('')
readline = functools.partial(next, iter(lines), '')
kind = tok_str = ''
tok_type = tokenize.COMMENT
written = (1, 0)
- result = []
for tok in tokenize.generate_tokens(readline):
prev_tok_type, prev_tok_str = tok_type, tok_str
tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
@@ -49,23 +48,29 @@ def colorize(source):
kind = 'keyword'
elif is_builtin(tok_str) and prev_tok_str != '.':
kind = 'builtin'
+ line_upto_token, written = combine_range(lines, written, (srow, scol))
+ line_thru_token, written = combine_range(lines, written, (erow, ecol))
+ yield kind, line_upto_token, line_thru_token
+
+def colorize(source):
+ 'Convert Python source code to an HTML fragment with colorized markup'
+ result = ['<pre class="python">\n']
+ for kind, line_upto_token, line_thru_token in isolate_tokens(source):
if kind:
- line_upto_token, written = escape_range(lines, written, (srow, scol))
- line_thru_token, written = escape_range(lines, written, (erow, ecol))
- result += [line_upto_token, '<span class="%s">' % kind,
- line_thru_token, '</span>']
+ result += [cgi.escape(line_upto_token),
+ '<span class="%s">' % kind,
+ cgi.escape(line_thru_token),
+ '</span>']
else:
- line_thru_token, written = escape_range(lines, written, (erow, ecol))
- result += [line_thru_token]
-
- result.insert(0, '<pre class="python">\n')
- result.append('</pre>\n')
+ result += [cgi.escape(line_upto_token),
+ cgi.escape(line_thru_token)]
+ result += ['</pre>\n']
return ''.join(result)
default_css = {
'.comment': '{color: crimson;}',
'.string': '{color: forestgreen;}',
- '.docstring': '{color: forestgreen; font-style:italic}',
+ '.docstring': '{color: forestgreen; font-style:italic;}',
'.keyword': '{color: darkorange;}',
'.builtin': '{color: purple;}',
'.definition': '{color: darkorange; font-weight:bold;}',