diff options
author | Just van Rossum <just@lettererror.com> | 1999-01-30 22:39:17 (GMT) |
---|---|---|
committer | Just van Rossum <just@lettererror.com> | 1999-01-30 22:39:17 (GMT) |
commit | 40f9b7bd7cb54645a7c15668b683a8d830ba5219 (patch) | |
tree | baea660d6ef599cd9c4ecc72d009fa75853de577 /Mac/Tools/IDE/PyFontify.py | |
parent | f59a89b5e34ac7db9e69b02a5b558c7cb49a4d9a (diff) | |
download | cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.zip cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.tar.gz cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.tar.bz2 |
First Checked In.
Diffstat (limited to 'Mac/Tools/IDE/PyFontify.py')
-rw-r--r-- | Mac/Tools/IDE/PyFontify.py | 154 |
1 files changed, 154 insertions, 0 deletions
# Source: Mac/Tools/IDE/PyFontify.py
# (diff --git a/Mac/Tools/IDE/PyFontify.py b/Mac/Tools/IDE/PyFontify.py,
#  new file mode 100644, index 0000000..a61de65, 154 lines)
"""Module to analyze Python source code; for syntax coloring tools.

Interface:
    tags = fontify(pytext, searchfrom, searchto)

The 'pytext' argument is a string containing Python source code.
The (optional) arguments 'searchfrom' and 'searchto' may contain a slice
in pytext.
The returned value is a list of tuples, formatted like this:
    [('keyword', 0, 6, None), ('keyword', 11, 17, None),
     ('comment', 23, 53, None), etc. ]
The tuple contents are always like this:
    (tag, startindex, endindex, sublist)
tag is one of 'keyword', 'string', 'comment' or 'identifier'
sublist is not used, hence always None.
"""

# Based on FontText.py by Mitchell S. Chapman,
# which was modified by Zachary Roadhouse,
# then un-Tk'd by Just van Rossum.
# Many thanks for regular expression debugging & authoring are due to:
#    Tim (the-incredib-ly y'rs) Peters and Christian Tismer
# So, who owns the copyright? ;-) How about this:
# Copyright 1996-1997:
#    Mitchell S. Chapman,
#    Zachary Roadhouse,
#    Tim Peters,
#    Just van Rossum

__version__ = "0.3.1"

import re


# First a little helper, since I don't like to repeat things. (Tismer speaking)
def replace(where, what, with_):
    """Return 'where' with every occurrence of 'what' replaced by 'with_'.

    Implemented as split-and-join, so an empty 'what' raises ValueError.
    """
    return with_.join(where.split(what))


# This list of keywords is taken from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
keywordsList = [
    "assert",
    "del", "from", "lambda", "return",
    "and", "elif", "global", "not", "try",
    "break", "else", "if", "or", "while",
    "class", "except", "import", "pass",
    "continue", "finally", "in", "print",
    "def", "for", "is", "raise"]

# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
commentPat = "#.*"

# A one-line string: quote, ordinary characters, any number of
# (backslash + any character, ordinary characters), quote.  'q' is a
# placeholder that is substituted by each of the two quote characters.
pat = r"q[^\\q\n]*(?:\\[\s\S][^\\q\n]*)*q"
quotePat = replace(pat, "q", "'") + "|" + replace(pat, "q", '"')

# Way to go, Tim!
# A triple-quoted string: inside the delimiters we accept escapes and
# runs of one or two quotes, as long as they are not followed by a third.
pat = r"""
    qqq
    [^\\q]*
    (?:
        (?: \\[\s\S]
        |   q
            (?: \\[\s\S]
            |   [^\\q]
            |   q
                (?: \\[\s\S]
                |   [^\\q]
                )
            )
        )
        [^\\q]*
    )*
    qqq
"""
pat = "".join(pat.split())    # get rid of whitespace
tripleQuotePat = replace(pat, "q", "'") + "|" + replace(pat, "q", '"')

# Build up a regular expression which matches all and only
# Python keywords. This will let us skip the uninteresting
# identifier references.
# nonKeyPat identifies characters which may legally precede
# (or follow) a keyword.
nonKeyPat = "[^a-zA-Z0-9_.\"']"

# The neighbours of a keyword are checked with zero-width look-around
# instead of being consumed, so the match is exactly the keyword and a
# keyword at the very start or very end of the text is recognised too.
keyPat = ("(?:^|(?<=" + nonKeyPat + "))"
          + "(" + "|".join(keywordsList) + ")"
          + "(?=" + nonKeyPat + r"|\Z)")

matchPat = keyPat + "|" + commentPat + "|" + tripleQuotePat + "|" + quotePat
matchRE = re.compile(matchPat)

idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"    # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)


def fontify(pytext, searchfrom=0, searchto=None):
    """Return a list of (tag, start, end, None) tuples for 'pytext'.

    Scanning begins at index 'searchfrom' and stops at the first item
    that starts at or after 'searchto' (default: len(pytext)).  An item
    that starts before 'searchto' is reported in full even if it ends
    after it.

    tag is 'keyword', 'string', 'comment' or 'identifier'.  An
    'identifier' tag is only produced for the name that directly follows
    'def' or 'class'; its span includes the whitespace between the
    keyword and the name.
    """
    if searchto is None:
        searchto = len(pytext)
    # Cache a few attributes for quicker reference.
    search = matchRE.search
    idMatch = idRE.match

    tags = []
    tags_append = tags.append
    commentTag = 'comment'
    stringTag = 'string'
    keywordTag = 'keyword'
    identifierTag = 'identifier'

    end = searchfrom
    while True:
        found = search(pytext, end)
        if found is None:
            break    # EXIT LOOP
        start, end = found.span()
        if start >= searchto:
            break    # EXIT LOOP
        c = pytext[start]
        if c == "#":
            tags_append((commentTag, start, end, None))
        elif c in "'\"":
            tags_append((stringTag, start, end, None))
        else:
            # Must have matched a keyword.
            tags_append((keywordTag, start, end, None))
            # If this was a defining keyword, look ahead to the
            # following identifier.
            if found.group() in ("def", "class"):
                ident = idMatch(pytext, end)
                if ident is not None:
                    start, end = ident.span()
                    tags_append((identifierTag, start, end, None))
    return tags


def test(path):
    """Fontify the file at 'path' and print each tag with its source text."""
    with open(path) as f:
        text = f.read()
    tags = fontify(text)
    for tag, start, end, sublist in tags:
        print(tag, repr(text[start:end]))


# 'test' is a manual helper taking a file path, not a unit test; this
# keeps pytest-style collectors from trying to run it.
test.__test__ = False