summaryrefslogtreecommitdiffstats
path: root/Mac/Tools/IDE/PyFontify.py
diff options
context:
space:
mode:
author Just van Rossum <just@lettererror.com> 1999-01-30 22:39:17 (GMT)
committer Just van Rossum <just@lettererror.com> 1999-01-30 22:39:17 (GMT)
commit 40f9b7bd7cb54645a7c15668b683a8d830ba5219 (patch)
tree baea660d6ef599cd9c4ecc72d009fa75853de577 /Mac/Tools/IDE/PyFontify.py
parent f59a89b5e34ac7db9e69b02a5b558c7cb49a4d9a (diff)
download cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.zip
cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.tar.gz
cpython-40f9b7bd7cb54645a7c15668b683a8d830ba5219.tar.bz2
First Checked In.
Diffstat (limited to 'Mac/Tools/IDE/PyFontify.py')
-rw-r--r-- Mac/Tools/IDE/PyFontify.py 154
1 files changed, 154 insertions, 0 deletions
diff --git a/Mac/Tools/IDE/PyFontify.py b/Mac/Tools/IDE/PyFontify.py
new file mode 100644
index 0000000..a61de65
--- /dev/null
+++ b/Mac/Tools/IDE/PyFontify.py
@@ -0,0 +1,154 @@
+"""Module to analyze Python source code; for syntax coloring tools.
+
+Interface:
+ tags = fontify(pytext, searchfrom, searchto)
+
+The 'pytext' argument is a string containing Python source code.
+The optional arguments 'searchfrom' and 'searchto' bound the slice of pytext that is scanned (default: all of it).
+The returned value is a list of tuples, formatted like this:
+ [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
+The tuple contents are always like this:
+ (tag, startindex, endindex, sublist)
+tag is one of 'keyword', 'string', 'comment' or 'identifier'
+sublist is not used, hence always None.
+"""
+
+# Based on FontText.py by Mitchell S. Chapman,
+# which was modified by Zachary Roadhouse,
+# then un-Tk'd by Just van Rossum.
+# Many thanks for regular expression debugging & authoring are due to:
+# Tim (the-incredib-ly y'rs) Peters and Christian Tismer
+# So, who owns the copyright? ;-) How about this:
+# Copyright 1996-1997:
+# Mitchell S. Chapman,
+# Zachary Roadhouse,
+# Tim Peters,
+# Just van Rossum
+
+__version__ = "0.3.1"
+
+import string, regex
+
+# First a little helper, since I don't like to repeat things. (Tismer speaking)
+import string
+def replace(where, what, with):  # Return 'where' with each occurrence of 'what' replaced by 'with'.
+ return string.join(string.split(where, what), with)  # split on 'what', then re-join the pieces using 'with'
+
+# This list of keywords is taken from ref/node13.html of the
+# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
+keywordsList = [
+ "assert",
+ "del", "from", "lambda", "return",
+ "and", "elif", "global", "not", "try",
+ "break", "else", "if", "or", "while",
+ "class", "except", "import", "pass",
+ "continue", "finally", "in", "print",
+ "def", "for", "is", "raise"]
+
+# Build up a regular expression which will match anything
+# interesting, including multi-line triple-quoted strings.
+commentPat = "#.*"  # a '#' plus whatever '.*' matches after it
+
+pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"  # one-line string template; 'q' is a placeholder for the quote char
+quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')  # single-quoted variant \| double-quoted variant
+
+# Way to go, Tim!  Triple-quoted string template, laid out verbosely; 'q' again stands for the quote char.
+pat = """
+ qqq
+ [^\\q]*
+ \(
+ \( \\\\[\000-\377]
+ \| q
+ \( \\\\[\000-\377]
+ \| [^\\q]
+ \| q
+ \( \\\\[\000-\377]
+ \| [^\\q]
+ \)
+ \)
+ \)
+ [^\\q]*
+ \)*
+ qqq
+"""
+pat = string.join(string.split(pat), '') # get rid of the layout whitespace used in the literal above
+tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')  # ''' variant \| """ variant
+
+# Build up a regular expression which matches all and only
+# Python keywords. This will let us skip the uninteresting
+# identifier references.
+# nonKeyPat identifies what may legally precede or follow a
+# keyword; it is used on both sides of the keyword group in keyPat.
+nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"  # '^' or one char that is not a letter, digit, '_', '.' or a quote
+
+keyPat = nonKeyPat + "\("
+for keyword in keywordsList:
+ keyPat = keyPat + keyword + "\|"
+keyPat = keyPat[:-2] + "\)" + nonKeyPat  # [:-2] drops the trailing "\|" left by the loop
+
+matchPat = keyPat + "\|" + commentPat + "\|" + tripleQuotePat + "\|" + quotePat  # keyword | comment | triple-quoted | quoted
+matchRE = regex.compile(matchPat)
+
+idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*" # Identifier (dots allowed) with optional leading blanks/tabs.
+idRE = regex.compile(idKeyPat)
+
+
+def fontify(pytext, searchfrom = 0, searchto = None):  # Return a list of (tag, start, end, None) tuples for pytext.
+ if searchto is None:
+ searchto = len(pytext)  # default: scan to the end of the text
+ # Cache a few attributes for quicker reference.
+ search = matchRE.search
+ group = matchRE.group  # the matched text is read back from the pattern object itself
+ idSearch = idRE.search
+ idGroup = idRE.group
+
+ tags = []
+ tags_append = tags.append
+ commentTag = 'comment'
+ stringTag = 'string'
+ keywordTag = 'keyword'
+ identifierTag = 'identifier'
+
+ start = 0
+ end = searchfrom  # every search resumes at 'end', i.e. where the previous match stopped
+ while 1:
+ start = search(pytext, end)
+ if start < 0 or start >= searchto:  # negative result, or the match begins at/after searchto
+ break # EXIT LOOP
+ match = group(0)
+ end = start + len(match)
+ c = match[0]  # first matched char: '#' means comment, a quote means string
+ if c not in "#'\"":
+ # Must have matched a keyword.
+ if start <> searchfrom:
+ # there's still a redundant char before and after it, strip!
+ match = match[1:-1]
+ start = start + 1
+ else:
+ # The match begins at searchfrom: treated as the first keyword in the text.
+ # Only the trailing char is stripped. NOTE(review): a leading char matched at searchfrom would be kept - verify.
+ match = match[:-1]
+ end = end - 1  # NOTE(review): nesting is lost in this rendering; confirm whether this runs for both branches or only the else
+ tags_append((keywordTag, start, end, None))
+ # If this was a defining keyword, look ahead to the
+ # following identifier.
+ if match in ["def", "class"]:
+ start = idSearch(pytext, end)
+ if start == end:  # only accept an identifier match that begins exactly where the keyword ended
+ match = idGroup(0)
+ end = start + len(match)
+ tags_append((identifierTag, start, end, None))  # span includes any blanks idRE matched before the name
+ elif c == "#":
+ tags_append((commentTag, start, end, None))
+ else:
+ tags_append((stringTag, start, end, None))
+ return tags
+
+
+def test(path):  # Debug helper: fontify the file at 'path' and print every tag with the text it covers.
+ f = open(path)
+ text = f.read()
+ f.close()
+ tags = fontify(text)
+ for tag, start, end, sublist in tags:
+ print tag, `text[start:end]`  # backquotes print the repr of the tagged slice