summaryrefslogtreecommitdiffstats
path: root/Lib/distutils/util.py
diff options
context:
space:
mode:
authorGreg Ward <gward@python.net>2000-06-24 20:40:02 (GMT)
committerGreg Ward <gward@python.net>2000-06-24 20:40:02 (GMT)
commit6a2a3dbec59f267e05c0c507457dfd234263237b (patch)
tree0d8a0e36983bfc49a9a45e53e7b246dcd396893b /Lib/distutils/util.py
parentc3a43b4f9bbfba0685c99302c6144c3d0475ff21 (diff)
downloadcpython-6a2a3dbec59f267e05c0c507457dfd234263237b.zip
cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.gz
cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.bz2
Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (eg. in Python's Makefile, for one thing -- now that I have this function, I'll probably allow quoted strings in config files too.
Diffstat (limited to 'Lib/distutils/util.py')
-rw-r--r--Lib/distutils/util.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/Lib/distutils/util.py b/Lib/distutils/util.py
index 74df8aa..5c1de78 100644
--- a/Lib/distutils/util.py
+++ b/Lib/distutils/util.py
@@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
error = prefix + str(exc[-1])
return error
+
+
+# Needed by 'split_quoted()'
+_wordchars_re = re.compile(r'[^\\\'\"\ ]*')
+_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
+_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
+
+def split_quoted (s):
+ """Split a string up according to Unix shell-like rules for quotes and
+ backslashes. In short: words are delimited by spaces, as long as those
+ spaces are not escaped by a backslash, or inside a quoted string.
+ Single and double quotes are equivalent, and the quote characters can
+ be backslash-escaped. The backslash is stripped from any two-character
+ escape sequence, leaving only the escaped character. The quote
+ characters are stripped from any quoted string. Returns a list of
+ words.
+ """
+
+ # This is a nice algorithm for splitting up a single string, since it
+ # doesn't require character-by-character examination. It was a little
+ # bit of a brain-bender to get it working right, though...
+
+ s = string.strip(s)
+ words = []
+ pos = 0
+
+ while s:
+ m = _wordchars_re.match(s, pos)
+ end = m.end()
+ if end == len(s):
+ words.append(s[:end])
+ break
+
+ if s[end] == ' ': # unescaped, unquoted space: now
+ words.append(s[:end]) # we definitely have a word delimiter
+ s = string.lstrip(s[end:])
+ pos = 0
+
+ elif s[end] == '\\': # preserve whatever is being escaped;
+ # will become part of the current word
+ s = s[:end] + s[end+1:]
+ pos = end+1
+
+ else:
+ if s[end] == "'": # slurp singly-quoted string
+ m = _squote_re.match(s, end)
+ elif s[end] == '"': # slurp doubly-quoted string
+ m = _dquote_re.match(s, end)
+ else:
+ raise RuntimeError, \
+ "this can't happen (bad char '%c')" % s[end]
+
+ if m is None:
+ raise ValueError, \
+ "bad string (mismatched %s quotes?)" % s[end]
+
+ (beg, end) = m.span()
+ s = s[:beg] + s[beg+1:end-1] + s[end:]
+ pos = m.end() - 2
+
+ if pos >= len(s):
+ words.append(s)
+ break
+
+ return words
+
+# split_quoted ()