Added 'split_quoted()' function to deal with strings that are quoted in Unix shell-like syntax (e.g. in Python's Makefile, for one thing).
Now that I have this function, I'll probably allow quoted strings in config files too.
author: Greg Ward <gward@python.net> 2000-06-24 20:40:02 (GMT)
committer: Greg Ward <gward@python.net> 2000-06-24 20:40:02 (GMT)
commit: 6a2a3dbec59f267e05c0c507457dfd234263237b (patch)
tree: 0d8a0e36983bfc49a9a45e53e7b246dcd396893b /Lib/distutils/util.py
parent: c3a43b4f9bbfba0685c99302c6144c3d0475ff21 (diff)
download: cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.zip
cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.gz
cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.bz2
1 files changed, 67 insertions, 0 deletions
diff --git a/Lib/distutils/util.py b/Lib/distutils/util.py
index 74df8aa..5c1de78 100644
--- a/Lib/distutils/util.py
+++ b/Lib/distutils/util.py
@@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
         error = prefix + str(exc[-1])
 
     return error
+
+
+# Needed by 'split_quoted()'
+_wordchars_re = re.compile(r'[^\\\'\"\ ]*')
+_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
+_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
+
+def split_quoted (s):
+    """Split a string up according to Unix shell-like rules for quotes and
+    backslashes.  In short: words are delimited by spaces, as long as those
+    spaces are not escaped by a backslash, or inside a quoted string.
+    Single and double quotes are equivalent, and the quote characters can
+    be backslash-escaped.  The backslash is stripped from any two-character
+    escape sequence, leaving only the escaped character.  The quote
+    characters are stripped from any quoted string.  Returns a list of
+    words.
+    """
+
+    # This is a nice algorithm for splitting up a single string, since it
+    # doesn't require character-by-character examination.  It was a little
+    # bit of a brain-bender to get it working right, though...
+
+    s = string.strip(s)
+    words = []
+    pos = 0
+
+    while s:
+        m = _wordchars_re.match(s, pos)
+        end = m.end()
+        if end == len(s):
+            words.append(s[:end])
+            break
+
+        if s[end] == ' ':               # unescaped, unquoted space: now
+            words.append(s[:end])       # we definitely have a word delimiter
+            s = string.lstrip(s[end:])
+            pos = 0
+
+        elif s[end] == '\\':            # preserve whatever is being escaped;
+                                        # will become part of the current word
+            s = s[:end] + s[end+1:]
+            pos = end+1
+
+        else:
+            if s[end] == "'":           # slurp singly-quoted string
+                m = _squote_re.match(s, end)
+            elif s[end] == '"':         # slurp doubly-quoted string
+                m = _dquote_re.match(s, end)
+            else:
+                raise RuntimeError, \
+                      "this can't happen (bad char '%c')" % s[end]
+
+            if m is None:
+                raise ValueError, \
+                      "bad string (mismatched %s quotes?)" % s[end]
+
+            (beg, end) = m.span()
+            s = s[:beg] + s[beg+1:end-1] + s[end:]
+            pos = m.end() - 2
+
+        if pos >= len(s):
+            words.append(s)
+            break
+
+    return words
+
+# split_quoted ()
author	Greg Ward <gward@python.net>	2000-06-24 20:40:02 (GMT)
committer	Greg Ward <gward@python.net>	2000-06-24 20:40:02 (GMT)
commit	6a2a3dbec59f267e05c0c507457dfd234263237b (patch)
tree	0d8a0e36983bfc49a9a45e53e7b246dcd396893b /Lib/distutils/util.py
parent	c3a43b4f9bbfba0685c99302c6144c3d0475ff21 (diff)
download	cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.zip cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.gz cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.bz2