diff options
author | Greg Ward <gward@python.net> | 2000-06-24 20:40:02 (GMT) |
---|---|---|
committer | Greg Ward <gward@python.net> | 2000-06-24 20:40:02 (GMT) |
commit | 6a2a3dbec59f267e05c0c507457dfd234263237b (patch) | |
tree | 0d8a0e36983bfc49a9a45e53e7b246dcd396893b /Lib/distutils/util.py | |
parent | c3a43b4f9bbfba0685c99302c6144c3d0475ff21 (diff) | |
download | cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.zip cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.gz cpython-6a2a3dbec59f267e05c0c507457dfd234263237b.tar.bz2 |
Added 'split_quoted()' function to deal with strings that are quoted in
Unix shell-like syntax (e.g. in Python's Makefile, for one thing) -- now that
I have this function, I'll probably allow quoted strings in config files too.
Diffstat (limited to 'Lib/distutils/util.py')
-rw-r--r-- | Lib/distutils/util.py | 67 |
1 files changed, 67 insertions, 0 deletions
# Hunk context recovered from the commit page:
#   diff --git a/Lib/distutils/util.py b/Lib/distutils/util.py
#   index 74df8aa..5c1de78 100644
#   @@ -166,3 +166,70 @@ def grok_environment_error (exc, prefix="error: "):
# Everything below is the code this commit appends to the module, right
# after the end of grok_environment_error().


# Needed by 'split_quoted()'
# _wordchars_re matches a (possibly empty) run of "ordinary" characters:
# anything that is not a backslash, a quote, or whitespace.  The two quote
# patterns match a complete quoted string, allowing backslash-escaped
# characters (including the quote character itself) inside it.
_wordchars_re = re.compile(r'[^\\\'\"%s]*' % string.whitespace)
_squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
_dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted (s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.  In short: words are delimited by whitespace (any
    character in 'string.whitespace', not just the space character), as
    long as that whitespace is not escaped by a backslash, or inside a
    quoted string.  Single and double quotes are equivalent, and the quote
    characters can be backslash-escaped.  Outside of quotes, the backslash
    is stripped from any two-character escape sequence, leaving only the
    escaped character.  The quote characters are stripped from any quoted
    string; backslashes *inside* a quoted string are left in place.

    Leading and trailing whitespace in 's' is discarded before splitting.
    Returns a list of words (empty list for an empty/all-whitespace
    string).  Raises ValueError if a quote is opened but never closed.
    """

    # This is a nice algorithm for splitting up a single string, since it
    # doesn't require character-by-character examination.  It was a little
    # bit of a brain-bender to get it working right, though...
    #
    # Invariant: 's' always starts with the word currently being built;
    # s[:pos] is the part of that word already processed (escapes and
    # quotes removed), and scanning resumes at 'pos'.

    s = s.strip(string.whitespace)
    words = []
    pos = 0

    while s:
        m = _wordchars_re.match(s, pos)
        end = m.end()
        if end == len(s):
            # ran off the end of the string: what's left is the last word
            words.append(s[:end])
            break

        if s[end] in string.whitespace:
            # unescaped, unquoted whitespace: now we definitely have a
            # word delimiter
            words.append(s[:end])
            s = s[end:].lstrip(string.whitespace)
            pos = 0

        elif s[end] == '\\':
            # preserve whatever is being escaped; it will become part of
            # the current word.  Drop the backslash and resume scanning
            # *after* the escaped character so it is never re-examined.
            s = s[:end] + s[end+1:]
            pos = end + 1

        else:
            if s[end] == "'":           # slurp singly-quoted string
                m = _squote_re.match(s, end)
            elif s[end] == '"':         # slurp doubly-quoted string
                m = _dquote_re.match(s, end)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % s[end])

            if m is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % s[end])

            # Splice out the two quote characters; the quoted text joins
            # the current word.  The string just got two characters
            # shorter, hence the "- 2" when computing where to resume.
            (beg, end) = m.span()
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = m.end() - 2

        if pos >= len(s):
            # the escape/quote we just handled was the end of the input
            words.append(s)
            break

    return words

# split_quoted ()