diff options
author | Evan <evanunderscore@gmail.com> | 2019-06-01 19:09:22 (GMT) |
---|---|---|
committer | Vinay Sajip <vinay_sajip@yahoo.co.uk> | 2019-06-01 19:09:22 (GMT) |
commit | 56624a99a916fd27152d5b23364303acc0d707de (patch) | |
tree | 469ecf27c685101302f1c9c365f394df174e68e9 /Lib | |
parent | 2b843ac0ae745026ce39514573c5d075137bef65 (diff) | |
download | cpython-56624a99a916fd27152d5b23364303acc0d707de.zip cpython-56624a99a916fd27152d5b23364303acc0d707de.tar.gz cpython-56624a99a916fd27152d5b23364303acc0d707de.tar.bz2 |
bpo-28595: Allow shlex whitespace_split with punctuation_chars (GH-2071)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/shlex.py | 3 | ||||
-rw-r--r-- | Lib/test/test_shlex.py | 46 |
2 files changed, 38 insertions, 11 deletions
diff --git a/Lib/shlex.py b/Lib/shlex.py
index fb1130d..edea077 100644
--- a/Lib/shlex.py
+++ b/Lib/shlex.py
@@ -246,7 +246,8 @@ class shlex:
                     escapedstate = 'a'
                     self.state = nextchar
                 elif (nextchar in self.wordchars or nextchar in self.quotes
-                      or self.whitespace_split):
+                      or (self.whitespace_split and
+                          nextchar not in self.punctuation_chars)):
                     self.token += nextchar
                 else:
                     if self.punctuation_chars:
diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py
index a432610..376c5e8 100644
--- a/Lib/test/test_shlex.py
+++ b/Lib/test/test_shlex.py
@@ -1,4 +1,5 @@
 import io
+import itertools
 import shlex
 import string
 import unittest
@@ -183,10 +184,12 @@ class ShlexTest(unittest.TestCase):
             src = ['echo hi %s echo bye' % delimiter,
                    'echo hi%secho bye' % delimiter]
             ref = ['echo', 'hi', delimiter, 'echo', 'bye']
-            for ss in src:
+            for ss, ws in itertools.product(src, (False, True)):
                 s = shlex.shlex(ss, punctuation_chars=True)
+                s.whitespace_split = ws
                 result = list(s)
-                self.assertEqual(ref, result, "While splitting '%s'" % ss)
+                self.assertEqual(ref, result,
+                                 "While splitting '%s' [ws=%s]" % (ss, ws))
 
     def testSyntaxSplitSemicolon(self):
         """Test handling of syntax splitting of ;"""
@@ -197,10 +200,12 @@ class ShlexTest(unittest.TestCase):
                    'echo hi%s echo bye' % delimiter,
                    'echo hi%secho bye' % delimiter]
             ref = ['echo', 'hi', delimiter, 'echo', 'bye']
-            for ss in src:
+            for ss, ws in itertools.product(src, (False, True)):
                 s = shlex.shlex(ss, punctuation_chars=True)
+                s.whitespace_split = ws
                 result = list(s)
-                self.assertEqual(ref, result, "While splitting '%s'" % ss)
+                self.assertEqual(ref, result,
+                                 "While splitting '%s' [ws=%s]" % (ss, ws))
 
     def testSyntaxSplitRedirect(self):
         """Test handling of syntax splitting of >"""
@@ -211,10 +216,11 @@ class ShlexTest(unittest.TestCase):
                    'echo hi%s out' % delimiter,
                    'echo hi%sout' % delimiter]
             ref = ['echo', 'hi', delimiter, 'out']
-            for ss in src:
+            for ss, ws in itertools.product(src, (False, True)):
                 s = shlex.shlex(ss, punctuation_chars=True)
                 result = list(s)
-                self.assertEqual(ref, result, "While splitting '%s'" % ss)
+                self.assertEqual(ref, result,
+                                 "While splitting '%s' [ws=%s]" % (ss, ws))
 
     def testSyntaxSplitParen(self):
         """Test handling of syntax splitting of ()"""
@@ -222,18 +228,25 @@ class ShlexTest(unittest.TestCase):
         src = ['( echo hi )',
                '(echo hi)']
         ref = ['(', 'echo', 'hi', ')']
-        for ss in src:
+        for ss, ws in itertools.product(src, (False, True)):
             s = shlex.shlex(ss, punctuation_chars=True)
+            s.whitespace_split = ws
             result = list(s)
-            self.assertEqual(ref, result, "While splitting '%s'" % ss)
+            self.assertEqual(ref, result,
+                             "While splitting '%s' [ws=%s]" % (ss, ws))
 
     def testSyntaxSplitCustom(self):
         """Test handling of syntax splitting with custom chars"""
+        ss = "~/a&&b-c --color=auto||d *.py?"
         ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
-        ss = "~/a && b-c --color=auto || d *.py?"
         s = shlex.shlex(ss, punctuation_chars="|")
         result = list(s)
-        self.assertEqual(ref, result, "While splitting '%s'" % ss)
+        self.assertEqual(ref, result, "While splitting '%s' [ws=False]" % ss)
+        ref = ['~/a&&b-c', '--color=auto', '||', 'd', '*.py?']
+        s = shlex.shlex(ss, punctuation_chars="|")
+        s.whitespace_split = True
+        result = list(s)
+        self.assertEqual(ref, result, "While splitting '%s' [ws=True]" % ss)
 
     def testTokenTypes(self):
         """Test that tokens are split with types as expected."""
@@ -293,6 +306,19 @@ class ShlexTest(unittest.TestCase):
         s = shlex.shlex("'')abc", punctuation_chars=True)
         self.assertEqual(list(s), expected)
 
+    def testUnicodeHandling(self):
+        """Test punctuation_chars and whitespace_split handle unicode."""
+        ss = "\u2119\u01b4\u2602\u210c\u00f8\u1f24"
+        # Should be parsed as one complete token (whitespace_split=True).
+        ref = ['\u2119\u01b4\u2602\u210c\u00f8\u1f24']
+        s = shlex.shlex(ss, punctuation_chars=True)
+        s.whitespace_split = True
+        self.assertEqual(list(s), ref)
+        # Without whitespace_split, uses wordchars and splits on all.
+        ref = ['\u2119', '\u01b4', '\u2602', '\u210c', '\u00f8', '\u1f24']
+        s = shlex.shlex(ss, punctuation_chars=True)
+        self.assertEqual(list(s), ref)
+
     def testQuote(self):
         safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
         unicode_sample = '\xe9\xe0\xdf'  # e + acute accent, a + grave, sharp s