summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2025-04-24 15:10:46 (GMT)
committer: GitHub <noreply@github.com> 2025-04-24 15:10:46 (GMT)
commit: 06a26fda607fb1a5e108cf82a0458c8ebf97f5d2 (patch)
tree: d7a9b2a0c3d5288c400d3c647deaaf5951b35582
parent: 984a314b9ff6c1953a714f21d6390926b604332d (diff)
download: cpython-06a26fda607fb1a5e108cf82a0458c8ebf97f5d2.zip
          cpython-06a26fda607fb1a5e108cf82a0458c8ebf97f5d2.tar.gz
          cpython-06a26fda607fb1a5e108cf82a0458c8ebf97f5d2.tar.bz2
gh-118761: Optimise import time for ``shlex`` (#132036)
-rw-r--r-- Lib/shlex.py 17
-rw-r--r-- Lib/test/test_shlex.py 4
-rw-r--r-- Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst 3
3 files changed, 17 insertions, 7 deletions
diff --git a/Lib/shlex.py b/Lib/shlex.py
index f482161..5bf6e0d 100644
--- a/Lib/shlex.py
+++ b/Lib/shlex.py
@@ -7,11 +7,7 @@
# iterator interface by Gustavo Niemeyer, April 2003.
# changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
-import os
-import re
import sys
-from collections import deque
-
from io import StringIO
__all__ = ["shlex", "split", "quote", "join"]
@@ -20,6 +16,8 @@ class shlex:
"A lexical analyzer class for simple shell-like syntaxes."
def __init__(self, instream=None, infile=None, posix=False,
punctuation_chars=False):
+ from collections import deque # deferred import for performance
+
if isinstance(instream, str):
instream = StringIO(instream)
if instream is not None:
@@ -278,6 +276,7 @@ class shlex:
def sourcehook(self, newfile):
"Hook called on a filename to be sourced."
+ import os.path
if newfile[0] == '"':
newfile = newfile[1:-1]
# This implements cpp-like semantics for relative-path inclusion.
@@ -318,13 +317,17 @@ def join(split_command):
return ' '.join(quote(arg) for arg in split_command)
-_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
-
def quote(s):
"""Return a shell-escaped version of the string *s*."""
if not s:
return "''"
- if _find_unsafe(s) is None:
+
+ # Use bytes.translate() for performance
+ safe_chars = (b'%+,-./0123456789:=@'
+ b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+ b'abcdefghijklmnopqrstuvwxyz')
+ # No quoting is needed if `s` is an ASCII string consisting only of `safe_chars`
+ if s.isascii() and not s.encode().translate(None, delete=safe_chars):
return s
# use single quotes, and put single quotes into double quotes
diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py
index 797c91e..f35571e 100644
--- a/Lib/test/test_shlex.py
+++ b/Lib/test/test_shlex.py
@@ -3,6 +3,7 @@ import itertools
import shlex
import string
import unittest
+from test.support import import_helper
# The original test data set was from shellwords, by Hartmut Goebel.
@@ -363,6 +364,9 @@ class ShlexTest(unittest.TestCase):
with self.assertRaises(AttributeError):
shlex_instance.punctuation_chars = False
+ def test_lazy_imports(self):
+ import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'})
+
# Allow this test to be used with old shlex.py
if not getattr(shlex, "split", None):
diff --git a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
new file mode 100644
index 0000000..6b4b3ed
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
@@ -0,0 +1,3 @@
+Improve import times by up to 33x for the :mod:`shlex` module,
+and improve the performance of :func:`shlex.quote` by up to 12x.
+Patch by Adam Turner.