summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vinay Sajip <vinay_sajip@yahoo.co.uk> 2020-02-28 14:26:27 (GMT)
committer: GitHub <noreply@github.com> 2020-02-28 14:26:27 (GMT)
commit: 4f17c5cd9a1ec50fe8de7ef68c39220a01a862cb (patch)
tree: 627e549365ac1eadeae19e013ec5c218898c0b6a
parent: e263bb1e97ae8f84fb4f2ab5b0c4f529a2e5696d (diff)
download: cpython-4f17c5cd9a1ec50fe8de7ef68c39220a01a862cb.zip
cpython-4f17c5cd9a1ec50fe8de7ef68c39220a01a862cb.tar.gz
cpython-4f17c5cd9a1ec50fe8de7ef68c39220a01a862cb.tar.bz2
bpo-12915: Improve Unicode support for package names and attributes. (GH-18517)
-rw-r--r-- Lib/pkgutil.py 13
-rw-r--r-- Lib/test/test_pkgutil.py 32
2 files changed, 39 insertions, 6 deletions
diff --git a/Lib/pkgutil.py b/Lib/pkgutil.py
index 4bc3083..4c18467 100644
--- a/Lib/pkgutil.py
+++ b/Lib/pkgutil.py
@@ -638,8 +638,8 @@ def get_data(package, resource):
return loader.get_data(resource_name)
-_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*'
-_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I)
+_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
+_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U)
del _DOTTED_WORDS
def resolve_name(name):
@@ -677,11 +677,12 @@ def resolve_name(name):
m = _NAME_PATTERN.match(name)
if not m:
raise ValueError(f'invalid format: {name!r}')
- groups = m.groups()
- if groups[2]:
+ gd = m.groupdict()
+ if gd.get('cln'):
# there is a colon - a one-step import is all that's needed
- mod = importlib.import_module(groups[0])
- parts = groups[3].split('.') if groups[3] else []
+ mod = importlib.import_module(gd['pkg'])
+ parts = gd.get('obj')
+ parts = parts.split('.') if parts else []
else:
# no colon - have to iterate to find the package boundary
parts = name.split('.')
diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
index 906150b..53456c2 100644
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -229,8 +229,40 @@ class PkgutilTests(unittest.TestCase):
('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
('ZeroDivisionError', ImportError),
+ ('os.path.9abc', ValueError),
+ ('9abc', ValueError),
)
+ # add some Unicode package names to the mix.
+
+ unicode_words = ('\u0935\u092e\u0938',
+ '\xe9', '\xc8',
+ '\uc548\ub155\ud558\uc138\uc694',
+ '\u3055\u3088\u306a\u3089',
+ '\u3042\u308a\u304c\u3068\u3046',
+ '\u0425\u043e\u0440\u043e\u0448\u043e',
+ '\u0441\u043f\u0430\u0441\u0438\u0431\u043e',
+ '\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868')
+
+ for uw in unicode_words:
+ d = os.path.join(self.dirname, uw)
+ os.makedirs(d, exist_ok=True)
+ # make an empty __init__.py file
+ f = os.path.join(d, '__init__.py')
+ with open(f, 'w') as f:
+ f.write('')
+ f.flush()
+ # now import the package we just created; clearing the caches is
+ # needed, otherwise the newly created package isn't found
+ importlib.invalidate_caches()
+ mod = importlib.import_module(uw)
+ success_cases += (uw, mod),
+ if len(uw) > 1:
+ failure_cases += (uw[:-1], ImportError),
+
+ # add an example with a Unicode digit at the start
+ failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),
+
for s, expected in success_cases:
with self.subTest(s=s):
o = pkgutil.resolve_name(s)