diff options
author | Éric Araujo <merwok@netwok.org> | 2011-09-10 16:10:58 (GMT) |
---|---|---|
committer | Éric Araujo <merwok@netwok.org> | 2011-09-10 16:10:58 (GMT) |
commit | 030cfe26a336826d08362cd60c8ea4be7775844b (patch) | |
tree | 1584286aed99eb6bc31af8b2c0e5f052537ead9a /Lib/packaging | |
parent | c8f9c81cfa7fb46d3c0be9e9e5f18bfda9247984 (diff) | |
download | cpython-030cfe26a336826d08362cd60c8ea4be7775844b.zip cpython-030cfe26a336826d08362cd60c8ea4be7775844b.tar.gz cpython-030cfe26a336826d08362cd60c8ea4be7775844b.tar.bz2 |
Use bytes regex instead of decoding whole pages
Diffstat (limited to 'Lib/packaging')
-rw-r--r-- | Lib/packaging/pypi/simple.py | 22 |
1 files changed, 10 insertions, 12 deletions
diff --git a/Lib/packaging/pypi/simple.py b/Lib/packaging/pypi/simple.py
index 710355d..76aad02 100644
--- a/Lib/packaging/pypi/simple.py
+++ b/Lib/packaging/pypi/simple.py
@@ -159,22 +159,20 @@ class Crawler(BaseClient):

         Return a list of names.
         """
-        with self._open_url(self.index_url) as index:
-            if '*' in name:
-                name.replace('*', '.*')
-            else:
-                name = "%s%s%s" % ('*.?', name, '*.?')
-            name = name.replace('*', '[^<]*')  # avoid matching end tag
-            projectname = re.compile('<a[^>]*>(%s)</a>' % name, re.I)
-            matching_projects = []
+        if '*' in name:
+            name.replace('*', '.*')
+        else:
+            name = "%s%s%s" % ('*.?', name, '*.?')
+        name = name.replace('*', '[^<]*')  # avoid matching end tag
+        pattern = ('<a[^>]*>(%s)</a>' % name).encode('utf-8')
+        projectname = re.compile(pattern, re.I)
+        matching_projects = []

+        with self._open_url(self.index_url) as index:
             index_content = index.read()

-            # FIXME should use bytes I/O and regexes instead of decoding
-            index_content = index_content.decode()
-
         for match in projectname.finditer(index_content):
-            project_name = match.group(1)
+            project_name = match.group(1).decode('utf-8')
             matching_projects.append(self._get_project(project_name))
         return matching_projects
