Commit 030cfe26 authored by Éric Araujo

Use bytes regex instead of decoding whole pages

parent c8f9c81c
...@@ -159,22 +159,20 @@ class Crawler(BaseClient): ...@@ -159,22 +159,20 @@ class Crawler(BaseClient):
Return a list of names. Return a list of names.
""" """
with self._open_url(self.index_url) as index: if '*' in name:
if '*' in name: name.replace('*', '.*')
name.replace('*', '.*') else:
else: name = "%s%s%s" % ('*.?', name, '*.?')
name = "%s%s%s" % ('*.?', name, '*.?') name = name.replace('*', '[^<]*') # avoid matching end tag
name = name.replace('*', '[^<]*') # avoid matching end tag pattern = ('<a[^>]*>(%s)</a>' % name).encode('utf-8')
projectname = re.compile('<a[^>]*>(%s)</a>' % name, re.I) projectname = re.compile(pattern, re.I)
matching_projects = [] matching_projects = []
with self._open_url(self.index_url) as index:
index_content = index.read() index_content = index.read()
# FIXME should use bytes I/O and regexes instead of decoding
index_content = index_content.decode()
for match in projectname.finditer(index_content): for match in projectname.finditer(index_content):
project_name = match.group(1) project_name = match.group(1).decode('utf-8')
matching_projects.append(self._get_project(project_name)) matching_projects.append(self._get_project(project_name))
return matching_projects return matching_projects
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment