Commit fad81f08, authored by Guido van Rossum

Be explicit about scheme_chars -- string.letters is locale dependent,
so we can't use it.

While I'm at it, got rid of string module use.  (Found several new
hard special cases for a hypothetical conversion tool: from string
import join, find, rfind; and a local assignment "find=string.find".)
üst 68abe832
...@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, ...@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995. UC Irvine, June 1995.
""" """
# Standard/builtin Python modules
import string
from string import join, split, rfind
# A classification of schemes ('' means apply by default) # A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file', uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
'https', 'shttp', 'https', 'shttp',
...@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', ...@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
'file', 'prospero', ''] 'file', 'prospero', '']
# Characters valid in scheme names # Characters valid in scheme names
scheme_chars = string.letters + string.digits + '+-.' scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789'
'+-.')
MAX_CACHE_SIZE = 20 MAX_CACHE_SIZE = 20
_parse_cache = {} _parse_cache = {}
...@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1): ...@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
return cached return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache() clear_cache()
find = string.find
netloc = path = params = query = fragment = '' netloc = path = params = query = fragment = ''
i = find(url, ':') i = url.find(':')
if i > 0: if i > 0:
if url[:i] == 'http': # optimize the common case if url[:i] == 'http': # optimize the common case
scheme = string.lower(url[:i]) scheme = url[:i].lower()
url = url[i+1:] url = url[i+1:]
if url[:2] == '//': if url[:2] == '//':
i = find(url, '/', 2) i = url.find('/', 2)
if i < 0: if i < 0:
i = len(url) i = len(url)
netloc = url[2:i] netloc = url[2:i]
url = url[i:] url = url[i:]
if allow_fragments: if allow_fragments:
i = string.rfind(url, '#') i = url.rfind('#')
if i >= 0: if i >= 0:
fragment = url[i+1:] fragment = url[i+1:]
url = url[:i] url = url[:i]
i = find(url, '?') i = url.find('?')
if i >= 0: if i >= 0:
query = url[i+1:] query = url[i+1:]
url = url[:i] url = url[:i]
i = find(url, ';') i = url.find(';')
if i >= 0: if i >= 0:
params = url[i+1:] params = url[i+1:]
url = url[:i] url = url[:i]
...@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1): ...@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
if c not in scheme_chars: if c not in scheme_chars:
break break
else: else:
scheme, url = string.lower(url[:i]), url[i+1:] scheme, url = url[:i].lower(), url[i+1:]
if scheme in uses_netloc: if scheme in uses_netloc:
if url[:2] == '//': if url[:2] == '//':
i = find(url, '/', 2) i = url.find('/', 2)
if i < 0: if i < 0:
i = len(url) i = len(url)
netloc, url = url[2:i], url[i:] netloc, url = url[2:i], url[i:]
if allow_fragments and scheme in uses_fragment: if allow_fragments and scheme in uses_fragment:
i = string.rfind(url, '#') i = url.rfind('#')
if i >= 0: if i >= 0:
url, fragment = url[:i], url[i+1:] url, fragment = url[:i], url[i+1:]
if scheme in uses_query: if scheme in uses_query:
i = find(url, '?') i = url.find('?')
if i >= 0: if i >= 0:
url, query = url[:i], url[i+1:] url, query = url[:i], url[i+1:]
if scheme in uses_params: if scheme in uses_params:
i = find(url, ';') i = url.find(';')
if i >= 0: if i >= 0:
url, params = url[:i], url[i+1:] url, params = url[:i], url[i+1:]
tuple = scheme, netloc, url, params, query, fragment tuple = scheme, netloc, url, params, query, fragment
...@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1): ...@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
if not path: if not path:
return urlunparse((scheme, netloc, bpath, return urlunparse((scheme, netloc, bpath,
params, query or bquery, fragment)) params, query or bquery, fragment))
segments = split(bpath, '/')[:-1] + split(path, '/') segments = bpath.split('/')[:-1] + path.split('/')
# XXX The stuff below is bogus in various ways... # XXX The stuff below is bogus in various ways...
if segments[-1] == '.': if segments[-1] == '.':
segments[-1] = '' segments[-1] = ''
...@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1): ...@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
segments[-1] = '' segments[-1] = ''
elif len(segments) >= 2 and segments[-1] == '..': elif len(segments) >= 2 and segments[-1] == '..':
segments[-2:] = [''] segments[-2:] = ['']
return urlunparse((scheme, netloc, join(segments, '/'), return urlunparse((scheme, netloc, '/'.join(segments),
params, query, fragment)) params, query, fragment))
def urldefrag(url): def urldefrag(url):
...@@ -236,7 +234,7 @@ def test(): ...@@ -236,7 +234,7 @@ def test():
while 1: while 1:
line = fp.readline() line = fp.readline()
if not line: break if not line: break
words = string.split(line) words = line.split()
if not words: if not words:
continue continue
url = words[0] url = words[0]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment