Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
749057be
Kaydet (Commit)
749057be
authored
Şub 22, 1994
tarafından
Guido van Rossum
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Redesigned as a class
üst
76ca3c17
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
193 additions
and
114 deletions
+193
-114
urlopen.py
Lib/urlopen.py
+193
-114
No files found.
Lib/urlopen.py
Dosyayı görüntüle @
749057be
...
@@ -6,110 +6,170 @@
...
@@ -6,110 +6,170 @@
# IETF URL Working Group 14 July 1993
# IETF URL Working Group 14 July 1993
# draft-ietf-uri-url-01.txt
# draft-ietf-uri-url-01.txt
#
#
# The object returned by urlopen() will differ per protocol.
# The object returned by URLopener().open(file) will differ per
# All you know is that is has methods read(), fileno(), close() and info().
# protocol. All you know is that is has methods read(), readline(),
# The read(), fileno() and close() methods work like those of open files.
# readlines(), fileno(), close() and info(). The read*(), fileno()
# and close() methods work like those of open files.
# The info() method returns an rfc822.Message object which can be
# The info() method returns an rfc822.Message object which can be
# used to query various info about the object, if available.
# used to query various info about the object, if available.
# (rfc822.Message objects are queried with the getheader() method.)
# (rfc822.Message objects are queried with the getheader() method.)
import
socket
import
socket
import
regex
import
regex
import
regsub
import
string
import
rfc822
import
ftplib
# External interface -- use urlopen(file) as if it were open(file, 'r')
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
# (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?
# Shortcut for basic usage
_urlopener
=
None
def
urlopen
(
url
):
def
urlopen
(
url
):
url
=
string
.
strip
(
url
)
global
_urlopener
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
url
=
string
.
strip
(
url
[
1
:
-
1
])
if
not
_urlopener
:
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
_urlopener
=
URLopener
()
type
,
url
=
splittype
(
url
)
return
_urlopener
.
open
(
url
)
if
not
type
:
type
=
'file'
type
=
regsub
.
gsub
(
'-'
,
'_'
,
type
)
try
:
# Class to open URLs.
func
=
eval
(
'open_'
+
type
)
# This is a class rather than just a subroutine because we may need
except
NameError
:
# more than one set of global protocol-specific options.
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
try
:
return
func
(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# Each routine of the form open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
makefile
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Empty rfc822.Message object
noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
noheaders
.
fp
.
close
()
# Recycle file descriptor
# Use Gopher protocol
def
open_gopher
(
url
):
import
gopherlib
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
makefile
(
fp
,
noheaders
)
# Use local file or FTP depending on form of URL
localhost
=
socket
.
gethostbyname
(
'localhost'
)
thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
def
open_file
(
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
return
makefile
(
open
(
file
,
'r'
),
noheaders
)
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
,
thishost
):
try
:
fp
=
open
(
file
,
'r'
)
except
IOError
:
fp
=
None
if
fp
:
return
makefile
(
fp
,
noheaders
)
return
open_ftp
(
url
)
# Use FTP protocol
ftpcache
=
{}
ftpcache
=
{}
ftperrors
=
(
ftplib
.
error_reply
,
class
URLopener
:
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
def
open_ftp
(
url
):
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
ftpcache
.
has_key
(
key
):
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
makefile
(
ftpcache
[
key
]
.
retrfile
(
file
),
noheaders
)
except
ftperrors
,
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
# Constructor
def
__init__
(
self
):
self
.
addheaders
=
[]
self
.
ftpcache
=
ftpcache
# Undocumented feature: you can use a different
# ftp cache by assigning to the .ftpcache member;
# in case you want logically independent URL openers
# Utility classes
# Add a header to be used by the HTTP interface only
# e.g. u.addheader('Accept', 'sound/basic')
def
addheader
(
self
,
*
args
):
self
.
addheaders
.
append
(
args
)
# Class used to add an info() method to a file object
# External interface
class
makefile
:
# Use URLopener().open(file) instead of open(file, 'r')
def
__init__
(
self
,
fp
,
headers
):
def
open
(
self
,
url
):
self
.
fp
=
fp
import
string
self
.
headers
=
headers
url
=
string
.
strip
(
url
)
self
.
read
=
self
.
fp
.
read
if
url
[:
1
]
==
'<'
and
url
[
-
1
:]
==
'>'
:
self
.
fileno
=
self
.
fp
.
fileno
url
=
string
.
strip
(
url
[
1
:
-
1
])
self
.
close
=
self
.
fp
.
close
if
url
[:
4
]
==
'URL:'
:
url
=
string
.
strip
(
url
[
4
:])
def
info
(
self
):
type
,
url
=
splittype
(
url
)
return
self
.
headers
if
not
type
:
type
=
'file'
name
=
'open_'
+
type
if
'-'
in
name
:
import
regsub
name
=
regsub
.
gsub
(
'-'
,
'_'
,
name
)
if
not
hasattr
(
self
,
name
):
raise
IOError
,
(
'url error'
,
'unknown url type'
,
type
)
meth
=
getattr
(
self
,
name
)
try
:
return
meth
(
url
)
except
socket
.
error
,
msg
:
raise
IOError
,
(
'socket error'
,
msg
)
# Each method named open_<type> knows how to open that type of URL
# Use HTTP protocol
def
open_http
(
self
,
url
):
import
httplib
host
,
selector
=
splithost
(
url
)
h
=
httplib
.
HTTP
(
host
)
h
.
putrequest
(
'GET'
,
selector
)
for
args
in
self
.
addheaders
:
apply
(
h
.
putheader
,
args
)
errcode
,
errmsg
,
headers
=
h
.
getreply
()
if
errcode
==
200
:
return
addinfo
(
h
.
getfile
(),
headers
)
else
:
raise
IOError
,
(
'http error'
,
errcode
,
errmsg
,
headers
)
# Use Gopher protocol
def
open_gopher
(
self
,
url
):
import
gopherlib
host
,
selector
=
splithost
(
url
)
type
,
selector
=
splitgophertype
(
selector
)
selector
,
query
=
splitquery
(
selector
)
if
query
:
fp
=
gopherlib
.
send_query
(
selector
,
query
,
host
)
else
:
fp
=
gopherlib
.
send_selector
(
selector
,
host
)
return
addinfo
(
fp
,
noheaders
())
# Use local file or FTP depending on form of URL
def
open_file
(
self
,
url
):
host
,
file
=
splithost
(
url
)
if
not
host
:
return
addinfo
(
open
(
file
,
'r'
),
noheaders
())
host
,
port
=
splitport
(
host
)
if
not
port
and
socket
.
gethostbyname
(
host
)
in
(
localhost
(),
thishost
()):
try
:
fp
=
open
(
file
,
'r'
)
except
IOError
:
fp
=
None
if
fp
:
return
addinfo
(
fp
,
noheaders
())
return
self
.
open_ftp
(
url
)
# Use FTP protocol
def
open_ftp
(
self
,
url
):
host
,
file
=
splithost
(
url
)
host
,
port
=
splitport
(
host
)
host
=
socket
.
gethostbyname
(
host
)
if
not
port
:
import
ftplib
port
=
ftplib
.
FTP_PORT
key
=
(
host
,
port
)
try
:
if
not
self
.
ftpcache
.
has_key
(
key
):
self
.
ftpcache
[
key
]
=
ftpwrapper
(
host
,
port
)
return
addinfo
(
self
.
ftpcache
[
key
]
.
retrfile
(
file
),
noheaders
())
except
ftperrors
(),
msg
:
raise
IOError
,
(
'ftp error'
,
msg
)
# Utility functions
# Return the IP address of the magic hostname 'localhost'
_localhost
=
None
def
localhost
():
global
_localhost
if
not
_localhost
:
_localhost
=
socket
.
gethostbyname
(
'localhost'
)
return
_localhost
# Return the IP address of the current host
_thishost
=
None
def
thishost
():
global
_thishost
if
not
_thishost
:
_thishost
=
socket
.
gethostbyname
(
socket
.
gethostname
())
return
_thishost
# Return the set of errors raised by the FTP class
_ftperrors
=
None
def
ftperrors
():
global
_ftperrors
if
not
_ftperrors
:
import
ftplib
_ftperrors
=
(
ftplib
.
error_reply
,
ftplib
.
error_temp
,
ftplib
.
error_perm
,
ftplib
.
error_proto
)
return
_ftperrors
# Return an empty rfc822.Message object
_noheaders
=
None
def
noheaders
():
global
_noheaders
if
not
_noheaders
:
import
rfc822
_noheaders
=
rfc822
.
Message
(
open
(
'/dev/null'
,
'r'
))
_noheaders
.
fp
.
close
()
# Recycle file descriptor
return
_noheaders
# Utility classes
# Class used by open_ftp() for cache of open FTP connections
# Class used by open_ftp() for cache of open FTP connections
class
ftpwrapper
:
class
ftpwrapper
:
...
@@ -118,10 +178,12 @@ class ftpwrapper:
...
@@ -118,10 +178,12 @@ class ftpwrapper:
self
.
port
=
port
self
.
port
=
port
self
.
init
()
self
.
init
()
def
init
(
self
):
def
init
(
self
):
import
ftplib
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
=
ftplib
.
FTP
()
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
connect
(
self
.
host
,
self
.
port
)
self
.
ftp
.
login
()
self
.
ftp
.
login
()
def
retrfile
(
self
,
file
):
def
retrfile
(
self
,
file
):
import
ftplib
try
:
try
:
self
.
ftp
.
voidcmd
(
'TYPE I'
)
self
.
ftp
.
voidcmd
(
'TYPE I'
)
except
ftplib
.
all_errors
:
except
ftplib
.
all_errors
:
...
@@ -140,27 +202,43 @@ class ftpwrapper:
...
@@ -140,27 +202,43 @@ class ftpwrapper:
if
file
:
cmd
=
'NLST '
+
file
if
file
:
cmd
=
'NLST '
+
file
else
:
cmd
=
'NLST'
else
:
cmd
=
'NLST'
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
conn
=
self
.
ftp
.
transfercmd
(
cmd
)
return
fakefile
(
self
.
ftp
,
conn
)
return
addclosehook
(
conn
.
makefile
(
'r'
),
self
.
ftp
.
voidresp
)
# Class used by ftpwrapper to handle response when transfer is complete
# Base class for addinfo and addclosehook
class
fakefile
:
class
addbase
:
def
__init__
(
self
,
ftp
,
conn
):
def
__init__
(
self
,
fp
):
self
.
ftp
=
ftp
self
.
fp
=
fp
self
.
conn
=
conn
self
.
fp
=
self
.
conn
.
makefile
(
'r'
)
self
.
read
=
self
.
fp
.
read
self
.
read
=
self
.
fp
.
read
self
.
readline
=
self
.
fp
.
readline
self
.
readlines
=
self
.
fp
.
readlines
self
.
fileno
=
self
.
fp
.
fileno
self
.
fileno
=
self
.
fp
.
fileno
def
__del__
(
self
):
def
__del__
(
self
):
self
.
close
()
self
.
close
()
def
close
(
self
):
def
close
(
self
):
self
.
conn
=
None
self
.
fp
=
None
self
.
fp
=
None
self
.
read
=
None
if
self
.
ftp
:
self
.
ftp
.
voidresp
()
# Class to add a close hook to an open file
self
.
ftp
=
None
class
addclosehook
(
addbase
):
def
__init__
(
self
,
fp
,
closehook
,
*
hookargs
):
addbase
.
__init__
(
self
,
fp
)
self
.
closehook
=
closehook
self
.
hookargs
=
hookargs
def
close
(
self
):
if
self
.
closehook
:
apply
(
self
.
closehook
,
self
.
hookargs
)
self
.
closehook
=
None
self
.
fp
=
None
# class to add an info() method to an open file
class
addinfo
(
addbase
):
def
__init__
(
self
,
fp
,
headers
):
addbase
.
__init__
(
self
,
fp
)
self
.
headers
=
headers
def
info
(
self
):
return
self
.
headers
# Utilities to
split url parts into component
s:
# Utilities to
parse URL
s:
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splitport('host:port') --> 'host', 'port'
# splitport('host:port') --> 'host', 'port'
...
@@ -168,29 +246,29 @@ class fakefile:
...
@@ -168,29 +246,29 @@ class fakefile:
# splittag('/path#tag') --> '/path', 'tag'
# splittag('/path#tag') --> '/path', 'tag'
# splitgophertype('/Xselector') --> 'X', 'selector'
# splitgophertype('/Xselector') --> 'X', 'selector'
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
_
typeprog
=
regex
.
compile
(
'^
\
([^/:]+
\
):
\
(.*
\
)$'
)
def
splittype
(
url
):
def
splittype
(
url
):
if
typeprog
.
match
(
url
)
>=
0
:
return
typeprog
.
group
(
1
,
2
)
if
_typeprog
.
match
(
url
)
>=
0
:
return
_
typeprog
.
group
(
1
,
2
)
return
None
,
url
return
None
,
url
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
_
hostprog
=
regex
.
compile
(
'^//
\
([^/]+
\
)
\
(.*
\
)$'
)
def
splithost
(
url
):
def
splithost
(
url
):
if
hostprog
.
match
(
url
)
>=
0
:
return
hostprog
.
group
(
1
,
2
)
if
_hostprog
.
match
(
url
)
>=
0
:
return
_
hostprog
.
group
(
1
,
2
)
return
None
,
url
return
None
,
url
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([0-9]+
\
)$'
)
_
portprog
=
regex
.
compile
(
'^
\
(.*
\
):
\
([0-9]+
\
)$'
)
def
splitport
(
host
):
def
splitport
(
host
):
if
portprog
.
match
(
host
)
>=
0
:
return
portprog
.
group
(
1
,
2
)
if
_portprog
.
match
(
host
)
>=
0
:
return
_
portprog
.
group
(
1
,
2
)
return
host
,
None
return
host
,
None
queryprog
=
regex
.
compile
(
'^
\
(.*
\
)
\
?
\
([^?]*
\
)$'
)
_
queryprog
=
regex
.
compile
(
'^
\
(.*
\
)
\
?
\
([^?]*
\
)$'
)
def
splitquery
(
url
):
def
splitquery
(
url
):
if
queryprog
.
match
(
url
)
>=
0
:
return
queryprog
.
group
(
1
,
2
)
if
_queryprog
.
match
(
url
)
>=
0
:
return
_
queryprog
.
group
(
1
,
2
)
return
url
,
None
return
url
,
None
tagprog
=
regex
.
compile
(
'^
\
(.*
\
)#
\
([^#]*
\
)$'
)
_
tagprog
=
regex
.
compile
(
'^
\
(.*
\
)#
\
([^#]*
\
)$'
)
def
splittag
(
url
):
def
splittag
(
url
):
if
tagprog
.
match
(
url
)
>=
0
:
return
tagprog
.
group
(
1
,
2
)
if
_tagprog
.
match
(
url
)
>=
0
:
return
_
tagprog
.
group
(
1
,
2
)
return
url
,
None
return
url
,
None
def
splitgophertype
(
selector
):
def
splitgophertype
(
selector
):
...
@@ -202,6 +280,7 @@ def splitgophertype(selector):
...
@@ -202,6 +280,7 @@ def splitgophertype(selector):
# Test program
# Test program
def
test
():
def
test
():
import
sys
import
sys
import
regsub
args
=
sys
.
argv
[
1
:]
args
=
sys
.
argv
[
1
:]
if
not
args
:
if
not
args
:
args
=
[
args
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment