gopherlib.py 5.43 KB
Newer Older
1
"""Gopher protocol client interface."""
2

3 4
# Names exported by a star-import of this module: only the two request
# helpers.  Everything else must be imported or referenced explicitly.
__all__ = ["send_selector","send_query"]

5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
# Defaults: selector string, server host name and TCP port number
DEF_SELECTOR = "1/"
DEF_HOST = "gopher.micro.umn.edu"
DEF_PORT = 70

# Recognized file types.  Each constant is the one-character type code
# found at the start of a directory line (see get_directory()).
A_TEXT       = '0'
A_MENU       = '1'
A_CSO        = '2'
A_ERROR      = '3'
A_MACBINHEX  = '4'
A_PCBINHEX   = '5'
A_UUENCODED  = '6'
A_INDEX      = '7'
A_TELNET     = '8'
A_BINARY     = '9'
A_DUPLICATE  = '+'
A_SOUND      = 's'
A_EVENT      = 'e'
A_CALENDAR   = 'c'
A_HTML       = 'h'          # HTML file
A_TN3270     = 'T'
A_MIME       = 'M'
A_IMAGE      = 'I'
A_WHOIS      = 'w'
A_QUERY      = 'q'
A_GIF        = 'g'
# NOTE: A_WWW shares the code 'w' with A_WHOIS.  The reverse lookup built
# by type_to_name() can hold only one name per code, so it keeps whichever
# of the two names it stores last.
A_WWW        = 'w'          # WWW address
A_PLUS_IMAGE = ':'
A_PLUS_MOVIE = ';'
A_PLUS_SOUND = '<'


# Snapshot of the module-level names defined up to this point.
# type_to_name() scans it for the A_* constants, so this line has to stay
# below their definitions.
_names = dir()

# Cache mapping a type code to its constant name without the 'A_' prefix;
# filled in by the first call to type_to_name().
_type_to_name_map = {}


def type_to_name(gtype):
    """Map a file type code to a string.

    Known codes give the name of the matching A_* constant with the
    'A_' prefix removed.  Any other value gives 'TYPE=' followed by its
    repr(), e.g. "TYPE='x'" for the unknown code 'x'.
    """
    if not _type_to_name_map:
        # Build the reverse table lazily.  A plain globals() lookup
        # replaces the original eval(name): same value, no code evaluation.
        module_globals = globals()
        for name in _names:
            if name[:2] == 'A_':
                _type_to_name_map[module_globals[name]] = name[2:]
    if gtype in _type_to_name_map:
        return _type_to_name_map[gtype]
    # repr() instead of backticks: identical result, valid on Python 2 and 3
    return 'TYPE=' + repr(gtype)
51 52 53 54 55

# Symbolic names for the line terminator and the field separator
CRLF = "\r\n"
TAB = "\t"

56
def send_selector(selector, host, port = 0):
    """Send a selector to a given host and port, return a file with the reply.

    selector -- selector string; CRLF is appended before it is sent.  A
                text string is encoded as ASCII; pass a byte string for
                anything else.
    host     -- server host name.  If port is false, host may have the
                form "name:port".
    port     -- port number as an integer or a numeric string.  When it
                is false and host carries no port either, DEF_PORT is used.

    Returns a binary-mode file object (socket.makefile('rb')) from which
    the reply can be read.  Socket errors, and ValueError for a
    non-numeric port, propagate to the caller.
    """
    import socket
    if not port:
        # Accept "host:port" packed into the host argument
        i = host.find(':')
        if i >= 0:
            host, port = host[:i], int(host[i+1:])
    if not port:
        port = DEF_PORT
    elif isinstance(port, str):
        port = int(port)
    # sendall() needs a byte string on Python 3; on Python 2 a str already
    # is one, so these two lines leave it unchanged there.
    if not isinstance(selector, bytes):
        selector = selector.encode('ascii')
    request = selector + CRLF.encode('ascii')
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, port))
        s.sendall(request)
        # Half-close: tells the server that the request is complete
        s.shutdown(socket.SHUT_WR)
        return s.makefile('rb')
    except BaseException:
        # Do not leak the socket when connecting or sending fails
        s.close()
        raise

73
def send_query(selector, query, host, port = 0):
    """Send a selector and a query string.

    The request is the selector and the query joined by a TAB.  host and
    port are passed unchanged to send_selector(), whose reply file is
    returned.
    """
    return send_selector(selector + TAB + query, host, port)
76

77
def path_to_selector(path):
    """Takes a path as returned by urlparse and returns the appropriate selector.

    The root path "/" is returned unchanged.  For any other path the
    first two characters (the initial slash and the data type
    identifier) are cut off.
    """
    if path == "/":
        return "/"
    # Cuts initial slash and data type identifier
    return path[2:]
83 84

def path_to_datatype_name(path):
    """Takes a path as returned by urlparse and maps it to a string.

    See section 3.4 of RFC 1738 for details.  The second character of
    the path is the data type identifier; it is translated with
    type_to_name().  A path too short to contain one ("/" or the empty
    string) gives "TYPE='unknown'".
    """
    if len(path) < 2:
        # No type character present, so there is no way to tell.
        # (Previously only "/" was handled; other short paths raised
        # IndexError.)
        return "TYPE='unknown'"
    return type_to_name(path[1])
92

93 94 95 96
# The following functions interpret the data returned by the Gopher
# server according to the expected type, e.g. text file or directory.

def get_directory(f):
    """Get a directory in the form of a list of entries.

    f is a file object positioned at the start of the server's reply.
    Lines are read until a line consisting of a single '.' or until end
    of file.  Each accepted line yields one entry: a list holding the
    type character followed by the TAB-separated fields of the rest of
    the line.  A line with exactly four fields gets '' appended as a
    fifth; extra fields are kept as they are.

    Empty lines and lines with fewer than four fields are skipped.
    These, a premature end of file, and extra fields other than a single
    '+' are each reported with a message on standard output.
    """
    entries = []
    while 1:
        line = f.readline()
        if not line:
            print('(Unexpected EOF from server)')
            break
        if isinstance(line, bytes) and not isinstance(line, str):
            # Python 3 binary file: decode so the str comparisons below
            # work.  Latin-1 maps every byte to one character and never
            # fails.  Never true on Python 2, where bytes is str.
            line = line.decode('latin-1')
        # Strip one line terminator: CRLF, or a lone CR or LF
        if line[-2:] == CRLF:
            line = line[:-2]
        elif line[-1:] in CRLF:
            line = line[:-1]
        if line == '.':
            break
        if not line:
            print('(Empty line from server)')
            continue
        gtype = line[0]
        parts = line[1:].split(TAB)
        if len(parts) < 4:
            print('(Bad line from server: %s )' % repr(line))
            continue
        if len(parts) > 4:
            if parts[4:] != ['+']:
                print('(Extra info from server: %s )' % (parts[4:],))
        else:
            parts.append('')
        parts.insert(0, gtype)
        entries.append(parts)
    return entries

128
def get_textfile(f):
    """Get a text file as a list of lines, with trailing CRLF stripped.

    The reading and unescaping is done by get_alt_textfile(); this
    wrapper only collects the lines it delivers.
    """
    lines = []
    get_alt_textfile(f, lines.append)
    return lines
133 134

def get_alt_textfile(f, func):
    """Get a text file and pass each line to a function, with trailing CRLF stripped.

    f is a file object positioned at the start of the server's reply;
    func is called once per line with the stripped line as its only
    argument.  Reading stops at a line consisting of a single '.' or at
    end of file; the latter is reported with a message on standard
    output.  A line starting with '..' has its first '.' removed before
    it is passed on.
    """
    while 1:
        line = f.readline()
        if not line:
            print('(Unexpected EOF from server)')
            break
        if isinstance(line, bytes) and not isinstance(line, str):
            # Python 3 binary file: decode so the str comparisons below
            # work.  Latin-1 maps every byte to one character and never
            # fails.  Never true on Python 2, where bytes is str.
            line = line.decode('latin-1')
        # Strip one line terminator: CRLF, or a lone CR or LF
        if line[-2:] == CRLF:
            line = line[:-2]
        elif line[-1:] in CRLF:
            line = line[:-1]
        if line == '.':
            break
        if line[:2] == '..':
            # Undo the doubling of a leading dot
            line = line[1:]
        func(line)

151
def get_binary(f):
    """Get a binary file as one solid data block.

    Returns whatever a single f.read() delivers, i.e. everything up to
    end of file.
    """
    return f.read()
155 156

def get_alt_binary(f, func, blocksize):
    """Get a binary file and pass each block to a function.

    Reads f in pieces of at most blocksize and calls func with each
    non-empty piece; stops at the first empty read.
    """
    while 1:
        block = f.read(blocksize)
        if not block:
            break
        func(block)
163 164

def test():
    """Trivial test program.

    Command line: [host [type [selector] [query]]]

    host defaults to DEF_HOST and the selector to DEF_SELECTOR.  If the
    type argument is longer than one character it is used as the whole
    selector and its first character as the type.  Type A_INDEX sends
    the query along with the selector.  Text and directory replies are
    printed item by item; anything else is summarized as binary data.
    """
    import sys
    import getopt
    # No options are defined; getopt only rejects unknown ones
    opts, args = getopt.getopt(sys.argv[1:], '')
    selector = DEF_SELECTOR
    gtype = selector[0]
    host = DEF_HOST
    query = ''      # always bound, even when no type argument is given
    if args:
        host = args[0]
        args = args[1:]
    if args:
        gtype = args[0]
        args = args[1:]
        if len(gtype) > 1:
            gtype, selector = gtype[0], gtype
        else:
            selector = ''
            if args:
                selector = args[0]
                args = args[1:]
        if args:
            query = args[0]
            args = args[1:]
    if gtype == A_INDEX:
        f = send_query(selector, query, host)
    else:
        f = send_selector(selector, host)
    try:
        if gtype == A_TEXT:
            for item in get_textfile(f):
                print(item)
        elif gtype in (A_MENU, A_INDEX):
            for item in get_directory(f):
                print(item)
        else:
            data = get_binary(f)
            print('binary data: %d bytes: %s'
                  % (len(data), repr(data[:100])[:40]))
    finally:
        # Release the connection once the reply has been consumed
        f.close()
202 203 204

# Run the test when run as script
if __name__ == '__main__':
    test()