Kaydet (Commit) 5dd52d37 authored tarafından Guido van Rossum's avatar Guido van Rossum

commit -- why not

üst 5b98ac5b
This directory contains a browser written in Python for "Info files"
as used by the Emacs documentation system. The browser requires that
Python is built with the "stdwin" option and runs under X11 or the
Mac window system.
Now you can read Info files even if you can't spare the memory, time or
disk space to run Emacs. (I have used this extensively on a Macintosh
with 1 Megabyte main memory and a 20 Meg harddisk.)
You can give this to someone with great fear of complex computer
systems, as long as they can use a mouse.
Another reason to use this is to encourage the use of Info for on-line
documentation of software that is not related to Emacs or GNU.
(In particular, I plan to redo the Python and STDWIN documentation
in texinfo.)
The main program is in file "ib.py"; this accepts a file name and a
node name as optional command line arguments, i.e., its usage is
python ib.py [file [node]]
Configuration:
- The pathname of the directory (or directories) containing
the standard Info files should be set by editing the
value assigned to INFOPATH in module ifile.py.
- The default font should be set by editing the value of FONT
in this module (ibrowse.py).
- For fastest I/O, you may look at BLOCKSIZE and a few other
constants in ifile.py.
: ${ARCH}=`arch`
exec /ufs/guido/bin/$ARCH/python ib.py ${1+"$@"}
#! /usr/local/bin/python
# Call ibrowse (the info file browser) under UNIX.
import sys
import ibrowse
if len(sys.argv) > 1:
file = sys.argv[1]
if len(sys.argv) > 2:
if len(sys.argv) > 3:
sys.stdout = sys.stderr
print 'usage:', sys.argv[0], '[file [node]]'
sys.exit(2)
else:
node = sys.argv[2]
else:
node = ''
ibrowse.start('(' + file + ')' + node)
else:
ibrowse.main()
This diff is collapsed.
This diff is collapsed.
# Cache management for info file processing.
# The function get_node() is the standard interface;
# its signature is the same as ifile.get_node() but it uses
# the cache and supports indirect tag tables.
import string
import ifile
from ifile import NoSuchNode, NoSuchFile
import itags
# Special hack to save the cache when using reload().
# This can just be "cache = {}" in a production version.
#
try:
dummy = cache
del dummy
except NameError:
cache = {}
# Clear the entire cache.
#
def resetcache():
for key in cache.keys():
del cache[key]
# Clear the node info from the cache (the most voluminous data).
#
def resetnodecache():
for key in cache.keys():
tags, nodes = cache[key]
cache[key] = tags, {}
# Get a node.
#
def get_node(curfile, ref):
file, node = ifile.parse_ref(curfile, ref)
file = string.lower(file)
node = string.lower(node)
if node == '*':
# Don't cache whole file references;
# reading the data is faster than displaying it anyway.
return ifile.get_whole_file(file) # May raise NoSuchFile
if not cache.has_key(file):
cache[file] = get_tags(file), {} # May raise NoSuchFile
tags, nodes = cache[file]
if not nodes.has_key(node):
if not tags.has_key(node):
raise NoSuchNode, ref
file1, offset, line = tags[node]
if not file1:
file1 = file
file1, node1, header, menu, footnotes, text = \
ifile.get_file_node(file1, offset, node)
nodes[node] = file, node1, header, menu, footnotes, text
return nodes[node]
# Get the tag table for a file.
# Either construct one or get the one found in the file.
# Raise NoSuchFile if the file isn't found.
#
def get_tags(file):
f = ifile.try_open(file) # May raise NoSuchFile
tags = itags.get_tags(f)
if not tags:
###print 'Scanning file...'
f.seek(0)
tags = ifile.make_tags(f)
return tags
# Tools for info file processing.
# XXX Need to be more careful with reading ahead searching for nodes.
import regexp
import string
# Exported exceptions.
#
NoSuchFile = 'no such file'
NoSuchNode = 'no such node'
# The search path for info files; this is site-specific.
# Directory names should end in a partname delimiter,
# so they can simply be concatenated to a relative pathname.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX
# Tunable constants.
#
BLOCKSIZE = 512 # Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data
# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
#
findheader = regexp.compile('\037\014?\n(.*\n)').match
findescape = regexp.compile('\037').match
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
findmenu = regexp.compile('^\* [mM]enu:').match
findmenuitem = regexp.compile( \
'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
findfootnote = regexp.compile( \
'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
# Get a node and all information pertaining to it.
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide part
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
#
def get_node(curfile, ref):
file, node = parse_ref(curfile, ref)
if node == '*':
return get_whole_file(file)
else:
return get_file_node(file, 0, node)
#
def get_whole_file(file):
f = try_open(file) # May raise NoSuchFile
text = f.read()
header, menu, footnotes = ('', '', ''), [], []
return file, '*', header, menu, footnotes, text
#
def get_file_node(file, offset, node):
f = try_open(file) # May raise NoSuchFile
text = find_node(f, offset, node) # May raise NoSuchNode
node, header, menu, footnotes = analyze_node(text)
return file, node, header, menu, footnotes, text
# Parse a node reference into a file (possibly default) and node name.
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
# Default file is the curfile argument; default node is Top.
# A node value of '*' is a special case: the whole file should
# be interpreted (by the caller!) as a single node.
#
def parse_ref(curfile, ref):
match = parsenoderef(ref)
if not match:
file, node = curfile, ref
else:
(a, b), (a1, b1), (a2, b2) = match
file, node = ref[a1:b1], ref[a2:b2]
if not file:
file = curfile # (Is this necessary?)
if not node:
node = 'Top'
return file, node
# Extract node name, links, menu and footnotes from the node text.
#
def analyze_node(text):
#
# Get node name and links from the header line
#
match = findfirstline(text)
if match:
(a, b) = match[0]
line = text[a:b]
else:
line = ''
node = get_it(text, findnode)
prev = get_it(text, findprev)
next = get_it(text, findnext)
up = get_it(text, findup)
#
# Get the menu items, if there is a menu
#
menu = []
match = findmenu(text)
if match:
(a, b) = match[0]
while 1:
match = findmenuitem(text, b)
if not match:
break
(a, b), (a1, b1), (a2, b2) = match
topic, ref = text[a1:b1], text[a2:b2]
if ref == ':':
ref = topic
menu.append(topic, ref)
#
# Get the footnotes
#
footnotes = []
b = 0
while 1:
match = findfootnote(text, b)
if not match:
break
(a, b), (a1, b1), (a2, b2) = match
topic, ref = text[a1:b1], text[a2:b2]
if ref == ':':
ref = topic
footnotes.append(topic, ref)
#
return node, (prev, next, up), menu, footnotes
#
def get_it(line, matcher):
match = matcher(line)
if not match:
return ''
else:
(a, b), (a1, b1) = match
return line[a1:b1]
# Find a node in an open file.
# The offset (from the tags table) is a hint about the node's position.
# Pass zero if there is no tags table.
# Raise NoSuchNode if the node isn't found.
# NB: This seeks around in the file.
#
def find_node(f, offset, node):
node = string.lower(node) # Just to be sure
#
# Position a little before the given offset,
# so we may find the node even if it has moved around
# in the file a little.
#
offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
f.seek(offset)
#
# Loop, hunting for a matching node header.
#
while 1:
buf = f.read(CHUNKSIZE)
if not buf:
break
i = 0
while 1:
match = findheader(buf, i)
if match:
(a,b), (a1,b1) = match
start = a1
line = buf[a1:b1]
i = b
match = parseheader(line)
if match:
(a,b), (a1,b1) = match
key = string.lower(line[a1:b1])
if key == node:
# Got it! Now read the rest.
return read_node(f, buf[start:])
elif findescape(buf, i):
next = f.read(CHUNKSIZE)
if not next:
break
buf = buf + next
else:
break
#
# If we get here, we didn't find it. Too bad.
#
raise NoSuchNode, node
# Finish off getting a node (subroutine for find_node()).
# The node begins at the start of buf and may end in buf;
# if it doesn't end there, read additional data from f.
#
def read_node(f, buf):
i = 0
match = findescape(buf, i)
while not match:
next = f.read(CHUNKSIZE)
if not next:
end = len(buf)
break
i = len(buf)
buf = buf + next
match = findescape(buf, i)
else:
# Got a match
(a, b) = match[0]
end = a
# Strip trailing newlines
while end > 0 and buf[end-1] == '\n':
end = end-1
buf = buf[:end]
return buf
# Read reverse starting at offset until the beginning of a node is found.
# Then return a buffer containing the beginning of the node,
# with f positioned just after the buffer.
# The buffer will contain at least the full header line of the node;
# the caller should finish off with read_node() if it is the right node.
# (It is also possible that the buffer extends beyond the node!)
# Return an empty string if there is no node before the given offset.
#
def backup_node(f, offset):
start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
end = offset
while start < end:
f.seek(start)
buf = f.read(end-start)
i = 0
hit = -1
while 1:
match = findheader(buf, i)
if match:
(a,b), (a1,b1) = match
hit = a1
i = b
elif end < offset and findescape(buf, i):
next = f.read(min(offset-end, BLOCKSIZE))
if not next:
break
buf = buf + next
end = end + len(next)
else:
break
if hit >= 0:
return buf[hit:]
end = start
start = max(0, end - CHUNKSIZE)
return ''
# Make a tag table for the given file by scanning the file.
# The file must be open for reading, and positioned at the beginning
# (or wherever the hunt for tags must begin; it is read till the end).
#
def make_tags(f):
tags = {}
while 1:
offset = f.tell()
buf = f.read(CHUNKSIZE)
if not buf:
break
i = 0
while 1:
match = findheader(buf, i)
if match:
(a,b), (a1,b1) = match
start = offset+a1
line = buf[a1:b1]
i = b
match = parseheader(line)
if match:
(a,b), (a1,b1) = match
key = string.lower(line[a1:b1])
if tags.has_key(key):
print 'Duplicate node:',
print key
tags[key] = '', start, line
elif findescape(buf, i):
next = f.read(CHUNKSIZE)
if not next:
break
buf = buf + next
else:
break
return tags
# Try to open a file, return a file object if succeeds.
# Raise NoSuchFile if the file can't be opened.
# Should treat absolute pathnames special.
#
def try_open(file):
for dir in INFOPATH:
try:
return open(dir + file, 'r')
except IOError:
pass
raise NoSuchFile, file
# A little test for the speed of make_tags().
#
TESTFILE = 'texinfo-1'
def test_make_tags():
import time
f = try_open(TESTFILE)
t1 = time.time()
tags = make_tags(f)
t2 = time.time()
print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'
# Utility module for 'icache.py': interpret tag tables and indirect nodes.
# (This module is a bit chatty when confronted with the unexpected.)
import regexp
import string
import ifile
# Get the tag table of an open file, as a dictionary.
# Seeks around in the file; after reading, the position is undefined.
# Return an empty tag table if none is found.
#
def get_tags(f):
#
# First see if the last "node" is the end of tag table marker.
#
f.seek(0, 2) # Seek to EOF
end = f.tell()
buf = ifile.backup_node(f, end)
if not labelmatch(buf, 0, 'end tag table\n'):
return {} # No succes
#
# Next backup to the previous "node" -- the tag table itself.
#
###print 'Getting prebuilt tag table...'
end = f.tell() - len(buf)
buf = ifile.backup_node(f, end)
label = 'tag table:\n'
if not labelmatch(buf, 0, label):
print 'Weird: end tag table marker but no tag table?'
print 'Node begins:', `buf[:50]`
return {}
#
# Now read the whole tag table.
#
end = f.tell() - len(buf) # Do this first!
buf = ifile.read_node(f, buf)
#
# First check for an indirection table.
#
indirlist = []
if labelmatch(buf, len(label), '(indirect)\n'):
indirbuf = ifile.backup_node(f, end)
if not labelmatch(indirbuf, 0, 'indirect:\n'):
print 'Weird: promised indirection table not found'
print 'Node begins:', `indirbuf[:50]`
# Carry on. Things probably won't work though.
else:
indirbuf = ifile.read_node(f, indirbuf)
indirlist = parse_indirlist(indirbuf)
#
# Now parse the tag table.
#
findtag = regexp.compile('^(.*[nN]ode:[ \t]*(.*))\177([0-9]+)$').match
i = 0
tags = {}
while 1:
match = findtag(buf, i)
if not match:
break
(a,b), (a1,b1), (a2,b2), (a3,b3) = match
i = b
line = buf[a1:b1]
node = string.lower(buf[a2:b2])
offset = eval(buf[a3:b3]) # XXX What if it overflows?
if tags.has_key(node):
print 'Duplicate key in tag table:', `node`
file, offset = map_offset(offset, indirlist)
tags[node] = file, offset, line
#
return tags
# Return true if buf[i:] begins with a label, after lower case conversion.
# The label argument must be in lower case.
#
def labelmatch(buf, i, label):
return string.lower(buf[i:i+len(label)]) == label
# Parse the indirection list.
# Return a list of (filename, offset) pairs ready for use.
#
def parse_indirlist(buf):
list = []
findindir = regexp.compile('^(.+):[ \t]*([0-9]+)$').match
i = 0
while 1:
match = findindir(buf, i)
if not match:
break
(a,b), (a1,b1), (a2,b2) = match
file = buf[a1:b1]
offset = eval(buf[a2:b2]) # XXX What if this gets overflow?
list.append(file, offset)
i = b
return list
# Map an offset through the indirection list.
# Return (filename, new_offset).
# If the list is empty, return the given offset and an empty file name.
#
def map_offset(offset, indirlist):
if not indirlist:
return '', offset
#
# XXX This could be done more elegant.
#
filex, offx = indirlist[0]
for i in range(len(indirlist)):
file1, off1 = indirlist[i]
if i+1 >= len(indirlist):
file2, off2 = '', 0x7fffffff
else:
file2, off2 = indirlist[i+1]
if off1 <= offset < off2:
# Add offx+2 to compensate for extra header.
# No idea whether this is always correct.
return file1, offset-off1 + offx+2
#
# XXX Shouldn't get here.
#
print 'Oops, map_offset fell through'
return '', offset # Not likely to get good results
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment