Commit 6a508aef, authored by Barry Warsaw

Use the (new) standard script #! line

Convert to re module; remove all references to regexp and regsub

Added support for the new texinfo @url{} and @email{} commands
Parent commit: dc1c970b
#! /usr/local/bin/python #! /usr/bin/env python
# Convert GNU texinfo files into HTML, one file per node. # Convert GNU texinfo files into HTML, one file per node.
# Based on Texinfo 2.14. # Based on Texinfo 2.14.
...@@ -36,19 +36,21 @@ ...@@ -36,19 +36,21 @@
# How about icons ? # How about icons ?
import os import os
import regex
import regsub
import string import string
import re
MAGIC = '\\input texinfo' MAGIC = '\\input texinfo'
cmprog = regex.compile('^@\([a-z]+\)\([ \t]\|$\)') # Command (line-oriented) cmprog = re.compile('^@([a-z]+)([ \t]|$)') # Command (line-oriented)
blprog = regex.compile('^[ \t]*$') # Blank line blprog = re.compile('^[ \t]*$') # Blank line
kwprog = regex.compile('@[a-z]+') # Keyword (embedded, usually with {} args) kwprog = re.compile('@[a-z]+') # Keyword (embedded, usually
spprog = regex.compile('[\n@{}&<>]') # Special characters in running text # with {} args)
miprog = regex.compile( \ spprog = re.compile('[\n@{}&<>]') # Special characters in
'^\* \([^:]*\):\(:\|[ \t]*\([^\t,\n.]+\)\([^ \t\n]*\)\)[ \t\n]*') # running text
#
# menu item (Yuck!) # menu item (Yuck!)
miprog = re.compile('^\* ([^:]*):(:|[ \t]*([^\t,\n.]+)([^ \t\n]*))[ \t\n]*')
class HTMLNode: class HTMLNode:
...@@ -212,7 +214,7 @@ class TexinfoParser: ...@@ -212,7 +214,7 @@ class TexinfoParser:
def parse(self, fp): def parse(self, fp):
line = fp.readline() line = fp.readline()
lineno = 1 lineno = 1
while line and (line[0] == '%' or blprog.match(line) >= 0): while line and (line[0] == '%' or blprog.match(line)):
line = fp.readline() line = fp.readline()
lineno = lineno + 1 lineno = lineno + 1
if line[:len(MAGIC)] <> MAGIC: if line[:len(MAGIC)] <> MAGIC:
...@@ -237,8 +239,9 @@ class TexinfoParser: ...@@ -237,8 +239,9 @@ class TexinfoParser:
print '*** EOF before @bye' print '*** EOF before @bye'
break break
lineno = lineno + 1 lineno = lineno + 1
if cmprog.match(line) >= 0: mo = cmprog.match(line)
a, b = cmprog.regs[1] if mo:
a, b = mo.span(1)
cmd = line[a:b] cmd = line[a:b]
if cmd in ('noindent', 'refill'): if cmd in ('noindent', 'refill'):
accu.append(line) accu.append(line)
...@@ -247,8 +250,8 @@ class TexinfoParser: ...@@ -247,8 +250,8 @@ class TexinfoParser:
if not self.skip: if not self.skip:
self.process(accu) self.process(accu)
accu = [] accu = []
self.command(line) self.command(line, mo)
elif blprog.match(line) >= 0 and \ elif blprog.match(line) and \
'format' not in self.stack and \ 'format' not in self.stack and \
'example' not in self.stack: 'example' not in self.stack:
if accu: if accu:
...@@ -346,12 +349,16 @@ class TexinfoParser: ...@@ -346,12 +349,16 @@ class TexinfoParser:
if self.stack and self.stack[-1] == 'menu': if self.stack and self.stack[-1] == 'menu':
# XXX should be done differently # XXX should be done differently
for line in accu: for line in accu:
if miprog.match(line) < 0: mo = miprog.match(line)
if not mo:
line = string.strip(line) + '\n' line = string.strip(line) + '\n'
self.expand(line) self.expand(line)
continue continue
(bgn, end), (a, b), (c, d), (e, f), (g, h) = \ bgn, end = mo.span(0)
miprog.regs[:5] a, b = mo.span(1)
c, d = mo.span(2)
e, f = mo.span(3)
g, h = mo.span(4)
label = line[a:b] label = line[a:b]
nodename = line[c:d] nodename = line[c:d]
if nodename[0] == ':': nodename = label if nodename[0] == ':': nodename = label
...@@ -373,8 +380,10 @@ class TexinfoParser: ...@@ -373,8 +380,10 @@ class TexinfoParser:
n = len(text) n = len(text)
while i < n: while i < n:
start = i start = i
i = spprog.search(text, i) mo = spprog.search(text, i)
if i < 0: if mo:
i = mo.start()
else:
self.write(text[start:]) self.write(text[start:])
break break
self.write(text[start:i]) self.write(text[start:i])
...@@ -674,14 +683,24 @@ class TexinfoParser: ...@@ -674,14 +683,24 @@ class TexinfoParser:
def open_w(self): self.write('<NOBREAK>') def open_w(self): self.write('<NOBREAK>')
def close_w(self): self.write('</NOBREAK>') def close_w(self): self.write('</NOBREAK>')
def open_url(self):
    """Handle the opening of an @url{...} command.

    Delegates to self.startsaving(); presumably this begins capturing
    the command's argument text so that close_url can retrieve it --
    confirm against startsaving's definition.
    """
    self.startsaving()
def close_url(self):
    """Handle the closing of an @url{...} command by writing a hyperlink.

    The string returned by self.collectsavings() is used twice: once as
    the HREF target and once as the visible link text.  Presumably that
    string is whatever was captured since the matching open_url call --
    confirm against collectsavings' definition.
    """
    text = self.collectsavings()
    # The pieces are handed to write() as separate arguments, not
    # pre-joined, exactly as the rest of the anchor markup is emitted.
    self.write('<A HREF="', text, '">', text, '</A>')
def open_email(self):
    """Handle the opening of an @email{...} command.

    Delegates to self.startsaving(); presumably this begins capturing
    the command's argument text so that close_email can retrieve it --
    confirm against startsaving's definition.
    """
    self.startsaving()
def close_email(self):
    """Handle the closing of an @email{...} command by writing a mailto link.

    The string returned by self.collectsavings() is used twice: once
    after the 'mailto:' scheme in the HREF and once as the visible link
    text.  Presumably that string is whatever was captured since the
    matching open_email call -- confirm against collectsavings'
    definition.
    """
    text = self.collectsavings()
    # The pieces are handed to write() as separate arguments, not
    # pre-joined, exactly as the rest of the anchor markup is emitted.
    self.write('<A HREF="mailto:', text, '">', text, '</A>')
open_titlefont = open_ open_titlefont = open_
close_titlefont = close_ close_titlefont = close_
def open_small(self): pass def open_small(self): pass
def close_small(self): pass def close_small(self): pass
def command(self, line): def command(self, line, mo):
a, b = cmprog.regs[1] a, b = mo.span(1)
cmd = line[a:b] cmd = line[a:b]
args = string.strip(line[b:]) args = string.strip(line[b:])
if self.debugging > 1: if self.debugging > 1:
...@@ -1378,15 +1397,17 @@ class TexinfoParser: ...@@ -1378,15 +1397,17 @@ class TexinfoParser:
print '--- Generating', self.indextitle[name], 'index' print '--- Generating', self.indextitle[name], 'index'
# The node already provides a title # The node already provides a title
index1 = [] index1 = []
junkprog = regex.compile('^\(@[a-z]+\)?{') junkprog = re.compile('^(@[a-z]+)?{')
for key, node in index: for key, node in index:
sortkey = string.lower(key) sortkey = string.lower(key)
# Remove leading `@cmd{' from sort key # Remove leading `@cmd{' from sort key
# -- don't bother about the matching `}' # -- don't bother about the matching `}'
oldsortkey = sortkey oldsortkey = sortkey
while 1: while 1:
i = junkprog.match(sortkey) mo = junkprog.match(sortkey)
if i < 0: break if not mo:
break
i = mo.end()
sortkey = sortkey[i:] sortkey = sortkey[i:]
index1.append(sortkey, key, node) index1.append(sortkey, key, node)
del index[:] del index[:]
...@@ -1481,12 +1502,14 @@ def splitwords(str, minlength): ...@@ -1481,12 +1502,14 @@ def splitwords(str, minlength):
# Find the end of a "word", matching braces and interpreting @@ @{ @} # Find the end of a "word", matching braces and interpreting @@ @{ @}
fwprog = regex.compile('[@{} ]') fwprog = re.compile('[@{} ]')
def findwordend(str, i, n): def findwordend(str, i, n):
level = 0 level = 0
while i < n: while i < n:
i = fwprog.search(str, i) mo = fwprog.search(str, i)
if i < 0: break if not mo:
break
i = mo.start()
c = str[i]; i = i+1 c = str[i]; i = i+1
if c == '@': i = i+1 # Next character is not special if c == '@': i = i+1 # Next character is not special
elif c == '{': level = level+1 elif c == '{': level = level+1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment