buildindex.py 9.61 KB
Newer Older
1 2 3 4
#! /usr/bin/env python

# NOTE(review): '$Revision$' looks like an unexpanded version-control keyword
# placeholder -- confirm before relying on this value.
__version__ = '$Revision$'

5
import os
6 7 8 9 10 11 12 13
import re
import string
import sys


def _compare(a, b):
    """Return -1, 0 or 1 as *a* is less than, equal to or greater than *b*.

    Stand-in for the ``cmp()`` builtin, which Python 3 no longer provides.
    """
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
        links: list of link strings; starts with the one passed to the
            constructor.
        seqno: the sequence value passed to the constructor, used as the
            final tie-breaker when ordering nodes.
        text: per-level display text, from split_entry_text().
        key: per-level sort key, from split_entry_key().
        continuation: number of leading key levels shared with the entry
            that ends the previous column; 0 unless split_columns() sets it.
    """

    # Matches the "<#digits#>" markers stripped from the entry string.
    __rmjunk = re.compile(r"<#\d+#>")

    continuation = 0

    def __init__(self, link, str, seqno):
        """Build a node from a link, a raw entry string and a sequence value.

        The parameter name ``str`` shadows the builtin; it is kept so that
        existing keyword callers continue to work.
        """
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _compare(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich-comparison form of __cmp__; Python 3 sorting ignores __cmp__."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __eq__(self, other):
        """Two nodes are equal when __cmp__ reports no difference."""
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Compare level by level: key first, then the display text.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # Fall back to whole-list comparison (this also orders entries where
        # one is a prefix of the other).
        return (c or _compare(self.key, other.key)
                or _compare(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the node as one text line.

        The links, then the '!'-joined text, are separated by chr(1), and
        followed by '###' and the sequence value.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  '!'.join(self.text),
                  self.seqno)


Fred Drake's avatar
Fred Drake committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* is less than, equal to or greater than *b*."""
    return (a > b) - (a < b)


def cmp_part(s1, s2):
    """Three-way compare two strings, mostly ignoring case.

    Returns 0 only for identical strings.  If one string is a
    case-insensitive proper prefix of the other, the shorter one sorts
    first.  Otherwise the lowercased strings are compared, and the original
    strings break any remaining tie.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    return _three_way(l1, l2) or _three_way(s1, s2)


def split_entry(str, which):
    """Split an entry string into its '!'-separated levels.

    Each level is further split on '@'.  When that yields more than one
    piece, piece number *which* is used for the level; otherwise the level
    is used unchanged.  Returns the list of selected strings.

    The parameter name ``str`` shadows the builtin; it is kept for
    compatibility with existing callers.
    """
    stuff = []
    for part in str.split('!'):
        entry = part.split('@')
        if len(entry) != 1:
            stuff.append(entry[which])
        else:
            stuff.append(entry[0])
    return stuff


82
# Captures the text before, inside and after a <tt>...</tt> element (with an
# optional class attribute) so the pieces can be rejoined without the tags.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches a literal "()" pair.
_rmparens = re.compile(r"\(\)")

def split_entry_key(str):
    """Return the list of per-level sort keys for an entry string.

    For each level selected by split_entry(str, 1): <tt> markup is removed
    when present, otherwise the level is lowercased; '()' pairs are dropped;
    and the result is passed through trim_ignored_letters().

    A real list is returned (not a lazy ``map`` object) because callers take
    its length and index into it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys


def split_entry_text(str):
    """Return the list of per-level display strings for an entry string.

    If the string contains a <tt>...</tt> element matched by _rmtt, the tags
    are removed first; the result is then split with split_entry(str, 1).
    """
    # Cheap substring test first so the regex only runs on likely markup.
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)


107 108
def load(fp):
    """Read index records from the file-like object *fp*.

    Each line of the form ``<link> chr(1) <text> ### <seqno>`` becomes a
    Node; lines that do not match are skipped.  Returns the list of nodes in
    input order.
    """
    nodes = []
    # The separator is the control character chr(1), written as \x01 so it
    # cannot be mistaken for a regex backreference.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    while True:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
    return nodes


Fred Drake's avatar
Fred Drake committed
121
def trim_ignored_letters(s):
    """Return *s* lowercased, without a leading '$' if it has one.

    An empty string is returned unchanged instead of raising IndexError.
    """
    # ignore $ to keep environment variables with the
    # leading letter from the name
    s = s.lower()
    # Slicing (rather than indexing) keeps this safe for the empty string.
    if s[:1] == "$":
        return s[1:]
    return s
Fred Drake's avatar
Fred Drake committed
129 130 131 132 133

def get_first_letter(s):
    """Return the lowercased first character of trim_ignored_letters(s)."""
    return trim_ignored_letters(s)[0].lower()


134 135 136
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of ``(letter, group)`` tuples in input order.  A new
    group starts whenever get_first_letter(node.text[0]) differs from that
    of the previous node, so the input is expected to be sorted already.
    """
    letter_groups = []
    letter = None
    for node in nodes:
        nletter = get_first_letter(node.text[0])
        # The first node always opens a group; later ones only on a change.
        if not letter_groups or nletter != letter:
            letter = nletter
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups


152 153 154
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Distribute *nodes* over *columns* lists and return the list of columns.

    With ``columns <= 1`` the result is ``[nodes]`` and nothing else happens.
    Otherwise the first columns take one more item than the rest when the
    count does not divide evenly, and the first node of each later column
    gets ``continuation`` set to the number of leading key levels it shares
    with the last node of the column before it.

    Note: for ``columns > 1`` the items placed in the longer columns are
    deleted from the caller's *nodes* list.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    # Floor division keeps the heights integral on Python 2 and 3 alike.
    colheight = len(nodes) // columns
    numlong = len(nodes) % columns
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    for i in range(numlong):
        start = i * colheight
        end = start + colheight
        cols.append(nodes[start:end])
    del nodes[:end]
    colheight = colheight - 1
    try:
        numshort = len(nodes) // colheight
    except ZeroDivisionError:
        # No room for shorter columns: pad with empty ones.
        cols = cols + (columns - len(cols)) * [[]]
    else:
        for i in range(numshort):
            start = i * colheight
            end = start + colheight
            cols.append(nodes[start:end])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        try:
            prev = cols[i][-1]
            following = cols[i + 1][0]
        except IndexError:
            # An empty column was reached; stop marking continuations.
            return cols
        else:
            n = min(len(prev.key), len(following.key))
            for j in range(n):
                if prev.key[j] != following.key[j]:
                    break
                following.continuation = j + 1
    return cols
197 198


Fred Drake's avatar
Fred Drake committed
199 200
# Whitespace emitted once per nesting level in the generated HTML.
DL_LEVEL_INDENT = "  "


def format_column(nodes):
    """Return one column of the index as nested ``<dl compact>`` HTML.

    Each node supplies ``text`` (per-level strings), ``links`` (strings that
    are emitted immediately before the entry text and followed by ``</a>``)
    and ``continuation``.  Consecutive nodes that share leading text levels
    are nested under the same heading; a heading at a level below
    ``node.continuation`` is labelled " (continued)".
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0          # current <dl> nesting depth below the outermost list
    previous = []      # text levels of the node handled last
    for node in nodes:
        current = node.text
        # Count the leading levels shared with the previous node.
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open a heading for every level that is new and not the last one.
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            if node.continuation > i:
                extra = " (continued)"
            else:
                extra = ""
            append("\n<dt>%s%s\n<dd>\n%s<dl compact>"
                   % (term, extra, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        # Any further links for the same entry get a generic label.
        for link in node.links[1:]:
            append(",\n%s    %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # Close every list still open, plus the outermost one.
    append("</dl>" * (level + 1))
    return ''.join(strings)


def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With more than one column, the output of split_columns() is rendered as
    one table row whose cells each hold a format_column() list; otherwise a
    single format_column() list is produced.  A trailing ``<p>`` is always
    appended.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Integer percentage per cell; floor division on Python 2 and 3.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
260 261 262 263 264 265 266 267 268


def format_letter(letter):
    """Return the HTML section heading (with anchor) for *letter*.

    '.' and '_' get a spelled-out label; any other letter is shown in upper
    case.  The anchor name is always ``letter-<letter>`` with the letter
    exactly as given.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)


273
def format_html_letters(nodes, columns=1):
    """Return the index HTML split into per-letter sections.

    The output starts with a centered bar of links to every letter found by
    split_letters(), followed by a format_letter() heading and a
    format_nodes() body for each letter group.
    """
    letter_groups = split_letters(nodes)
    # Navigation bar: one link per letter.  The loop variables use their own
    # names so the *nodes* parameter is not rebound.
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter))
    s = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return ''.join(s)
284

285 286 287
def format_html(nodes, columns):
    """Return the index HTML for *nodes* as produced by format_nodes()."""
    html = format_nodes(nodes, columns)
    return html

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310

def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node whose cmp_entry() against the last kept node is zero is removed
    from *nodes*, and its first link is appended to that kept node's links.
    The list is modified in place; nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the surviving list in one pass instead of deleting from the
    # middle of the list repeatedly (which is quadratic).
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    # Slice assignment keeps the caller's list object, as before.
    nodes[:] = kept


def dump(nodes, fp):
    """Write the dump() text of every node in *nodes* to *fp*, in order."""
    emit = fp.write
    for entry in nodes:
        emit(entry.dump())


311 312 313 314 315 316 317 318 319
def process_nodes(nodes, columns, letters):
    """Sort and collapse *nodes* in place, then return the index HTML.

    A true *letters* selects the per-letter layout of format_html_letters();
    otherwise format_html() is used.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns)


320
def main():
    """Command-line entry point.

    Options:
        -o FILE, --output=FILE   write the HTML to FILE ("-", the default,
                                 means standard output)
        -c N, --columns=N        number of columns (default 1)
        -l, --letters            produce per-letter sections

    Remaining arguments are input files; "-" (the default when none are
    given) means standard input.  After the HTML is written, a summary line
    with the number of nodes loaded goes to standard error when the HTML
    went to standard output, and to standard output otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring the "-" output convention,
            # rather than a file literally named "-".
            nodes.extend(load(sys.stdin))
        else:
            fp = open(fn)
            try:
                nodes.extend(load(fp))
            finally:
                fp.close()
    # Counted before process_nodes() collapses duplicate entries.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        # Same output as the former "print" statements: a blank line, then
        # the summary line.
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
350 351 352 353


# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()