#! /usr/bin/env python

"""Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.

The output file has an extension of '.bkm' instead of '.out', since hyperref
already uses that extension.
"""

9
import getopt
10 11 12 13 14 15 16 17 18
import os
import re
import string
import sys


# Each entry in the table of contents is a tuple of:
#
#   Section Type,  Section #,  Title String,  Page #,  List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
21 22 23 24 25

# Pattern for one "\contentsline" record of a LaTeX .toc file.  It is compiled
# with re.VERBOSE, so unescaped whitespace and "#" comments inside the string
# are not part of the pattern; literal spaces are written as "\ ".
cline_re = r"""^
\\contentsline\ \{([a-z]*)}             # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})?     # optional section number in $2
(.*)}                                   # title string in $3
\{(\d+)}$"""                            # page number in $4

cline_rx = re.compile(cline_re, re.VERBOSE)

# Value stored in _transition_map for a move to the next deeper level.
OUTER_TO_INNER = -1


def _build_transition_map():
    """Return the level-change table keyed by (current type, new type)."""
    nesting = ("chapter", "section", "subsection", "subsubsection")
    table = {}
    for depth in range(1, len(nesting)):
        inner = nesting[depth]
        # Going deeper is only listed one level at a time.
        table[(nesting[depth - 1], inner)] = OUTER_TO_INNER
        # Going back out may skip levels; the value is the number of levels
        # climbed.
        for climb in range(1, depth + 1):
            table[(inner, nesting[depth - climb])] = climb
    return table


_transition_map = _build_transition_map()

44 45 46
# Section types that parse_toc() turns into outline entries; other types
# (paragraphs and subparagraphs) are skipped.
INCLUDED_LEVELS = (
    "chapter",
    "section",
    "subsection",
    "subsubsection",
)


47
def parse_toc(fp, bigpart=None):
    """Parse a LaTeX .toc stream into a nested list of entries.

    fp is read line by line with readline() until it returns an empty
    string.  bigpart names the section type expected at the top level; when
    it is not given, 'chapter' is used.

    The result is the top-level list.  Every entry is a tuple
    (stype, snum, title, pageno, subentries) where title has been passed
    through clean_title(), pageno is an int and subentries is a list of
    entries of the same form.

    Lines that do not match cline_rx are echoed to sys.stderr, prefixed
    with their line number.  An entry whose type is neither the current
    level nor listed in INCLUDED_LEVELS is skipped.

    Raises KeyError when a change of level is not listed in
    _transition_map.
    """
    toc = top = []
    # stack[0] is always the list currently receiving entries; the lists of
    # the enclosing levels follow it.
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno += 1
        m = cline_rx.match(line)
        if not m:
            # Report the unrecognized line together with where it was found.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        direction = _transition_map[(level, stype)]
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back out by `direction` levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top


84 85 86
# Patterns used by clean_title() to reduce LaTeX markup to plain text.
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
# A control word (backslash + letters) together with the whitespace after it.
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
# Identity table for character codes 0-255.  clean_title() does not use it;
# the name is only kept so that existing references to it keep working.
title_trans = "".join(map(chr, range(256)))


def _keep_or_drop_macro(match):
    """Return '' for a matched control word, except \\textunderscore."""
    text = match.group(0)
    if text.startswith("\\textunderscore"):
        return text
    return ""


def clean_title(title):
    """Return title with LaTeX markup reduced to plain text.

    In order: every \\raisebox{...} is deleted, every \\hackscore{...} is
    replaced by a backslash-underscore pair, every control word that is
    followed by whitespace is deleted together with that whitespace
    (\\textunderscore is left alone), and finally all '{' and '}'
    characters are removed.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    # A single substitution pass also catches a control word that directly
    # follows another one that was just removed.
    title = title_rx.sub(_keep_or_drop_macro, title)
    return title.replace("{", "").replace("}", "")
104 105 106 107


def write_toc(toc, fp):
    """Write every top-level entry of toc, and its sub-entries, to fp."""
    for entry in toc:
        write_toc_entry(entry, fp, 0)


def write_toc_entry(entry, fp, layer):
    """Write one \\pdfoutline line for entry to fp, then recurse into it.

    entry is a (stype, snum, title, pageno, subentries) tuple.  The line
    targets the destination named 'page' followed by the page number padded
    to three digits, carries 'count -N' when the entry has N direct
    sub-entries, and uses the section number (when there is one) followed by
    the title as its text.  layer is the nesting depth; it is only passed on
    to the recursive calls.
    """
    stype, snum, title, pageno, subentries = entry
    line = "\\pdfoutline goto name{page%03d}" % pageno
    if subentries:
        line = "%s count -%d" % (line, len(subentries))
    if snum:
        title = "%s %s" % (snum, title)
    fp.write("%s {%s}\n" % (line, title))
    for child in subentries:
        write_toc_entry(child, fp, layer + 1)
121 122


123 124 125 126 127
def process(ifn, ofn, bigpart=None):
    """Read the .toc file ifn and write the matching outline file ofn.

    bigpart is handed to parse_toc() unchanged.  The input is parsed
    completely before the output file is opened, and both files are closed
    before returning, even when parsing or writing raises.
    """
    with open(ifn) as infile:
        toc = parse_toc(infile, bigpart)
    with open(ofn, "w") as outfile:
        write_toc(toc, outfile)


128
def usage():
    """Write a one-line usage summary to standard error."""
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write("usage: %s [-c bigpart] file ...\n" % prog)


def main():
    """Convert every file named on the command line.

    The only option is '-c', whose argument is passed to process() as the
    top-level section type.  Each remaining argument is an input file name;
    '.toc' is assumed when it has no extension, and the output goes to the
    same base name with a '.bkm' extension.

    Exits with status 2 after printing a usage message when an option is
    not recognized or no file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        usage()
        sys.exit(2)
    bigpart = None
    if opts:
        bigpart = opts[0][1]
    if not args:
        usage()
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)
140 141 142 143


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()