| 1 | #! /usr/bin/env python
|
|---|
| 2 |
|
|---|
| 3 | """Convert a LaTeX .toc file to some PDFTeX magic to create that neat outline.
|
|---|
| 4 |
|
|---|
| 5 | The output file has an extension of '.bkm' instead of '.out', since hyperref
|
|---|
| 6 | already uses that extension.
|
|---|
| 7 | """
|
|---|
| 8 |
|
|---|
| 9 | import getopt
|
|---|
| 10 | import os
|
|---|
| 11 | import re
|
|---|
| 12 | import string
|
|---|
| 13 | import sys
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
# Each entry in the table of contents is a tuple of:
#
#   Section Type, Section #, Title String, Page #, List of Sub-entries
#
# The return value of parse_toc() is a list of such tuples.
|
|---|
| 21 |
|
|---|
# Pattern for one \contentsline entry of a .toc file.  It is compiled in
# verbose mode, so the '#' notes inside the pattern are regex comments.
cline_re = r"""^
\\contentsline\ \{([a-z]*)} # type of section in $1
\{(?:\\numberline\ \{([0-9.A-Z]+)})? # section number
(.*)} # title string
\{(\d+)}$""" # page number

cline_rx = re.compile(cline_re, re.VERBOSE)

# Value stored in _transition_map for a step from a level into the level
# nested directly inside it; every other value is a positive count of
# levels to climb back out.
OUTER_TO_INNER = -1

# Section types that are kept in the outline, outermost first.
INCLUDED_LEVELS = ("chapter", "section", "subsection", "subsubsection")


def _build_transition_map(levels):
    """Return the (current level, new level) -> direction table.

    levels lists the section types from outermost to innermost.  A step
    to the next inner level maps to OUTER_TO_INNER; a step to any outer
    level maps to the number of levels between the two.
    """
    table = {}
    for depth, name in enumerate(levels):
        if depth + 1 < len(levels):
            table[(name, levels[depth + 1])] = OUTER_TO_INNER
        for outer_depth in range(depth):
            table[(name, levels[outer_depth])] = depth - outer_depth
    return table


_transition_map = _build_transition_map(INCLUDED_LEVELS)
|
|---|
| 45 |
|
|---|
| 46 |
|
|---|
class BadSectionNesting(Exception):
    """Raised for unsupported section level transitions."""

    def __init__(self, level, newsection, path, lineno):
        """Record the failed transition and where it was found.

        level is the section level that was current, newsection is the
        level of the entry that could not be placed, and path/lineno
        identify the input line being processed.
        """
        (self.level, self.newsection,
         self.path, self.lineno) = level, newsection, path, lineno

    def __str__(self):
        """Return a one-line description of the failed transition."""
        details = (self.level, self.newsection, self.path, self.lineno)
        return "illegal transition from %s to %s at %s (line %s)" % details
|
|---|
| 59 |
|
|---|
| 60 |
|
|---|
def parse_toc(fp, bigpart=None):
    """Parse the .toc data readable from fp into a nested outline.

    fp must provide readline().  bigpart names the section type expected
    at the top of the outline; it defaults to 'chapter'.

    Returns a list of entries.  Each entry is a tuple
    (section type, section number or None, cleaned title, page number,
    list of sub-entries), where the sub-entries have the same layout.

    Lines that do not match cline_rx are echoed to stderr, prefixed with
    their line number, and skipped.  Section types outside
    INCLUDED_LEVELS are ignored unless they equal the current level.

    Raises BadSectionNesting when _transition_map has no rule for moving
    from the current level to the level of the next entry.
    """
    toc = top = []
    # The list currently being filled is always stack[0]; the lists of
    # the enclosing levels follow it.
    stack = [toc]
    level = bigpart or 'chapter'
    lineno = 0
    while True:
        line = fp.readline()
        if not line:
            break
        lineno += 1
        m = cline_rx.match(line)
        if not m:
            # Interpolate the line number; the format string used to be
            # concatenated with the line, leaving a literal "%s" behind.
            sys.stderr.write("l.%s: %s" % (lineno, line))
            continue
        stype, snum, title, pageno = m.group(1, 2, 3, 4)
        entry = (stype, snum, clean_title(title), int(pageno), [])
        if stype == level:
            toc.append(entry)
            continue
        if stype not in INCLUDED_LEVELS:
            # we don't want paragraphs & subparagraphs
            continue
        try:
            direction = _transition_map[(level, stype)]
        except KeyError:
            # Not every file-like object has a name; do not let that
            # hide the real error.
            path = getattr(fp, "name", "<unknown>")
            raise BadSectionNesting(level, stype, path, lineno)
        if direction == OUTER_TO_INNER:
            # Descend into the sub-entry list of the most recent entry.
            toc = toc[-1][-1]
            stack.insert(0, toc)
        else:
            # Climb back out by the requested number of levels.
            del stack[:direction]
            toc = stack[0]
        toc.append(entry)
        level = stype
    return top
|
|---|
| 99 |
|
|---|
| 100 |
|
|---|
# Patterns used by clean_title().
hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
title_rx = re.compile(r"\\([a-zA-Z])+\s+")
# string.maketrans() only exists on Python 2.  clean_title() no longer
# uses this table; the name is kept so that it stays defined.
try:
    title_trans = string.maketrans("", "")
except AttributeError:
    title_trans = None


def clean_title(title):
    """Return title with TeX markup removed.

    The steps, in order:

    * every \\raisebox{...} is dropped;
    * every \\hackscore{...} is replaced by \\_;
    * every control word followed by whitespace is removed, together
      with that whitespace, unless it starts with \\textunderscore;
    * all remaining '{' and '}' characters are deleted.
    """
    title = raisebox_rx.sub("", title)
    title = hackscore_rx.sub(r"\\_", title)
    pos = 0
    while True:
        m = title_rx.search(title, pos)
        if not m:
            break
        start = m.start()
        if title[start:start+15] != "\\textunderscore":
            title = title[:start] + title[m.end():]
            # Rescan from the same spot: the text that followed the
            # removed macro now begins here and may be a macro itself.
            pos = start
        else:
            # Keep this macro and continue searching after its backslash.
            pos = start + 1
    # str.replace() behaves the same on Python 2 and Python 3, unlike the
    # two-argument form of str.translate().
    return title.replace("{", "").replace("}", "")
|
|---|
| 121 |
|
|---|
| 122 |
|
|---|
def write_toc(toc, fp):
    """Write each entry of toc, followed by its sub-entries, to fp.

    Every top-level entry is handed to write_toc_entry() with layer 0.
    """
    top_layer = 0
    for top_entry in toc:
        write_toc_entry(top_entry, fp, top_layer)
|
|---|
| 126 |
|
|---|
def write_toc_entry(entry, fp, layer):
    """Write the \\pdfoutline line for entry and its sub-entries to fp.

    entry is a (section type, section number, title, page number,
    sub-entries) tuple.  The line targets the destination named
    "page" plus the zero-padded page number; when there are sub-entries
    it also carries "count -N", N being the number of direct
    sub-entries.  The section number, if any, is put in front of the
    title.  layer is the nesting depth; it is only passed on, increased
    by one, to the recursive calls.
    """
    stype, snum, title, pageno, children = entry
    line = "\\pdfoutline goto name{page%03d}" % pageno
    if children:
        line = "%s count -%d" % (line, len(children))
    label = "%s %s" % (snum, title) if snum else title
    fp.write("%s {%s}\n" % (line, label))
    for child in children:
        write_toc_entry(child, fp, layer + 1)
|
|---|
| 138 |
|
|---|
| 139 |
|
|---|
def process(ifn, ofn, bigpart=None):
    """Convert the .toc file ifn into the outline file ofn.

    bigpart is passed through to parse_toc().  The output file is only
    opened once the input has been parsed, so a parse failure does not
    create or truncate ofn.  Both files are closed before returning,
    including when an exception is raised.
    """
    with open(ifn) as infile:
        toc = parse_toc(infile, bigpart)
    with open(ofn, "w") as outfile:
        write_toc(toc, outfile)
|
|---|
| 143 |
|
|---|
| 144 |
|
|---|
def main():
    """Run the converter on the files named on the command line.

    Usage: [-c bigpart] file ...

    The argument of the first option given is passed to process() as
    bigpart.  Each file name that has no extension gets ".toc"
    appended; the output goes to the same base name with the extension
    ".bkm".  When an option is not recognised or no file is given, a
    usage message is written to stderr and the program exits with
    status 2.
    """
    usage_text = ("usage: %s [-c bigpart] file ...\n"
                  % os.path.basename(sys.argv[0]))
    bigpart = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "c:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        sys.stderr.write(usage_text)
        sys.exit(2)
    if opts:
        bigpart = opts[0][1]
    if not args:
        # The usage() helper that used to be called here is not defined
        # in this file, so the message is written directly.
        sys.stderr.write(usage_text)
        sys.exit(2)
    for filename in args:
        base, ext = os.path.splitext(filename)
        ext = ext or ".toc"
        process(base + ext, base + ".bkm", bigpart)


if __name__ == "__main__":
    main()
|
|---|