| 1 | #! /usr/bin/env python
|
|---|
| 2 |
|
|---|
| 3 | __version__ = '$Revision: 36356 $'
|
|---|
| 4 |
|
|---|
| 5 | import os.path
|
|---|
| 6 | import re
|
|---|
| 7 | import string
|
|---|
| 8 | import sys
|
|---|
| 9 |
|
|---|
| 10 | from xml.sax.saxutils import quoteattr
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
# Pre-bound join methods used throughout this module.
bang_join = "!".join
null_join = "".join

# LaTeX macros that are replaced with simple text:
# (macro name without the backslash, replacement text).
_TEXT_MACROS = (
    ("ABC", "ABC"),
    ("ASCII", "ASCII"),
    ("Cpp", "C++"),
    ("EOF", "EOF"),
    ("NULL", "NULL"),
    ("POSIX", "POSIX"),
    ("UNIX", "Unix"),
)

# (compiled pattern, replacement) pairs, applied in order to the text of
# every index entry.  Each macro pattern matches the backslashed macro name
# followed by a word boundary.
REPLACEMENTS = list(
    (re.compile(r"\\" + name + r"\b"), text) for name, text in _TEXT_MACROS
)
# Drop the "<#digits#>" markers left over from LaTeX2HTML.
REPLACEMENTS.append((re.compile(r"<#\d+#>"), ""))
|
|---|
| 28 |
|
|---|
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* sorts before, equal to or after *b*.

    Portable stand-in for the cmp() builtin, which Python 3 removed.
    """
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
      links -- list of link strings; starts out holding the one given to
               the constructor.
      seqno -- sequence number, used only as the final sort tie-breaker.
      text  -- list of entry levels returned by split_entry_text().
      key   -- list of per-level sort keys returned by split_entry_key().
    """

    # Class-level default; nothing in this class changes it.
    continuation = 0

    def __init__(self, link, str, seqno):
        """Build a node from a link, the raw entry text and a sequence number.

        Every (pattern, replacement) pair in REPLACEMENTS is applied to the
        entry text before it is split into display text and sort key.
        """
        self.links = [link]
        self.seqno = seqno
        entry = str
        for pattern, replacement in REPLACEMENTS:
            entry = pattern.sub(replacement, entry)
        # build up the text
        self.text = split_entry_text(entry)
        # list() guarantees len() and indexing work even if the helper
        # hands back a lazy iterable (map() does so on Python 3).
        self.key = list(split_entry_key(entry))

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    # Python 3 ignores __cmp__, so the same ordering is also exposed through
    # the rich comparison methods; each one simply delegates to __cmp__.
    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number.

        Levels are compared pairwise, sort key first and display text
        second, using cmp_part(); if all shared levels tie, the complete
        key lists and then the complete text lists decide.
        """
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                return c
        return (_three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the node as one text line.

        The links and the '!'-joined text are separated by "\\1" (the
        character with code 1); "###" and the sequence number follow.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  bang_join(self.text),
                  self.seqno)
|
|---|
| 67 |
|
|---|
| 68 |
|
|---|
def cmp_part(s1, s2):
    """Three-way compare two strings, mostly ignoring case.

    Returns 0 only when the strings are identical.  Otherwise:
      * if one string is, ignoring case, a proper prefix of the other,
        the shorter one sorts first;
      * else the lower-cased strings are compared, and the original
        strings break a tie between them.

    The result is -1, 0 or 1.  The comparisons are spelled with the
    ordering operators because the cmp() builtin does not exist on
    Python 3.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    folded = (l1 > l2) - (l1 < l2)
    # Strings differing only in case fall back to their exact ordering.
    return folded or (s1 > s2) - (s1 < s2)
|
|---|
| 83 |
|
|---|
| 84 |
|
|---|
def split_entry(str, which):
    """Split an index entry into one string per '!'-separated level.

    Each level is further split on '@'.  A level without any '@' is used
    as is; otherwise the field at index `which` is selected.  Returns the
    list of selected strings, one per level.
    """
    selected = []
    for level in str.split('!'):
        fields = level.split('@')
        if len(fields) == 1:
            index = 0
        else:
            index = which
        selected.append(fields[index])
    return selected
|
|---|
| 96 |
|
|---|
| 97 |
|
|---|
| 98 | _rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
|
|---|
| 99 | re.IGNORECASE)
|
|---|
| 100 | _rmparens = re.compile(r"\(\)")
|
|---|
| 101 |
|
|---|
def split_entry_key(str):
    """Return the list of sort keys, one per '!'-separated level.

    For each level selected by split_entry(str, 1):
      * a <tt>...</tt> wrapper is removed if present, otherwise the level
        is lower-cased;
      * every "()" is removed;
      * the result is passed through trim_ignored_letters().

    A real list is always returned (map() would be a lazy iterator on
    Python 3), so callers can take its len() and index into it.

    NOTE(review): this selects field index 1 after '@', the same index
    split_entry_text() passes -- confirm that is intended.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
|
|---|
| 113 |
|
|---|
| 114 |
|
|---|
def split_entry_text(str):
    """Return the list of display strings, one per '!'-separated level.

    If the whole entry matches the <tt>...</tt> pattern, that one pair of
    tags is removed first; the result is then split with
    split_entry(str, 1).
    """
    # The cheap substring test avoids running the regex on tag-free text.
    tagged = '<' in str and _rmtt.match(str)
    if tagged:
        str = null_join(tagged.group(1, 2, 3))
    return split_entry(str, 1)
|
|---|
| 121 |
|
|---|
| 122 |
|
|---|
def load(fp):
    """Read index records from `fp` and return them as a list of Nodes.

    `fp` only needs a readline() method; reading stops at the first empty
    result.  A line is used when it has the form

        <link> "\\1" <entry text> "###" <seqno>

    and lines that do not match are skipped.  The sequence number is
    handed to Node as the matched text, without conversion.
    """
    # Deliberately not a raw string: "\1" is the character with code 1
    # that separates the fields, not a regex backreference.
    record = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        m = record.match(line)
        if m:
            link, text, seqno = m.groups()
            nodes.append(Node(link, text, seqno))
        line = fp.readline()
    return nodes
|
|---|
| 135 |
|
|---|
| 136 |
|
|---|
def trim_ignored_letters(s):
    """Return `s` lower-cased, without a single leading "$" if it has one.

    Dropping the "$" keeps environment variables with the leading letter
    from the name.
    """
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
|
|---|
| 144 |
|
|---|
def get_first_letter(s):
    """Return the character under which `s` is filed.

    Text starting with the "<tex2html_percent_mark>" placeholder is filed
    under "%"; anything else under the first character of
    trim_ignored_letters(s).
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
|
|---|
| 150 |
|
|---|
| 151 |
|
|---|
| 152 | def split_letters(nodes):
|
|---|
|
|---|