source: trunk/essentials/dev-lang/python/Doc/tools/buildindex.py@ 3391

Last change on this file since 3391 was 3225, checked in by bird, 19 years ago

Python 2.5

File size: 10.7 KB
Line 
1#! /usr/bin/env python
2
3__version__ = '$Revision: 36356 $'
4
5import os.path
6import re
7import string
8import sys
9
10from xml.sax.saxutils import quoteattr
11
12
# Bound str.join shortcuts used throughout this module:
#   bang_join(parts) glues index-entry levels back together with '!'
#   null_join(parts) concatenates the parts with no separator
bang_join = "!".join
null_join = "".join
15
# (compiled pattern, replacement text) pairs.  Node.__init__ applies each
# pair, in this order, to the raw entry string before it is split up.
REPLACEMENTS = [
    # LaTeX macros that stand for plain text: substitute the text itself.
    (re.compile(r"\\ABC\b"), "ABC"),
    (re.compile(r"\\ASCII\b"), "ASCII"),
    (re.compile(r"\\Cpp\b"), "C++"),
    (re.compile(r"\\EOF\b"), "EOF"),
    (re.compile(r"\\NULL\b"), "NULL"),
    (re.compile(r"\\POSIX\b"), "POSIX"),
    (re.compile(r"\\UNIX\b"), "Unix"),
    # Residual "<#NNN#>" markers left behind by LaTeX2HTML: drop them.
    (re.compile(r"<#\d+#>"), ""),
    ]
28
class Node:
    """A single index entry.

    Attributes set by the constructor:
      links -- list holding the link passed in
      seqno -- the sequence number, stored exactly as given
      text  -- per-level display text, from split_entry_text()
      key   -- per-level sort key, from split_entry_key()
    """

    # Class-level default; nothing in this class changes it.
    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # Run every (pattern, replacement) pair over the raw entry before
        # it is split into text and key.
        for regex, substitute in REPLACEMENTS:
            str = regex.sub(substitute, str)
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Order by entry first, then by sequence number; this is the
        comparison list.sort() picks up."""
        order = self.cmp_entry(other)
        if order:
            return order
        return cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Compare two entries without looking at the sequence number."""
        # Walk the levels both entries have in common; the first level
        # whose key (or, failing that, text) differs decides the order.
        shared_levels = min(len(self.key), len(other.key))
        for level in range(shared_levels):
            order = cmp_part(self.key[level], other.key[level])
            if not order:
                order = cmp_part(self.text[level], other.text[level])
            if order:
                return order
        # All shared levels compare equal: fall back to comparing the
        # complete key lists, then the complete text lists.
        return cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the entry as one line: the links and the '!'-joined text
        separated by "\\1" characters, then '###' and the sequence number."""
        joined_links = "\1".join(self.links)
        joined_text = bang_join(self.text)
        return "%s\1%s###%s\n" % (joined_links, joined_text, self.seqno)
67
68
def cmp_part(s1, s2):
    """Three-way comparison of two entry parts.

    Returns 0 when the strings are equal, otherwise -1 or 1:

    * a string that is, ignoring case, a proper prefix of the other
      sorts first;
    * otherwise the lowercased strings decide the order;
    * if the lowercased strings are equal, the original strings break
      the tie.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    # (a > b) - (a < b) is the usual three-way result (-1, 0 or 1) and
    # does not rely on the cmp() builtin.
    if l1 != l2:
        return (l1 > l2) - (l1 < l2)
    return (s1 > s2) - (s1 < s2)
83
84
def split_entry(str, which):
    """Split an index entry into one string per level.

    The entry is cut at every '!'.  A level that contains '@' is cut again
    at '@' and element number `which` of the result is kept; a level
    without '@' is kept unchanged.  Returns the list of kept strings.
    """
    selected = []
    for level in str.split('!'):
        pieces = level.split('@')
        if len(pieces) == 1:
            selected.append(pieces[0])
        else:
            selected.append(pieces[which])
    return selected
96
97
98_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
99 re.IGNORECASE)
100_rmparens = re.compile(r"\(\)")
101
def split_entry_key(str):
    """Return the list of sort keys for an index entry, one per level.

    Each level produced by split_entry(str, 1) is processed as follows:

    * if it contains a <tt>...</tt> element, the tags are removed and the
      surrounding text and element content are joined back together;
      otherwise the level is lowercased;
    * every "()" is removed;
    * trim_ignored_letters() is applied to the result.

    The result is always a real list, so callers can take its length and
    index into it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
113
114
def split_entry_text(str):
    """Return the list of display strings for an index entry, one per level.

    If the entry as a whole matches the <tt>...</tt> pattern, the tags are
    removed first; the result is then handed to split_entry(str, 1).
    """
    # Only try the regular expression when a '<' is present at all.
    tagged = None
    if '<' in str:
        tagged = _rmtt.match(str)
    if tagged:
        str = null_join(tagged.group(1, 2, 3))
    return split_entry(str, 1)
121
122
def load(fp):
    """Read index entries from `fp` and return them as a list of Nodes.

    `fp` only needs a readline() method.  Reading stops at the first empty
    result from readline().  Lines that do not match the expected layout
    are skipped without any report.
    """
    # "\1" in this non-raw string is the character chr(1): the same
    # separator Node.dump() writes between the links and the text.
    rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        m = rx.match(line)
        if m:
            # NOTE(review): seqno stays a string here, so Node ordering
            # compares sequence numbers as text -- confirm this is intended.
            link, text, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, text, seqno))
        line = fp.readline()
    return nodes
135
136
def trim_ignored_letters(s):
    """Return `s` lowercased, with one leading '$' removed if present.

    Dropping the '$' means an environment-variable entry is treated as
    starting with the first letter of its name.
    """
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
144
def get_first_letter(s):
    """Return the character under which `s` is filed.

    A string starting with "<tex2html_percent_mark>" yields "%".  Anything
    else yields the first character of trim_ignored_letters(s); if that
    result is empty, the indexing raises IndexError.
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
150
151
152def split_letters(nodes):