Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

buildindex.py@ 3391

Visit:

Last change on this file since 3391 was 3225, checked in by bird, 19 years ago
Python 2.5
File size: 10.7 KB

Line
1	#! /usr/bin/env python
2
3	__version__ = '$Revision: 36356 $'
4
5	import os.path
6	import re
7	import string
8	import sys
9
10	from xml.sax.saxutils import quoteattr
11
12
# Bound ``join`` methods used as shorthand throughout this module.
bang_join = "!".join
null_join = "".join


def _macro_rule(name, text):
    """Return a (compiled pattern, replacement) pair for one LaTeX macro.

    The pattern matches a backslash, then *name*, then a word boundary;
    *text* is the plain string substituted for it.
    """
    return re.compile(r"\\" + name + r"\b"), text


# Ordered (pattern, replacement) pairs; Node.__init__ applies each of them
# in turn to the raw entry string.
REPLACEMENTS = [
    # Crude handling of macros that simply stand for a fixed piece of text.
    _macro_rule("ABC", "ABC"),
    _macro_rule("ASCII", "ASCII"),
    _macro_rule("Cpp", "C++"),
    _macro_rule("EOF", "EOF"),
    _macro_rule("NULL", "NULL"),
    _macro_rule("POSIX", "POSIX"),
    _macro_rule("UNIX", "Unix"),
    # Drop the "<#digits#>" markers that LaTeX2HTML leaves behind.
    (re.compile(r"<#\d+#>"), ""),
]
28
class Node:
    """A single index entry: link targets, display text, sort key, seqno."""

    # Class-level default; nothing inside this class changes it.
    continuation = 0

    def __init__(self, link, str, seqno):
        """Build a node from one raw index entry.

        link  -- link target, stored as the only element of ``self.links``
        str   -- raw entry string; every REPLACEMENTS substitution is applied
                 to it before it is split into ``self.text`` and ``self.key``
        seqno -- sequence number, stored unchanged as ``self.seqno``
        """
        self.links = [link]
        self.seqno = seqno
        entry = str
        for regex, plain in REPLACEMENTS:
            entry = regex.sub(plain, entry)
        # Display text and sort key are both derived from the cleaned entry.
        self.text = split_entry_text(entry)
        self.key = split_entry_key(entry)

    def __cmp__(self, other):
        """Order nodes by entry first, then by sequence number.

        This is the comparison picked up by list.sort().
        """
        by_entry = self.cmp_entry(other)
        if by_entry:
            return by_entry
        return cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Compare two nodes by key and text only, ignoring seqno."""
        shared = min(len(self.key), len(other.key))
        for level in range(shared):
            # At each level the key decides; the text only breaks key ties.
            diff = cmp_part(self.key[level], other.key[level])
            if not diff:
                diff = cmp_part(self.text[level], other.text[level])
            if diff:
                return diff
        # All shared levels tied: fall back to whole-list comparison.
        return cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        label = bang_join(self.text)
        return "<Node for %s (%s)>" % (label, self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return this node serialized as one newline-terminated line.

        The links and the '!'-joined text are separated by chr(1), and
        "###" precedes the sequence number.
        """
        fields = ("\1".join(self.links), bang_join(self.text), self.seqno)
        return "%s\1%s###%s\n" % fields
67
68
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* is less than, equal to or greater than *b*."""
    # Same result as the Python 2 builtin cmp(), but also valid on Python 3.
    return (a > b) - (a < b)


def cmp_part(s1, s2):
    """Three-way comparison of two entry strings, case-insensitive first.

    Returns 0 when the strings are identical.  Otherwise, when one string
    is strictly shorter and its lowercase form equals the same-length
    leading slice of the other's lowercase form, the shorter one sorts
    first.  In every other case the lowercase forms are compared, and the
    original strings break a tie between them.

    s1, s2 -- the strings to compare
    Returns -1, 0 or 1.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    # The slices are deliberately sized by the original strings' lengths.
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    return _three_way(l1, l2) or _three_way(s1, s2)
83
84
def split_entry(str, which):
    """Split an index entry into one string per '!'-separated level.

    Each level is further split on '@'.  A level without any '@' is used
    as is; otherwise the piece at index *which* is taken.

    str   -- the entry string
    which -- index of the '@'-separated piece to select
    Returns a list with one string per level.
    """
    levels = [chunk.split('@') for chunk in str.split('!')]
    return [pieces[0] if len(pieces) == 1 else pieces[which]
            for pieces in levels]
96
97
# Matches a string containing a <tt>...</tt> element (optionally carrying a
# class attribute) and captures the text before it, inside it and after it.
# The groups use ".*" so that any amount of text, including none, is allowed
# on each side; a bare "." would demand exactly one character.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches a literal "()" so it can be removed from sort keys.  The
# parentheses must be escaped; an empty pattern would remove nothing.
_rmparens = re.compile(r"\(\)")
101
def split_entry_key(str):
    """Return the list of sort keys for an index entry, one per level.

    The entry is split with split_entry(str, 1).  For each level: when it
    matches _rmtt, the three captured groups are concatenated; otherwise
    the level is lowercased.  Whatever _rmparens matches is then deleted
    (this step is meant to drop '()' from the key), and
    trim_ignored_letters() is applied.

    str -- the entry string
    Returns a real list, never a lazy map object, because callers take its
    len() and index into it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
113
114
def split_entry_text(str):
    """Return the list of display strings for an index entry, one per level.

    When the entry contains '<' and matches _rmtt, it is first replaced by
    the concatenation of the three captured groups.  The result is then
    split with split_entry(str, 1).
    """
    found = _rmtt.match(str) if '<' in str else None
    if found is not None:
        str = null_join(found.group(1, 2, 3))
    return split_entry(str, 1)
121
122
def load(fp):
    """Read index nodes from the open file-like object *fp*.

    Each usable line consists of the link(s), a chr(1) separator, the entry
    text, the marker "###" and the sequence number.  Lines that do not fit
    that shape are skipped silently.

    fp -- object with a readline() method that returns '' at end of input
    Returns the list of Node objects in input order.
    """
    nodes = []
    # "\x01" is the chr(1) separator (this is not a raw string, so it is a
    # literal character, not a backreference).  The groups use ".*" so that
    # links and text of any length are accepted; a bare "." would only
    # accept single-character fields.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    while True:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
    return nodes
135
136
def trim_ignored_letters(s):
    """Return *s* lowercased, without a leading '$' if it has one.

    Dropping the '$' keeps environment variables under the first letter
    of their name.
    """
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
144
def get_first_letter(s):
    """Return the character that *s* is filed under.

    Strings starting with the "<tex2html_percent_mark>" placeholder give
    "%".  Anything else gives the first character of
    trim_ignored_letters(s).
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
150
151
152	def split_letters(nodes):