Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

RuleBasedCollator.java@ 1389

Last change on this file since 1389 was 2, checked in by bird, 23 years ago
Initial revision
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 8.7 KB

Line
1	// RuleBasedCollator.java - Concrete class for locale-based string compare.
2
3	/* Copyright (C) 1999, 2000, 2001 Free Software Foundation
4
5	This file is part of libgcj.
6
7	This software is copyrighted work licensed under the terms of the
8	Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
9	details. */
10
11	package java.text;
12
13	import java.util.Enumeration;
14	import java.util.Hashtable;
15	import java.util.Vector;
16
17	/**
18	* @author Tom Tromey <[email protected]>
19	* @date March 25, 1999
20	*/
21	/* Written using "Java Class Libraries", 2nd edition, plus online
22	* API docs for JDK 1.2 from http://www.javasoft.com.
23	* Status: Believed complete and correct
24	*/
25
/**
 * One parsed entry of a collation rule string: the text the rule applies
 * to, together with the relation character that introduced it.
 *
 * <p>Instances are plain immutable value holders; both fields are assigned
 * once by the constructor and only read afterwards.
 */
final class RBCElement
{
  /** The text argument of the rule (one or more characters). */
  final String key;

  /** The relation character that preceded {@link #key} in the rule string. */
  final char relation;

  /**
   * Creates an element for the given rule text and relation.
   *
   * @param key the text argument of the rule
   * @param relation the relation character that introduced the text
   */
  RBCElement (String key, char relation)
  {
    this.key = key;
    this.relation = relation;
  }
}
37
38	public class RuleBasedCollator extends Collator
39	{
40	public Object clone ()
41	{
42	RuleBasedCollator c = (RuleBasedCollator) super.clone ();
43	c.map = (Hashtable) map.clone ();
44	c.prefixes = (Hashtable) map.clone ();
45	return c;
46	}
47
48	// A helper for CollationElementIterator.next().
49	int ceiNext (CollationElementIterator cei)
50	{
51	if (cei.lookahead_set)
52	{
53	cei.lookahead_set = false;
54	return cei.lookahead;
55	}
56
57	int save = cei.index;
58	int max = cei.text.length();
59	String s = null;
60
61	// It is possible to have a case where `abc' has a mapping, but
62	// neither `ab' nor `abd' do. In this case we must treat `abd' as
63	// nothing special.
64	boolean found = false;
65
66	int i;
67	for (i = save + 1; i <= max; ++i)
68	{
69	s = cei.text.substring(save, i);
70	if (prefixes.get(s) == null)
71	break;
72	found = true;
73	}
74	// Assume s != null.
75
76	Object obj = map.get(s);
77	// The special case.
78	while (found && obj == null && s.length() > 1)
79	{
80	--i;
81	s = cei.text.substring(save, i);
82	obj = map.get(s);
83	}
84
85	// Update state.
86	cei.index = i;
87
88	if (obj == null)
89	{
90	// This idea, and the values, come from JDK.
91	// assert (s.length() == 1)
92	cei.lookahead_set = true;
93	cei.lookahead = s.charAt(0) << 8;
94	return 0x7fff << 16;
95	}
96
97	return ((Integer) obj).intValue();
98	}
99
100	// A helper for compareTo() that returns the next character that has
101	// a nonzero ordering at the indicated strength. This is also used
102	// in CollationKey.
103	static final int next (CollationElementIterator iter, int strength)
104	{
105	while (true)
106	{
107	int os = iter.next();
108	if (os == CollationElementIterator.NULLORDER)
109	return os;
110	int c = 0;
111	switch (strength)
112	{
113	case PRIMARY:
114	c = os & ~0xffff;
115	break;
116	case SECONDARY:
117	c = os & ~0x00ff;
118	break;
119	case TERTIARY:
120	case IDENTICAL:
121	c = os;
122	break;
123	}
124	if (c != 0)
125	return c;
126	}
127	}
128
129	public int compare (String source, String target)
130	{
131	CollationElementIterator cs, ct;
132
133	cs = new CollationElementIterator (source, this);
134	ct = new CollationElementIterator (target, this);
135
136	while (true)
137	{
138	int os = next (cs, strength);
139	int ot = next (ct, strength);
140
141	if (os == CollationElementIterator.NULLORDER
142	&& ot == CollationElementIterator.NULLORDER)
143	break;
144	else if (os == CollationElementIterator.NULLORDER)
145	{
146	// Source string is shorter, so return "less than".
147	return -1;
148	}
149	else if (ot == CollationElementIterator.NULLORDER)
150	{
151	// Target string is shorter, so return "greater than".
152	return 1;
153	}
154
155	if (os != ot)
156	return os - ot;
157	}
158
159	return 0;
160	}
161
162	public boolean equals (Object obj)
163	{
164	if (! (obj instanceof RuleBasedCollator) \|\| ! super.equals(obj))
165	return false;
166	RuleBasedCollator rbc = (RuleBasedCollator) obj;
167	// FIXME: this is probably wrong. Instead we should compare maps
168	// directly.
169	return (frenchAccents == rbc.frenchAccents
170	&& rules.equals(rbc.rules));
171	}
172
173	public CollationElementIterator getCollationElementIterator (String source)
174	{
175	StringBuffer expand = new StringBuffer (source.length());
176	int max = source.length();
177	for (int i = 0; i < max; ++i)
178	decomposeCharacter (source.charAt(i), expand);
179	return new CollationElementIterator (expand.toString(), this);
180	}
181
182	public CollationElementIterator getCollationElementIterator (CharacterIterator source)
183	{
184	StringBuffer expand = new StringBuffer ();
185	for (char c = source.first ();
186	c != CharacterIterator.DONE;
187	c = source.next ())
188	decomposeCharacter (c, expand);
189
190	return new CollationElementIterator (expand.toString(), this);
191	}
192
193	public CollationKey getCollationKey (String source)
194	{
195	return new CollationKey (getCollationElementIterator (source), source,
196	strength);
197	}
198
199	public String getRules ()
200	{
201	return rules;
202	}
203
204	public int hashCode ()
205	{
206	return (frenchAccents ? 1231 : 1237
207	^ rules.hashCode()
208	^ map.hashCode()
209	^ prefixes.hashCode());
210	}
211
212	private final boolean is_special (char c)
213	{
214	// Rules from JCL book.
215	return ((c >= 0x0009 && c <= 0x000d)
216	\|\| (c >= 0x0020 && c <= 0x002f)
217	\|\| (c >= 0x003a && c <= 0x0040)
218	\|\| (c >= 0x005b && c <= 0x0060)
219	\|\| (c >= 0x007b && c <= 0x007e));
220	}
221
222	private final int text_argument (String rules, int index,
223	StringBuffer result)
224	{
225	result.setLength(0);
226	int len = rules.length();
227	while (index < len)
228	{
229	char c = rules.charAt(index);
230	if (c == '\'' && index + 2 < len
231	&& rules.charAt(index + 2) == '\''
232	&& is_special (rules.charAt(index + 1)))
233	index += 2;
234	else if (is_special (c) \|\| Character.isWhitespace(c))
235	return index;
236	result.append(c);
237	++index;
238	}
239	return index;
240	}
241
242	public RuleBasedCollator (String rules) throws ParseException
243	{
244	this.rules = rules;
245	this.frenchAccents = false;
246
247	// We keep each rule in order in a vector. At the end we traverse
248	// the vector and compute collation values from it.
249	int insertion_index = 0;
250	Vector vec = new Vector ();
251
252	StringBuffer argument = new StringBuffer ();
253
254	int len = rules.length();
255	for (int index = 0; index < len; ++index)
256	{
257	char c = rules.charAt(index);
258
259	// Just skip whitespace.
260	if (Character.isWhitespace(c))
261	continue;
262
263	// Modifier.
264	if (c == '@')
265	{
266	frenchAccents = true;
267	continue;
268	}
269
270	// Check for relation or reset operator.
271	if (! (c == '<' \|\| c == ';' \|\| c == ',' \|\| c == '=' \|\| c == '&'))
272	throw new ParseException ("invalid character", index);
273
274	++index;
275	while (index < len)
276	{
277	if (! Character.isWhitespace(rules.charAt(index)))
278	break;
279	++index;
280	}
281	if (index == len)
282	throw new ParseException ("missing argument", index);
283
284	int save = index;
285	index = text_argument (rules, index, argument);
286	if (argument.length() == 0)
287	throw new ParseException ("invalid character", save);
288	String arg = argument.toString();
289	int item_index = vec.indexOf(arg);
290	if (c != '&')
291	{
292	// If the argument already appears in the vector, then we
293	// must remove it in order to re-order.
294	if (item_index != -1)
295	{
296	vec.removeElementAt(item_index);
297	if (insertion_index >= item_index)
298	--insertion_index;
299	}
300	RBCElement r = new RBCElement (arg, c);
301	vec.insertElementAt(r, insertion_index);
302	++insertion_index;
303	}
304	else
305	{
306	// Reset.
307	if (item_index == -1)
308	throw
309	new ParseException ("argument to reset not previously seen",
310	save);
311	insertion_index = item_index + 1;
312	}
313
314	// Ugly: in this case the resulting INDEX comes from
315	// text_argument, which returns the index of the next
316	// character we should examine.
317	--index;
318	}
319
320	// Now construct a hash table that maps strings onto their
321	// collation values.
322	int primary = 0;
323	int secondary = 0;
324	int tertiary = 0;
325	this.map = new Hashtable ();
326	this.prefixes = new Hashtable ();
327	Enumeration e = vec.elements();
328	while (e.hasMoreElements())
329	{
330	RBCElement r = (RBCElement) e.nextElement();
331	switch (r.relation)
332	{
333	case '<':
334	++primary;
335	secondary = 0;
336	tertiary = 0;
337	break;
338	case ';':
339	++secondary;
340	tertiary = 0;
341	break;
342	case ',':
343	++tertiary;
344	break;
345	case '=':
346	break;
347	}
348	// This must match CollationElementIterator.
349	map.put(r.key, new Integer (primary << 16
350	\| secondary << 8 \| tertiary));
351
352	// Make a map of all lookaheads we might need.
353	for (int i = r.key.length() - 1; i >= 1; --i)
354	prefixes.put(r.key.substring(0, i), Boolean.TRUE);
355	}
356	}
357
358	// True if we are using French-style accent ordering.
359	private boolean frenchAccents;
360
361	// It's easier to just save the rules than to try to recreate them.
362	private String rules;
363
364	// This maps strings onto collation values.
365	private Hashtable map;
366	// An entry in this hash means that more lookahead is required for
367	// the prefix string.
368	private Hashtable prefixes;
369	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/gcc/libjava/java/text/RuleBasedCollator.java@ 1389

Download in other formats: