Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

qregexp.cpp@ 240

Last change on this file since 240 was 2, checked in by Dmitry A. Kuminov, 16 years ago
Initially imported qt-all-opensource-src-4.5.1 from Trolltech.
File size: 125.9 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information (qt-info@nokia.com)
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial Usage
10	** Licensees holding valid Qt Commercial licenses may use this file in
11	** accordance with the Qt Commercial License Agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Nokia.
14	**
15	** GNU Lesser General Public License Usage
16	** Alternatively, this file may be used under the terms of the GNU Lesser
17	** General Public License version 2.1 as published by the Free Software
18	** Foundation and appearing in the file LICENSE.LGPL included in the
19	** packaging of this file. Please review the following information to
20	** ensure the GNU Lesser General Public License version 2.1 requirements
21	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22	**
23	** In addition, as a special exception, Nokia gives you certain
24	** additional rights. These rights are described in the Nokia Qt LGPL
25	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26	** package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you are unsure which license is appropriate for your use, please
37	** contact the sales department at qt-sales@nokia.com.
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qregexp.h"
43
44	#include "qalgorithms.h"
45	#include "qbitarray.h"
46	#include "qcache.h"
47	#include "qdatastream.h"
48	#include "qlist.h"
49	#include "qmap.h"
50	#include "qmutex.h"
51	#include "qstring.h"
52	#include "qstringlist.h"
53	#include "qstringmatcher.h"
54	#include "qvector.h"
55
56	#include <limits.h>
57
58	QT_BEGIN_NAMESPACE
59
60	int qFindString(const QChar *haystack, int haystackLen, int from,
61	const QChar *needle, int needleLen, Qt::CaseSensitivity cs);
62
/*
  Error strings for the regexp parser. Each message is wrapped in
  QT_TRANSLATE_NOOP with the "QRegExp" context.
*/
#define RXERR_OK            QT_TRANSLATE_NOOP("QRegExp", "no error occurred")
#define RXERR_DISABLED      QT_TRANSLATE_NOOP("QRegExp", "disabled feature used")
#define RXERR_CHARCLASS     QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax")
#define RXERR_LOOKAHEAD     QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax")
#define RXERR_REPETITION    QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax")
#define RXERR_OCTAL         QT_TRANSLATE_NOOP("QRegExp", "invalid octal value")
#define RXERR_LEFTDELIM     QT_TRANSLATE_NOOP("QRegExp", "missing left delim")
#define RXERR_END           QT_TRANSLATE_NOOP("QRegExp", "unexpected end")
#define RXERR_LIMIT         QT_TRANSLATE_NOOP("QRegExp", "met internal limit")
73
74	/*
75	WARNING! Be sure to read qregexp.tex before modifying this file.
76	*/
77
78	/*!
79	\class QRegExp
80	\reentrant
81	\brief The QRegExp class provides pattern matching using regular expressions.
82
83	\ingroup tools
84	\ingroup misc
85	\ingroup shared
86	\mainclass
87	\keyword regular expression
88
89	A regular expression, or "regexp", is a pattern for matching
90	substrings in a text. This is useful in many contexts, e.g.,
91
92	\table
93	\row \i Validation
94	\i A regexp can test whether a substring meets some criteria,
95	e.g. is an integer or contains no whitespace.
96	\row \i Searching
97	\i A regexp provides more powerful pattern matching than
98	simple substring matching, e.g., match one of the words
99	\e{mail}, \e{letter} or \e{correspondence}, but none of the
100	words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc.
101	\row \i Search and Replace
102	\i A regexp can replace all occurrences of a substring with a
103	different substring, e.g., replace all occurrences of \e{&}
104	with \e{\&amp;} except where the \e{&} is already followed by
105	an \e{amp;}.
106	\row \i String Splitting
107	\i A regexp can be used to identify where a string should be
108	split apart, e.g. splitting tab-delimited strings.
109	\endtable
110
111	This documentation presents a brief introduction to regexps, a
112	description of Qt's regexp language, some examples, and the function
113	documentation itself. QRegExp is modeled on Perl's regexp
114	language. It fully supports Unicode. QRegExp can also be used in a
115	simpler, \e{wildcard mode} that is similar to the functionality
116	found in command shells. The syntax rules used by QRegExp can be
117	changed with setPatternSyntax(). In particular, the pattern syntax
118	can be set to QRegExp::FixedString, which means the pattern to be
119	matched is interpreted as a plain string, i.e., special characters
120	(e.g., backslash) are not escaped.
121
122	A good text on regexps is \e {Mastering Regular Expressions}
123	(Third Edition) by Jeffrey E. F. Friedl, ISBN 0-596-52812-4.
124
125	\tableofcontents
126
127	\section1 Introduction
128
129	Regexps are built up from expressions, quantifiers, and
130	assertions. The simplest expression is a character, e.g. \bold{x}
131	or \bold{5}. An expression can also be a set of characters
132	enclosed in square brackets. \bold{[ABCD]} will match an \bold{A}
133	or a \bold{B} or a \bold{C} or a \bold{D}. We can write this same
134	expression as \bold{[A-D]}, and an expression to match any
135	capital letter in the English alphabet is written as
136	\bold{[A-Z]}.
137
138	A quantifier specifies the number of occurrences of an expression
139	that must be matched. \bold{x{1,1}} means match one and only one
140	\bold{x}. \bold{x{1,5}} means match a sequence of \bold{x}
141	characters that contains at least one \bold{x} but no more than
142	five.
143
144	Note that in general regexps cannot be used to check for balanced
145	brackets or tags. For example, a regexp can be written to match an
146	opening html \c{<b>} and its closing \c{</b>}, if the \c{<b>} tags
147	are not nested, but if the \c{<b>} tags are nested, that same
148	regexp will match an opening \c{<b>} tag with the wrong closing
149	\c{</b>}. For the fragment \c{<b>bold <b>bolder</b></b>}, the
150	first \c{<b>} would be matched with the first \c{</b>}, which is
151	not correct. However, it is possible to write a regexp that will
152	match nested brackets or tags correctly, but only if the number of
153	nesting levels is fixed and known. If the number of nesting levels
154	is not fixed and known, it is impossible to write a regexp that
155	will not fail.
156
157	Suppose we want a regexp to match integers in the range 0 to 99.
158	At least one digit is required, so we start with the expression
159	\bold{[0-9]{1,1}}, which matches a single digit exactly once. This
160	regexp matches integers in the range 0 to 9. To match integers up
161	to 99, increase the maximum number of occurrences to 2, so the
162	regexp becomes \bold{[0-9]{1,2}}. This regexp satisfies the
163	original requirement to match integers from 0 to 99, but it will
164	also match integers that occur in the middle of strings. If we
165	want the matched integer to be the whole string, we must use the
166	anchor assertions, \bold{^} (caret) and \bold{$} (dollar). When
167	\bold{^} is the first character in a regexp, it means the regexp
168	must match from the beginning of the string. When \bold{$} is the
169	last character of the regexp, it means the regexp must match to
170	the end of the string. The regexp becomes \bold{^[0-9]{1,2}$}.
171	Note that assertions, e.g. \bold{^} and \bold{$}, do not match
172	characters but locations in the string.
173
174	If you have seen regexps described elsewhere, they may have looked
175	different from the ones shown here. This is because some sets of
176	characters and some quantifiers are so common that they have been
177	given special symbols to represent them. \bold{[0-9]} can be
178	replaced with the symbol \bold{\\d}. The quantifier to match
179	exactly one occurrence, \bold{{1,1}}, can be replaced with the
180	expression itself, i.e. \bold{x{1,1}} is the same as \bold{x}. So
181	our 0 to 99 matcher could be written as \bold{^\\d{1,2}$}. It can
182	also be written \bold{^\\d\\d{0,1}$}, i.e. \e{From the start of
183	the string, match a digit, followed immediately by 0 or 1 digits}.
184	In practice, it would be written as \bold{^\\d\\d?$}. The \bold{?}
185	is shorthand for the quantifier \bold{{0,1}}, i.e. 0 or 1
186	occurrences. \bold{?} makes an expression optional. The regexp
187	\bold{^\\d\\d?$} means \e{From the beginning of the string, match
188	one digit, followed immediately by 0 or 1 more digit, followed
189	immediately by end of string}.
190
191	To write a regexp that matches one of the words 'mail' \e or
192	'letter' \e or 'correspondence' but does not match words that
193	contain these words, e.g., 'email', 'mailman', 'mailer', and
194	'letterbox', start with a regexp that matches 'mail'. Expressed
195	fully, the regexp is \bold{m{1,1}a{1,1}i{1,1}l{1,1}}, but because
196	a character expression is automatically quantified by
197	\bold{{1,1}}, we can simplify the regexp to \bold{mail}, i.e., an
198	'm' followed by an 'a' followed by an 'i' followed by an 'l'. Now
199	we can use the vertical bar \bold{\|}, which means \bold{or}, to
200	include the other two words, so our regexp for matching any of the
201	three words becomes \bold{mail\|letter\|correspondence}. Match
202	'mail' \bold{or} 'letter' \bold{or} 'correspondence'. While this
203	regexp will match one of the three words we want to match, it will
204	also match words we don't want to match, e.g., 'email'. To
205	prevent the regexp from matching unwanted words, we must tell it
206	to begin and end the match at word boundaries. First we enclose
207	our regexp in parentheses, \bold{(mail\|letter\|correspondence)}.
208	Parentheses group expressions together, and they identify a part
209	of the regexp that we wish to \l{capturing text}{capture}.
210	Enclosing the expression in parentheses allows us to use it as a
211	component in more complex regexps. It also allows us to examine
212	which of the three words was actually matched. To force the match
213	to begin and end on word boundaries, we enclose the regexp in
214	\bold{\\b} \e{word boundary} assertions:
215	\bold{\\b(mail\|letter\|correspondence)\\b}. Now the regexp means:
216	\e{Match a word boundary, followed by the regexp in parentheses,
217	followed by a word boundary}. The \bold{\\b} assertion matches a
218	\e position in the regexp, not a \e character. A word boundary is
219	any non-word character, e.g., a space, newline, or the beginning
220	or ending of a string.
221
222	If we want to replace ampersand characters with the HTML entity
223	\bold{\&amp;}, the regexp to match is simply \bold{\&}. But this
224	regexp will also match ampersands that have already been converted
225	to HTML entities. We want to replace only ampersands that are not
226	already followed by \bold{amp;}. For this, we need the negative
227	lookahead assertion, \bold{(?!}__\bold{)}. The regexp can then be
228	written as \bold{\&(?!amp;)}, i.e. \e{Match an ampersand that is}
229	\bold{not} \e{followed by} \bold{amp;}.
230
231	If we want to count all the occurrences of 'Eric' and 'Eirik' in a
232	string, two valid solutions are \bold{\\b(Eric\|Eirik)\\b} and
233	\bold{\\bEi?ri[ck]\\b}. The word boundary assertion '\\b' is
234	required to avoid matching words that contain either name,
235	e.g. 'Ericsson'. Note that the second regexp matches more
236	spellings than we want: 'Eric', 'Erik', 'Eiric' and 'Eirik'.
237
238	Some of the examples discussed above are implemented in the
239	\link #code-examples code examples \endlink section.
240
241	\target characters-and-abbreviations-for-sets-of-characters
242	\section1 Characters and Abbreviations for Sets of Characters
243
244	\table
245	\header \i Element \i Meaning
246	\row \i \bold{c}
247	\i A character represents itself unless it has a special
248	regexp meaning. e.g. \bold{c} matches the character \e c.
249	\row \i \bold{\\c}
250	\i A character that follows a backslash matches the character
251	itself, except as specified below. e.g., To match a literal
252	caret at the beginning of a string, write \bold{\\^}.
253	\row \i \bold{\\a}
254	\i Matches the ASCII bell (BEL, 0x07).
255	\row \i \bold{\\f}
256	\i Matches the ASCII form feed (FF, 0x0C).
257	\row \i \bold{\\n}
258	\i Matches the ASCII line feed (LF, 0x0A, Unix newline).
259	\row \i \bold{\\r}
260	\i Matches the ASCII carriage return (CR, 0x0D).
261	\row \i \bold{\\t}
262	\i Matches the ASCII horizontal tab (HT, 0x09).
263	\row \i \bold{\\v}
264	\i Matches the ASCII vertical tab (VT, 0x0B).
265	\row \i \bold{\\x\e{hhhh}}
266	\i Matches the Unicode character corresponding to the
267	hexadecimal number \e{hhhh} (between 0x0000 and 0xFFFF).
268	\row \i \bold{\\0\e{ooo}} (i.e., \\zero \e{ooo})
269	\i matches the ASCII/Latin1 character for the octal number
270	\e{ooo} (between 0 and 0377).
271	\row \i \bold{. (dot)}
272	\i Matches any character (including newline).
273	\row \i \bold{\\d}
274	\i Matches a digit (QChar::isDigit()).
275	\row \i \bold{\\D}
276	\i Matches a non-digit.
277	\row \i \bold{\\s}
278	\i Matches a whitespace character (QChar::isSpace()).
279	\row \i \bold{\\S}
280	\i Matches a non-whitespace character.
281	\row \i \bold{\\w}
282	\i Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_').
283	\row \i \bold{\\W}
284	\i Matches a non-word character.
285	\row \i \bold{\\\e{n}}
286	\i The \e{n}-th \l backreference, e.g. \\1, \\2, etc.
287	\endtable
288
289	\bold{Note:} The C++ compiler transforms backslashes in strings.
290	To include a \bold{\\} in a regexp, enter it twice, i.e. \c{\\}.
291	To match the backslash character itself, enter it four times, i.e.
292	\c{\\\\}.
293
294	\target sets-of-characters
295	\section1 Sets of Characters
296
297	Square brackets mean match any character contained in the square
298	brackets. The character set abbreviations described above can
299	appear in a character set in square brackets. Except for the
300	character set abbreviations and the following two exceptions,
301	characters do not have special meanings in square brackets.
302
303	\table
304	\row \i \bold{^}
305
306	\i The caret negates the character set if it occurs as the
307	first character (i.e. immediately after the opening square
308	bracket). \bold{[abc]} matches 'a' or 'b' or 'c', but
309	\bold{[^abc]} matches anything \e but 'a' or 'b' or 'c'.
310
311	\row \i \bold{-}
312
313	\i The dash indicates a range of characters. \bold{[W-Z]}
314	matches 'W' or 'X' or 'Y' or 'Z'.
315
316	\endtable
317
318	Using the predefined character set abbreviations is more portable
319	than using character ranges across platforms and languages. For
320	example, \bold{[0-9]} matches a digit in Western alphabets but
321	\bold{\\d} matches a digit in \e any alphabet.
322
323	Note: In other regexp documentation, sets of characters are often
324	called "character classes".
325
326	\target quantifiers
327	\section1 Quantifiers
328
329	By default, an expression is automatically quantified by
330	\bold{{1,1}}, i.e. it should occur exactly once. In the following
331	list, \bold{\e {E}} stands for expression. An expression is a
332	character, or an abbreviation for a set of characters, or a set of
333	characters in square brackets, or an expression in parentheses.
334
335	\table
336	\row \i \bold{\e {E}?}
337
338	\i Matches zero or one occurrences of \e E. This quantifier
339	means \e{The previous expression is optional}, because it
340	will match whether or not the expression is found. \bold{\e
341	{E}?} is the same as \bold{\e {E}{0,1}}. e.g., \bold{dents?}
342	matches 'dent' or 'dents'.
343
344	\row \i \bold{\e {E}+}
345
346	\i Matches one or more occurrences of \e E. \bold{\e {E}+} is
347	the same as \bold{\e {E}{1,}}. e.g., \bold{0+} matches '0',
348	'00', '000', etc.
349
350	\row \i \bold{\e {E}*}
351
352	\i Matches zero or more occurrences of \e E. It is the same
353	as \bold{\e {E}{0,}}. The \bold{*} quantifier is often used
354	in error where \bold{+} should be used. For example, if
355	\bold{\\s*$} is used in an expression to match strings that
356	end in whitespace, it will match every string because
357	\bold{\\s*$} means \e{Match zero or more whitespaces followed
358	by end of string}. The correct regexp to match strings that
359	have at least one trailing whitespace character is
360	\bold{\\s+$}.
361
362	\row \i \bold{\e {E}{n}}
363
364	\i Matches exactly \e n occurrences of \e E. \bold{\e {E}{n}}
365	is the same as repeating \e E \e n times. For example,
366	\bold{x{5}} is the same as \bold{xxxxx}. It is also the same
367	as \bold{\e {E}{n,n}}, e.g. \bold{x{5,5}}.
368
369	\row \i \bold{\e {E}{n,}}
370	\i Matches at least \e n occurrences of \e E.
371
372	\row \i \bold{\e {E}{,m}}
373	\i Matches at most \e m occurrences of \e E. \bold{\e {E}{,m}}
374	is the same as \bold{\e {E}{0,m}}.
375
376	\row \i \bold{\e {E}{n,m}}
377	\i Matches at least \e n and at most \e m occurrences of \e E.
378	\endtable
379
380	To apply a quantifier to more than just the preceding character,
381	use parentheses to group characters together in an expression. For
382	example, \bold{tag+} matches a 't' followed by an 'a' followed by
383	at least one 'g', whereas \bold{(tag)+} matches at least one
384	occurrence of 'tag'.
385
386	Note: Quantifiers are normally "greedy". They always match as much
387	text as they can. For example, \bold{0+} matches the first zero it
388	finds and all the consecutive zeros after the first zero. Applied
389	to '20005', it matches '2\underline{000}5'. Quantifiers can be made
390	non-greedy, see setMinimal().
391
392	\target capturing parentheses
393	\target backreferences
394	\section1 Capturing Text
395
396	Parentheses allow us to group elements together so that we can
397	quantify and capture them. For example if we have the expression
398	\bold{mail\|letter\|correspondence} that matches a string we know
399	that \e one of the words matched but not which one. Using
400	parentheses allows us to "capture" whatever is matched within
401	their bounds, so if we used \bold{(mail\|letter\|correspondence)}
402	and matched this regexp against the string "I sent you some email"
403	we can use the cap() or capturedTexts() functions to extract the
404	matched characters, in this case 'mail'.
405
406	We can use captured text within the regexp itself. To refer to the
407	captured text we use \e backreferences which are indexed from 1,
408	the same as for cap(). For example we could search for duplicate
409	words in a string using \bold{\\b(\\w+)\\W+\\1\\b} which means match a
410	word boundary followed by one or more word characters followed by
411	one or more non-word characters followed by the same text as the
412	first parenthesized expression followed by a word boundary.
413
414	If we want to use parentheses purely for grouping and not for
415	capturing we can use the non-capturing syntax, e.g.
416	\bold{(?:green\|blue)}. Non-capturing parentheses begin '(?:' and
417	end ')'. In this example we match either 'green' or 'blue' but we
418	do not capture the match so we only know whether or not we matched
419	but not which color we actually found. Using non-capturing
420	parentheses is more efficient than using capturing parentheses
421	since the regexp engine has to do less book-keeping.
422
423	Both capturing and non-capturing parentheses may be nested.
424
425	\target greedy quantifiers
426
427	For historical reasons, quantifiers (e.g. \bold{*}) that apply to
428	capturing parentheses are more "greedy" than other quantifiers.
429	For example, \bold{a*(a*)} will match "aaa" with cap(1) == "aaa".
430	This behavior is different from what other regexp engines do
431	(notably, Perl). To obtain a more intuitive capturing behavior,
432	specify QRegExp::RegExp2 to the QRegExp constructor or call
433	setPatternSyntax(QRegExp::RegExp2).
434
435	\target cap_in_a_loop
436
437	When the number of matches cannot be determined in advance, a
438	common idiom is to use cap() in a loop. For example:
439
440	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 0
441
442	\target assertions
443	\section1 Assertions
444
445	Assertions make some statement about the text at the point where
446	they occur in the regexp but they do not match any characters. In
447	the following list \bold{\e {E}} stands for any expression.
448
449	\table
450	\row \i \bold{^}
451	\i The caret signifies the beginning of the string. If you
452	wish to match a literal \c{^} you must escape it by
453	writing \c{\\^}. For example, \bold{^#include} will only
454	match strings which \e begin with the characters '#include'.
455	(When the caret is the first character of a character set it
456	has a special meaning, see \link #sets-of-characters Sets of
457	Characters \endlink.)
458
459	\row \i \bold{$}
460	\i The dollar signifies the end of the string. For example
461	\bold{\\d\\s*$} will match strings which end with a digit
462	optionally followed by whitespace. If you wish to match a
463	literal \c{$} you must escape it by writing
464	\c{\\$}.
465
466	\row \i \bold{\\b}
467	\i A word boundary. For example the regexp
468	\bold{\\bOK\\b} means match immediately after a word
469	boundary (e.g. start of string or whitespace) the letter 'O'
470	then the letter 'K' immediately before another word boundary
471	(e.g. end of string or whitespace). But note that the
472	assertion does not actually match any whitespace so if we
473	write \bold{(\\bOK\\b)} and we have a match it will only
474	contain 'OK' even if the string is "It's \underline{OK} now".
475
476	\row \i \bold{\\B}
477	\i A non-word boundary. This assertion is true wherever
478	\bold{\\b} is false. For example if we searched for
479	\bold{\\Bon\\B} in "Left on" the match would fail (space
480	and end of string aren't non-word boundaries), but it would
481	match in "t\underline{on}ne".
482
483	\row \i \bold{(?=\e E)}
484	\i Positive lookahead. This assertion is true if the
485	expression matches at this point in the regexp. For example,
486	\bold{const(?=\\s+char)} matches 'const' whenever it is
487	followed by 'char', as in 'static \underline{const} char *'.
488	(Compare with \bold{const\\s+char}, which matches 'static
489	\underline{const char} *'.)
490
491	\row \i \bold{(?!\e E)}
492	\i Negative lookahead. This assertion is true if the
493	expression does not match at this point in the regexp. For
494	example, \bold{const(?!\\s+char)} matches 'const' \e except
495	when it is followed by 'char'.
496	\endtable
497
498	\keyword QRegExp wildcard matching
499	\section1 Wildcard Matching
500
501	Most command shells such as \e bash or \e cmd.exe support "file
502	globbing", the ability to identify a group of files by using
503	wildcards. The setPatternSyntax() function is used to switch
504	between regexp and wildcard mode. Wildcard matching is much
505	simpler than full regexps and has only four features:
506
507	\table
508	\row \i \bold{c}
509	\i Any character represents itself apart from those mentioned
510	below. Thus \bold{c} matches the character \e c.
511	\row \i \bold{?}
512	\i Matches any single character. It is the same as
513	\bold{.} in full regexps.
514	\row \i \bold{*}
515	\i Matches zero or more of any characters. It is the
516	same as \bold{.*} in full regexps.
517	\row \i \bold{[...]}
518	\i Sets of characters can be represented in square brackets,
519	similar to full regexps. Within the character class, like
520	outside, backslash has no special meaning.
521	\endtable
522
523	For example if we are in wildcard mode and have strings which
524	contain filenames we could identify HTML files with \bold{*.html}.
525	This will match zero or more characters followed by a dot followed
526	by 'h', 't', 'm' and 'l'.
527
528	To test a string against a wildcard expression, use exactMatch().
529	For example:
530
531	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 1
532
533	\target perl-users
534	\section1 Notes for Perl Users
535
536	Most of the character class abbreviations supported by Perl are
537	supported by QRegExp, see \link
538	#characters-and-abbreviations-for-sets-of-characters characters
539	and abbreviations for sets of characters \endlink.
540
541	In QRegExp, apart from within character classes, \c{^} always
542	signifies the start of the string, so carets must always be
543	escaped unless used for that purpose. In Perl the meaning of caret
544	varies automagically depending on where it occurs so escaping it
545	is rarely necessary. The same applies to \c{$} which in
546	QRegExp always signifies the end of the string.
547
548	QRegExp's quantifiers are the same as Perl's greedy quantifiers
549	(but see the \l{greedy quantifiers}{note above}). Non-greedy
550	matching cannot be applied to individual quantifiers, but can be
551	applied to all the quantifiers in the pattern. For example, to
552	match the Perl regexp \bold{ro+?m} requires:
553
554	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 2
555
556	The equivalent of Perl's \c{/i} option is
557	setCaseSensitivity(Qt::CaseInsensitive).
558
559	Perl's \c{/g} option can be emulated using a \l{#cap_in_a_loop}{loop}.
560
561	In QRegExp \bold{.} matches any character, therefore all QRegExp
562	regexps have the equivalent of Perl's \c{/s} option. QRegExp
563	does not have an equivalent to Perl's \c{/m} option, but this
564	can be emulated in various ways for example by splitting the input
565	into lines or by looping with a regexp that searches for newlines.
566
567	Because QRegExp is string oriented, there are no \\A, \\Z, or \\z
568	assertions. The \\G assertion is not supported but can be emulated
569	in a loop.
570
571	Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
572	equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
573	... correspond to cap(1) or capturedTexts()[1], cap(2) or
574	capturedTexts()[2], etc.
575
576	To substitute a pattern use QString::replace().
577
578	Perl's extended \c{/x} syntax is not supported, nor are
579	directives, e.g. (?i), or regexp comments, e.g. (?#comment). On
580	the other hand, C++'s rules for literal strings can be used to
581	achieve the same:
582
583	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 3
584
585	Both zero-width positive and zero-width negative lookahead
586	assertions (?=pattern) and (?!pattern) are supported with the same
587	syntax as Perl. Perl's lookbehind assertions, "independent"
588	subexpressions and conditional expressions are not supported.
589
590	Non-capturing parentheses are also supported, with the same
591	(?:pattern) syntax.
592
593	See QString::split() and QStringList::join() for equivalents
594	to Perl's split and join functions.
595
596	Note: because C++ transforms \\'s they must be written \e twice in
597	code, e.g. \bold{\\b} must be written \bold{\\\\b}.
598
599	\target code-examples
600	\section1 Code Examples
601
602	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 4
603
604	The third string matches '\underline{6}'. This is a simple validation
605	regexp for integers in the range 0 to 99.
606
607	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 5
608
609	The second string matches '\underline{This_is-OK}'. We've used the
610	character set abbreviation '\\S' (non-whitespace) and the anchors
611	to match strings which contain no whitespace.
612
613	In the following example we match strings containing 'mail' or
614	'letter' or 'correspondence' but only match whole words i.e. not
615	'email'
616
617	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 6
618
619	The second string matches "Please write the \underline{letter}". The
620	word 'letter' is also captured (because of the parentheses). We
621	can see what text we've captured like this:
622
623	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 7
624
625	This will capture the text from the first set of capturing
626	parentheses (counting capturing left parentheses from left to
627	right). The parentheses are counted from 1 since cap(0) is the
628	whole matched regexp (equivalent to '&' in most regexp engines).
629
630	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 8
631
632	Here we've passed the QRegExp to QString's replace() function to
633	replace the matched text with new text.
634
635	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 9
636
637	We've used the indexIn() function to repeatedly match the regexp in
638	the string. Note that instead of moving forward by one character
639	at a time \c pos++ we could have written \c {pos +=
640	rx.matchedLength()} to skip over the already matched string. The
641	count will equal 3, matching 'One \underline{Eric} another
642	\underline{Eirik}, and an Ericsson. How many Eiriks, \underline{Eric}?'; it
643	doesn't match 'Ericsson' or 'Eiriks' because they are not bounded
644	by word boundaries.
645
646	One common use of regexps is to split lines of delimited data into
647	their component fields.
648
649	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 10
650
651	In this example our input lines have the format company name, web
652	address and country. Unfortunately the regexp is rather long and
653	not very versatile -- the code will break if we add any more
654	fields. A simpler and better solution is to look for the
655	separator, '\\t' in this case, and take the surrounding text. The
656	QString::split() function can take a separator string or regexp
657	as an argument and split a string accordingly.
658
659	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 11
660
661	Here field[0] is the company, field[1] the web address and so on.
662
663	To imitate the matching of a shell we can use wildcard mode.
664
665	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 12
666
667	Wildcard matching can be convenient because of its simplicity, but
668	any wildcard regexp can be defined using full regexps, e.g.
669	\bold{.*\\.html$}. Notice that we can't match both \c .html and \c
670	.htm files with a wildcard unless we use \bold{*.htm*} which will
671	also match 'test.html.bak'. A full regexp gives us the precision
672	we need, \bold{.*\\.html?$}.
673
674	QRegExp can match case insensitively using setCaseSensitivity(),
675	and can use non-greedy matching, see setMinimal(). By
676	default QRegExp uses full regexps but this can be changed with
677	setPatternSyntax(). Searching can be forward with indexIn() or backward
678	with lastIndexIn(). Captured text can be accessed using
679	capturedTexts() which returns a string list of all captured
680	strings, or using cap() which returns the captured string for the
681	given index. The pos() function takes a match index and returns
682	the position in the string where the match was made (or -1 if
683	there was no match).
684
685	\sa QString, QStringList, QRegExpValidator, QSortFilterProxyModel,
686	{tools/regexp}{Regular Expression Example}
687	*/
688
// BadChar() reduces a character's Unicode value modulo NumBadChars, giving
// an index in the range [0, NumBadChars).
static const int NumBadChars = 64;
#define BadChar(ch) ((ch).unicode() % NumBadChars)

// Engine-wide constants.
// NOTE(review): the roles below are inferred from the names only -- confirm
// against their uses further down the file.
static const int EOS = -1;               // presumably "end of string"
static const int InftyRep = 1025;        // presumably an "unbounded repetition" marker
static const int InftyLen = INT_MAX;     // presumably an "unbounded length" marker
static const int EmptyCapture = INT_MAX;
static const int NoOccurrence = INT_MAX;
697
698	static bool isWord(QChar ch)
699	{
700	return ch.isLetterOrNumber() \|\| ch.isMark() \|\| ch == QLatin1Char('_');
701	}
702
703	/*
704	Merges two vectors of ints and puts the result into the first
705	one.
706	*/
707	static void mergeInto(QVector<int> *a, const QVector<int> &b)
708	{
709	int asize = a->size();
710	int bsize = b.size();
711	if (asize == 0) {
712	*a = b;
713	#ifndef QT_NO_REGEXP_OPTIM
714	} else if (bsize == 1 && a->at(asize - 1) < b.at(0)) {
715	a->resize(asize + 1);
716	(*a)[asize] = b.at(0);
717	#endif
718	} else if (bsize >= 1) {
719	int csize = asize + bsize;
720	QVector<int> c(csize);
721	int i = 0, j = 0, k = 0;
722	while (i < asize) {
723	if (j < bsize) {
724	if (a->at(i) == b.at(j)) {
725	++i;
726	--csize;
727	} else if (a->at(i) < b.at(j)) {
728	c[k++] = a->at(i++);
729	} else {
730	c[k++] = b.at(j++);
731	}
732	} else {
733	memcpy(c.data() + k, a->constData() + i, (asize - i) * sizeof(int));
734	break;
735	}
736	}
737	c.resize(csize);
738	if (j < bsize)
739	memcpy(c.data() + k, b.constData() + j, (bsize - j) * sizeof(int));
740	*a = c;
741	}
742	}
743
744	#ifndef QT_NO_REGEXP_WILDCARD
745	/*
746	Translates a wildcard pattern to an equivalent regular expression
747	pattern (e.g., .cpp to .\.cpp).
748	*/
749	static QString wc2rx(const QString &wc_str)
750	{
751	int wclen = wc_str.length();
752	QString rx;
753	int i = 0;
754	const QChar *wc = wc_str.unicode();
755	while (i < wclen) {
756	QChar c = wc[i++];
757	switch (c.unicode()) {
758	case '*':
759	rx += QLatin1String(".*");
760	break;
761	case '?':
762	rx += QLatin1Char('.');
763	break;
764	case '$':
765	case '(':
766	case ')':
767	case '+':
768	case '.':
769	case '\\':
770	case '^':
771	case '{':
772	case '\|':
773	case '}':
774	rx += QLatin1Char('\\');
775	rx += c;
776	break;
777	case '[':
778	rx += c;
779	if (wc[i] == QLatin1Char('^'))
780	rx += wc[i++];
781	if (i < wclen) {
782	if (rx[i] == QLatin1Char(']'))
783	rx += wc[i++];
784	while (i < wclen && wc[i] != QLatin1Char(']')) {
785	if (wc[i] == QLatin1Char('\\'))
786	rx += QLatin1Char('\\');
787	rx += wc[i++];
788	}
789	}
790	break;
791	default:
792	rx += c;
793	}
794	}
795	return rx;
796	}
797	#endif
798
799	static int caretIndex(int offset, QRegExp::CaretMode caretMode)
800	{
801	if (caretMode == QRegExp::CaretAtZero) {
802	return 0;
803	} else if (caretMode == QRegExp::CaretAtOffset) {
804	return offset;
805	} else { // QRegExp::CaretWontMatch
806	return -1;
807	}
808	}
809
/*
  The QRegExpEngineKey struct uniquely identifies an engine: two
  keys compare equal exactly when pattern, syntax and case
  sensitivity are all equal.
*/
struct QRegExpEngineKey
{
    QString pattern;                      // the pattern text as given by the user
    QRegExp::PatternSyntax patternSyntax; // how the pattern text is interpreted
    Qt::CaseSensitivity cs;               // case sensitivity of the match

    inline QRegExpEngineKey(const QString &pattern, QRegExp::PatternSyntax patternSyntax,
                            Qt::CaseSensitivity cs)
        : pattern(pattern), patternSyntax(patternSyntax), cs(cs) {}

    // Resets the key to an empty, case-sensitive RegExp pattern.
    inline void clear() {
        pattern.clear();
        patternSyntax = QRegExp::RegExp;
        cs = Qt::CaseSensitive;
    }
};
829
830	bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2)
831	{
832	return key1.pattern == key2.pattern && key1.patternSyntax == key2.patternSyntax
833	&& key1.cs == key2.cs;
834	}
835
836	class QRegExpEngine;
837
838	//Q_DECLARE_TYPEINFO(QVector<int>, Q_MOVABLE_TYPE);
839
/*
  This is the engine state during matching.

  All int* members except bigArray point into the single block
  owned by bigArray; prepareForMatch() (re)allocates that block with
  realloc() and carves it up, and the destructor and drain() release
  it with free().

  The constructor only zeroes bigArray and captured; the remaining
  members hold no defined value until prepareForMatch() and match()
  assign them.

  NOTE(review): the struct owns bigArray but declares no copy
  constructor or assignment operator, so copying an instance would
  free the block twice -- instances should not be copied.
*/
struct QRegExpMatchState
{
    const QChar *in; // a pointer to the input string data
    int pos; // the current position in the string
    int caretPos; // the position at which a caret anchor may match
    int len; // the length of the input string
    bool minimal; // minimal matching?
    int *bigArray; // big array holding the data for the next pointers
    int *inNextStack; // is state in nextStack?
    int *curStack; // stack of current states
    int *nextStack; // stack of next states
    int *curCapBegin; // start of current states' captures
    int *nextCapBegin; // start of next states' captures
    int *curCapEnd; // end of current states' captures
    int *nextCapEnd; // end of next states' captures
    int *tempCapBegin; // start of temporary captures
    int *tempCapEnd; // end of temporary captures
    int *capBegin; // start of captures for a next state
    int *capEnd; // end of captures for a next state
    int *slideTab; // bump-along slide table for bad-character heuristic
    int *captured; // what match() returned last
    int slideTabSize; // size of slide table
    int capturedSize; // number of ints in captured: 2 + 2 * number of captures
#ifndef QT_NO_REGEXP_BACKREF
    QList<QVector<int> > sleeping; // list of back-reference sleepers
#endif
    int matchLen; // length of match
    int oneTestMatchedLen; // length of partial match

    const QRegExpEngine *eng; // the engine set by prepareForMatch()

    inline QRegExpMatchState() : bigArray(0), captured(0) {}
    inline ~QRegExpMatchState() { free(bigArray); }

    void drain() { free(bigArray); bigArray = 0; captured = 0; } // to save memory
    void prepareForMatch(QRegExpEngine *eng);
    void match(const QChar *str, int len, int pos, bool minimal,
               bool oneTest, int caretIndex);
    bool matchHere();
    bool testAnchor(int i, int a, const int *capBegin);
};
884
/*
  The struct QRegExpAutomatonState represents one state in a modified NFA. The
  input characters matched are stored in the state instead of on
  the transitions, something possible for an automaton
  constructed from a regular expression.

  Note that the default constructor leaves atom and match
  uninitialized; only the vector and the two maps start out empty.
*/
struct QRegExpAutomatonState
{
#ifndef QT_NO_REGEXP_CAPTURE
    int atom; // which atom does this state belong to?
#endif
    int match; // what does it match? (see CharClassBit and BackRefBit)
    QVector<int> outs; // out-transitions
    QMap<int, int> reenter; // atoms reentered when transiting out
    QMap<int, int> anchors; // anchors met when transiting out

    inline QRegExpAutomatonState() { }
#ifndef QT_NO_REGEXP_CAPTURE
    // a is the owning atom, m the match code.
    inline QRegExpAutomatonState(int a, int m)
        : atom(a), match(m) { }
#else
    // m is the match code.
    inline QRegExpAutomatonState(int m)
        : match(m) { }
#endif
};

Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_MOVABLE_TYPE);
912
/*
  The struct QRegExpCharClassRange represents a range of characters (e.g.,
  [0-9] denotes range 48 to 57). The range is stored as its first
  code unit plus a length; the example values in the member comments
  are those of [0-9].
*/
struct QRegExpCharClassRange
{
    ushort from; // 48
    ushort len; // 10
};

Q_DECLARE_TYPEINFO(QRegExpCharClassRange, Q_PRIMITIVE_TYPE);
924
925	#ifndef QT_NO_REGEXP_CAPTURE
/*
  The struct QRegExpAtom represents one node in the hierarchy of regular
  expression atoms.

  Besides a capture index, the capture member can hold one of the
  negative marker values of the enum below.
*/
struct QRegExpAtom
{
    // Marker values for capture: not capturing, capturing and visible
    // to the user, or capturing for the engine's internal use only.
    enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 };

    int parent; // index of parent in array of atoms
    int capture; // index of capture, from 1 to ncap - 1
};

Q_DECLARE_TYPEINFO(QRegExpAtom, Q_PRIMITIVE_TYPE);
939	#endif
940
941	struct QRegExpLookahead;
942
943	#ifndef QT_NO_REGEXP_ANCHOR_ALT
/*
  The struct QRegExpAnchorAlternation represents a pair of anchors with
  OR semantics: the alternation is satisfied when either a or b is.
  Either member may itself be an alternation anchor.
*/
struct QRegExpAnchorAlternation
{
    int a; // this anchor...
    int b; // ...or this one
};

Q_DECLARE_TYPEINFO(QRegExpAnchorAlternation, Q_PRIMITIVE_TYPE);
955	#endif
956
#ifndef QT_NO_REGEXP_CCLASS
/*
  The class QRegExpCharClass represents a set of characters, such as can
  be found in regular expressions (e.g., [a-z] denotes the set
  {a, b, ..., z}).

  A set is described by a bit mask of categories, a list of
  explicit ranges and a flag telling whether the set is negated.
*/
class QRegExpCharClass
{
public:
    QRegExpCharClass();
    // Copying goes through operator=().
    inline QRegExpCharClass(const QRegExpCharClass &cc) { operator=(cc); }

    QRegExpCharClass &operator=(const QRegExpCharClass &cc);

    void clear();
    bool negative() const { return n; }
    void setNegative(bool negative);
    void addCategories(int cats);
    void addRange(ushort from, ushort to);
    // A single character is the range [ch, ch].
    void addSingleton(ushort ch) { addRange(ch, ch); }

    bool in(QChar ch) const;
#ifndef QT_NO_REGEXP_OPTIM
    const QVector<int> &firstOccurrence() const { return occ1; }
#endif

#if defined(QT_DEBUG)
    void dump() const;
#endif

private:
    int c; // character classes
    QVector<QRegExpCharClassRange> r; // character ranges
    bool n; // negative?
#ifndef QT_NO_REGEXP_OPTIM
    QVector<int> occ1; // first-occurrence array
#endif
};
#else
/*
  Placeholder used when character classes are compiled out. With
  the optimizer enabled it still provides a first-occurrence array,
  filled with NumBadChars zeros.
*/
struct QRegExpCharClass
{
    int dummy;

#ifndef QT_NO_REGEXP_OPTIM
    QRegExpCharClass() { occ1.fill(0, NumBadChars); }

    const QVector<int> &firstOccurrence() const { return occ1; }
    QVector<int> occ1;
#endif
};
#endif

Q_DECLARE_TYPEINFO(QRegExpCharClass, Q_MOVABLE_TYPE);
1010
/*
  The QRegExpEngine class encapsulates a modified nondeterministic
  finite automaton (NFA).

  The class bundles three things: the automaton itself (states,
  atoms, character classes, lookaheads, anchors), the lexical and
  syntactic analyzers that build it from a pattern (the yy* members
  and the parse*() functions), and the data used by the matching
  heuristics. QRegExpMatchState is a friend and reads the private
  data while matching.
*/
class QRegExpEngine
{
public:
    // Creates an empty engine; used for the sub-engines of lookaheads.
    QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers)
        : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); }

    // Creates an engine and parses the pattern described by key.
    QRegExpEngine(const QRegExpEngineKey &key);
    ~QRegExpEngine();

    bool isValid() const { return valid; }
    const QString &errorString() const { return yyError; }
    int numCaptures() const { return officialncap; }

    int createState(QChar ch);
    int createState(const QRegExpCharClass &cc);
#ifndef QT_NO_REGEXP_BACKREF
    int createState(int bref);
#endif

    void addCatTransitions(const QVector<int> &from, const QVector<int> &to);
#ifndef QT_NO_REGEXP_CAPTURE
    void addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom);
#endif

#ifndef QT_NO_REGEXP_ANCHOR_ALT
    int anchorAlternation(int a, int b);
    int anchorConcatenation(int a, int b);
#else
    // Without anchor alternation support, anchors are plain bit sets.
    int anchorAlternation(int a, int b) { return a & b; }
    int anchorConcatenation(int a, int b) { return a | b; }
#endif
    void addAnchors(int from, int to, int a);

#ifndef QT_NO_REGEXP_OPTIM
    void heuristicallyChooseHeuristic();
#endif

#if defined(QT_DEBUG)
    void dump() const;
#endif

    QAtomicInt ref; // reference count, set to 1 by setup()

private:
    // Flag bits in a state's match code; the low bits hold the index.
    enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
    enum { InitialState = 0, FinalState = 1 };

    void setup();
    int setupState(int match);

    /*
      Let's hope that 13 lookaheads and 14 back-references are
      enough.
    */
    enum { MaxLookaheads = 13, MaxBackRefs = 14 };
    // Anchor bit layout: four basic anchors, then one bit per
    // lookahead, then one bit per back-reference, then the flag that
    // marks an index into the anchor alternation array.
    enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004,
           Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010,
           Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
           Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
           Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs,

           Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^
                   ((Anchor_FirstLookahead << MaxLookaheads) - 1) };
#ifndef QT_NO_REGEXP_CAPTURE
    int startAtom(bool officialCapture);
    void finishAtom(int atom, bool needCapture);
#endif

#ifndef QT_NO_REGEXP_LOOKAHEAD
    int addLookahead(QRegExpEngine *eng, bool negative);
#endif

#ifndef QT_NO_REGEXP_OPTIM
    bool goodStringMatch(QRegExpMatchState &matchState) const;
    bool badCharMatch(QRegExpMatchState &matchState) const;
#else
    bool bruteMatch(QRegExpMatchState &matchState) const;
#endif

    QVector<QRegExpAutomatonState> s; // array of states
#ifndef QT_NO_REGEXP_CAPTURE
    QVector<QRegExpAtom> f; // atom hierarchy
    int nf; // number of atoms
    int cf; // current atom
    QVector<int> captureForOfficialCapture;
#endif
    int officialncap; // number of captures, seen from the outside
    int ncap; // number of captures, seen from the inside
#ifndef QT_NO_REGEXP_CCLASS
    QVector<QRegExpCharClass> cl; // array of character classes
#endif
#ifndef QT_NO_REGEXP_LOOKAHEAD
    QVector<QRegExpLookahead *> ahead; // array of lookaheads
#endif
#ifndef QT_NO_REGEXP_ANCHOR_ALT
    QVector<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors
#endif
#ifndef QT_NO_REGEXP_OPTIM
    bool caretAnchored; // does the regexp start with ^?
    bool trivial; // is the good-string all that needs to match?
#endif
    bool valid; // is the regular expression valid?
    Qt::CaseSensitivity cs; // case sensitive?
    bool greedyQuantifiers; // RegExp2?
#ifndef QT_NO_REGEXP_BACKREF
    int nbrefs; // number of back-references
#endif

#ifndef QT_NO_REGEXP_OPTIM
    bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch

    int goodEarlyStart; // the index where goodStr can first occur in a match
    int goodLateStart; // the index where goodStr can last occur in a match
    QString goodStr; // the string that any match has to contain

    int minl; // the minimum length of a match
    QVector<int> occ1; // first-occurrence array
#endif

    /*
      The class Box is an abstraction for a regular expression
      fragment. It can also be seen as one node in the syntax tree of
      a regular expression with synthetized attributes.

      Its interface is ugly for performance reasons.
    */
    class Box
    {
    public:
        Box(QRegExpEngine *engine);
        // Copying goes through operator=().
        Box(const Box &b) { operator=(b); }

        Box &operator=(const Box &b);

        // Resets the box to a freshly constructed one for the same engine.
        void clear() { operator=(Box(eng)); }
        void set(QChar ch);
        void set(const QRegExpCharClass &cc);
#ifndef QT_NO_REGEXP_BACKREF
        void set(int bref);
#endif

        void cat(const Box &b);
        void orx(const Box &b);
        void plus(int atom);
        void opt();
        void catAnchor(int a);
#ifndef QT_NO_REGEXP_OPTIM
        void setupHeuristics();
#endif

#if defined(QT_DEBUG)
        void dump() const;
#endif

    private:
        void addAnchorsToEngine(const Box &to) const;

        QRegExpEngine *eng; // the automaton under construction
        QVector<int> ls; // the left states (firstpos)
        QVector<int> rs; // the right states (lastpos)
        QMap<int, int> lanchors; // the left anchors
        QMap<int, int> ranchors; // the right anchors
        int skipanchors; // the anchors to match if the box is skipped

#ifndef QT_NO_REGEXP_OPTIM
        int earlyStart; // the index where str can first occur
        int lateStart; // the index where str can last occur
        QString str; // a string that has to occur in any match
        QString leftStr; // a string occurring at the left of this box
        QString rightStr; // a string occurring at the right of this box
        int maxl; // the maximum length of this box (possibly InftyLen)
#endif

        int minl; // the minimum length of this box
#ifndef QT_NO_REGEXP_OPTIM
        QVector<int> occ1; // first-occurrence array
#endif
    };

    friend class Box;

    /*
      This is the lexical analyzer for regular expressions.
    */
    enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead,
           Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar,
           Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
    int getChar();
    int getEscape();
#ifndef QT_NO_REGEXP_INTERVAL
    int getRep(int def);
#endif
#ifndef QT_NO_REGEXP_LOOKAHEAD
    void skipChars(int n);
#endif
    void error(const char *msg);
    void startTokenizer(const QChar *rx, int len);
    int getToken();

    const QChar *yyIn; // a pointer to the input regular expression pattern
    int yyPos0; // the position of yyTok in the input pattern
    int yyPos; // the position of the next character to read
    int yyLen; // the length of yyIn
    int yyCh; // the last character read
    QRegExpCharClass *yyCharClass; // attribute for Tok_CharClass tokens
    int yyMinRep; // attribute for Tok_Quantifier
    int yyMaxRep; // ditto
    QString yyError; // syntax error or overflow during parsing?

    /*
      This is the syntactic analyzer for regular expressions.
    */
    int parse(const QChar *rx, int len);
    void parseAtom(Box *box);
    void parseFactor(Box *box);
    void parseTerm(Box *box);
    void parseExpression(Box *box);

    int yyTok; // the last token read
    bool yyMayCapture; // set this to false to disable capturing

    friend struct QRegExpMatchState;
};
1238
1239	#ifndef QT_NO_REGEXP_LOOKAHEAD
/*
  The struct QRegExpLookahead represents a lookahead a la Perl (e.g.,
  (?=foo) and (?!bar)).

  The struct owns its engine: the destructor deletes eng.
*/
struct QRegExpLookahead
{
    QRegExpEngine *eng; // NFA representing the embedded regular expression
    bool neg; // negative lookahead?

    // Takes ownership of eng0.
    inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0)
        : eng(eng0), neg(neg0) { }
    inline ~QRegExpLookahead() { delete eng; }
};
1253	#endif
1254
1255	QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key)
1256	: cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2)
1257	{
1258	setup();
1259
1260	QString rx;
1261
1262	switch (key.patternSyntax) {
1263	case QRegExp::Wildcard:
1264	#ifndef QT_NO_REGEXP_WILDCARD
1265	rx = wc2rx(key.pattern);
1266	#endif
1267	break;
1268	case QRegExp::FixedString:
1269	rx = QRegExp::escape(key.pattern);
1270	break;
1271	default:
1272	rx = key.pattern;
1273	}
1274
1275	valid = (parse(rx.unicode(), rx.length()) == rx.length());
1276	if (!valid) {
1277	#ifndef QT_NO_REGEXP_OPTIM
1278	trivial = false;
1279	#endif
1280	error(RXERR_LEFTDELIM);
1281	}
1282	}
1283
/*
  Destroys the engine together with the lookaheads it owns (each
  lookahead in turn deletes its embedded engine).
*/
QRegExpEngine::~QRegExpEngine()
{
#ifndef QT_NO_REGEXP_LOOKAHEAD
    qDeleteAll(ahead);
#endif
}
1290
1291	void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng)
1292	{
1293	/*
1294	We use one QVector<int> for all the big data used a lot in
1295	matchHere() and friends.
1296	*/
1297	int ns = eng->s.size(); // number of states
1298	int ncap = eng->ncap;
1299	#ifndef QT_NO_REGEXP_OPTIM
1300	slideTabSize = qMax(eng->minl + 1, 16);
1301	#else
1302	slideTabSize = 0;
1303	#endif
1304	int numCaptures = eng->numCaptures();
1305	capturedSize = 2 + 2 * numCaptures;
1306	bigArray = (int )realloc(bigArray, ((3 + 4 ncap) * ns + 4 * ncap + slideTabSize + capturedSize)*sizeof(int));
1307
1308	inNextStack = bigArray;
1309	memset(inNextStack, -1, ns * sizeof(int));
1310	curStack = inNextStack + ns;
1311	nextStack = inNextStack + 2 * ns;
1312
1313	curCapBegin = inNextStack + 3 * ns;
1314	nextCapBegin = curCapBegin + ncap * ns;
1315	curCapEnd = curCapBegin + 2 * ncap * ns;
1316	nextCapEnd = curCapBegin + 3 * ncap * ns;
1317
1318	tempCapBegin = curCapBegin + 4 * ncap * ns;
1319	tempCapEnd = tempCapBegin + ncap;
1320	capBegin = tempCapBegin + 2 * ncap;
1321	capEnd = tempCapBegin + 3 * ncap;
1322
1323	slideTab = tempCapBegin + 4 * ncap;
1324	captured = slideTab + slideTabSize;
1325	memset(captured, -1, capturedSize*sizeof(int));
1326	this->eng = eng;
1327	}
1328
/*
  Tries to match in str and fills the captured array with
  (begin, length) pairs for captured text. If there is no match, all
  pairs are (-1, -1).

  str0 and len0 describe the input, pos0 is where the search starts,
  minimal0 selects minimal matching, and caretIndex is the position
  at which a caret anchor may match. With oneTest set, only a match
  starting exactly at pos0 is attempted; otherwise the engine's
  search strategy is used to find the first match at or after pos0.

  The first pair in captured describes the whole match. For each
  capture, a length that is not positive is reported with begin 0.

  NOTE(review): a capture whose capBegin is still EmptyCapture is
  therefore reported as (0, 0) rather than (-1, -1) -- confirm that
  callers expect this.
*/
void QRegExpMatchState::match(const QChar *str0, int len0, int pos0,
    bool minimal0, bool oneTest, int caretIndex)
{
    bool matched = false;
    QChar char_null;

#ifndef QT_NO_REGEXP_OPTIM
    // A trivial pattern is just its good string: a plain substring
    // search does the whole job.
    if (eng->trivial && !oneTest) {
        pos = qFindString(str0, len0, pos0, eng->goodStr.unicode(), eng->goodStr.length(), eng->cs);
        matchLen = eng->goodStr.length();
        matched = (pos != -1);
    } else
#endif
    {
        in = str0;
        // Never leave in null: point it at a null character instead.
        if (in == 0)
            in = &char_null;
        pos = pos0;
        caretPos = caretIndex;
        len = len0;
        minimal = minimal0;
        matchLen = 0;
        oneTestMatchedLen = 0;

        if (eng->valid && pos >= 0 && pos <= len) {
#ifndef QT_NO_REGEXP_OPTIM
            if (oneTest) {
                matched = matchHere();
            } else {
                // No match can start where fewer than minl characters remain.
                if (pos <= len - eng->minl) {
                    if (eng->caretAnchored) {
                        matched = matchHere();
                    } else if (eng->useGoodStringHeuristic) {
                        matched = eng->goodStringMatch(*this);
                    } else {
                        matched = eng->badCharMatch(*this);
                    }
                }
            }
#else
            matched = oneTest ? matchHere() : eng->bruteMatch(*this);
#endif
        }
    }

    if (matched) {
        int *c = captured;
        *c++ = pos;
        *c++ = matchLen;

        int numCaptures = (capturedSize - 2) >> 1;
#ifndef QT_NO_REGEXP_CAPTURE
        for (int i = 0; i < numCaptures; ++i) {
            // Translate the official capture number into the internal one.
            int j = eng->captureForOfficialCapture.at(i);
            // This local len shadows the member of the same name.
            int len = capEnd[j] - capBegin[j];
            *c++ = (len > 0) ? pos + capBegin[j] : 0;
            *c++ = len;
        }
#endif
    } else {
        // we rely on 2's complement here
        memset(captured, -1, capturedSize * sizeof(int));
    }
}
1397
1398	/*
1399	The three following functions add one state to the automaton and
1400	return the number of the state.
1401	*/
1402
1403	int QRegExpEngine::createState(QChar ch)
1404	{
1405	return setupState(ch.unicode());
1406	}
1407
1408	int QRegExpEngine::createState(const QRegExpCharClass &cc)
1409	{
1410	#ifndef QT_NO_REGEXP_CCLASS
1411	int n = cl.size();
1412	cl += QRegExpCharClass(cc);
1413	return setupState(CharClassBit \| n);
1414	#else
1415	Q_UNUSED(cc);
1416	return setupState(CharClassBit);
1417	#endif
1418	}
1419
1420	#ifndef QT_NO_REGEXP_BACKREF
1421	int QRegExpEngine::createState(int bref)
1422	{
1423	if (bref > nbrefs) {
1424	nbrefs = bref;
1425	if (nbrefs > MaxBackRefs) {
1426	error(RXERR_LIMIT);
1427	return 0;
1428	}
1429	}
1430	return setupState(BackRefBit \| bref);
1431	}
1432	#endif
1433
1434	/*
1435	The two following functions add a transition between all pairs of
1436	states (i, j) where i is found in from, and j is found in to.
1437
1438	Cat-transitions are distinguished from plus-transitions for
1439	capturing.
1440	*/
1441
1442	void QRegExpEngine::addCatTransitions(const QVector<int> &from, const QVector<int> &to)
1443	{
1444	for (int i = 0; i < from.size(); i++)
1445	mergeInto(&s[from.at(i)].outs, to);
1446	}
1447
1448	#ifndef QT_NO_REGEXP_CAPTURE
1449	void QRegExpEngine::addPlusTransitions(const QVector<int> &from, const QVector<int> &to, int atom)
1450	{
1451	for (int i = 0; i < from.size(); i++) {
1452	QRegExpAutomatonState &st = s[from.at(i)];
1453	const QVector<int> oldOuts = st.outs;
1454	mergeInto(&st.outs, to);
1455	if (f.at(atom).capture != QRegExpAtom::NoCapture) {
1456	for (int j = 0; j < to.size(); j++) {
1457	// ### st.reenter.contains(to.at(j)) check looks suspicious
1458	if (!st.reenter.contains(to.at(j)) &&
1459	qBinaryFind(oldOuts.constBegin(), oldOuts.constEnd(), to.at(j)) == oldOuts.end())
1460	st.reenter.insert(to.at(j), atom);
1461	}
1462	}
1463	}
1464	}
1465	#endif
1466
1467	#ifndef QT_NO_REGEXP_ANCHOR_ALT
1468	/*
1469	Returns an anchor that means a OR b.
1470	*/
1471	int QRegExpEngine::anchorAlternation(int a, int b)
1472	{
1473	if (((a & b) == a \|\| (a & b) == b) && ((a \| b) & Anchor_Alternation) == 0)
1474	return a & b;
1475
1476	int n = aa.size();
1477	#ifndef QT_NO_REGEXP_OPTIM
1478	if (n > 0 && aa.at(n - 1).a == a && aa.at(n - 1).b == b)
1479	return Anchor_Alternation \| (n - 1);
1480	#endif
1481
1482	aa.resize(n + 1);
1483	aa[n].a = a;
1484	aa[n].b = b;
1485	return Anchor_Alternation \| n;
1486	}
1487
1488	/*
1489	Returns an anchor that means a AND b.
1490	*/
1491	int QRegExpEngine::anchorConcatenation(int a, int b)
1492	{
1493	if (((a \| b) & Anchor_Alternation) == 0)
1494	return a \| b;
1495	if ((b & Anchor_Alternation) != 0)
1496	qSwap(a, b);
1497
1498	int aprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).a, b);
1499	int bprime = anchorConcatenation(aa.at(a ^ Anchor_Alternation).b, b);
1500	return anchorAlternation(aprime, bprime);
1501	}
1502	#endif
1503
1504	/*
1505	Adds anchor a on a transition caracterised by its from state and
1506	its to state.
1507	*/
1508	void QRegExpEngine::addAnchors(int from, int to, int a)
1509	{
1510	QRegExpAutomatonState &st = s[from];
1511	if (st.anchors.contains(to))
1512	a = anchorAlternation(st.anchors.value(to), a);
1513	st.anchors.insert(to, a);
1514	}
1515
1516	#ifndef QT_NO_REGEXP_OPTIM
1517	/*
1518	This function chooses between the good-string and the bad-character
1519	heuristics. It computes two scores and chooses the heuristic with
1520	the highest score.
1521
1522	Here are some common-sense constraints on the scores that should be
1523	respected if the formulas are ever modified: (1) If goodStr is
1524	empty, the good-string heuristic scores 0. (2) If the regular
1525	expression is trivial, the good-string heuristic should be used.
1526	(3) If the search is case insensitive, the good-string heuristic
1527	should be used, unless it scores 0. (Case insensitivity turns all
1528	entries of occ1 to 0.) (4) If (goodLateStart - goodEarlyStart) is
1529	big, the good-string heuristic should score less.
1530	*/
1531	void QRegExpEngine::heuristicallyChooseHeuristic()
1532	{
1533	if (minl == 0) {
1534	useGoodStringHeuristic = false;
1535	} else if (trivial) {
1536	useGoodStringHeuristic = true;
1537	} else {
1538	/*
1539	Magic formula: The good string has to constitute a good
1540	proportion of the minimum-length string, and appear at a
1541	more-or-less known index.
1542	*/
1543	int goodStringScore = (64 * goodStr.length() / minl) -
1544	(goodLateStart - goodEarlyStart);
1545	/*
1546	Less magic formula: We pick some characters at random, and
1547	check whether they are good or bad.
1548	*/
1549	int badCharScore = 0;
1550	int step = qMax(1, NumBadChars / 32);
1551	for (int i = 1; i < NumBadChars; i += step) {
1552	if (occ1.at(i) == NoOccurrence)
1553	badCharScore += minl;
1554	else
1555	badCharScore += occ1.at(i);
1556	}
1557	badCharScore /= minl;
1558	useGoodStringHeuristic = (goodStringScore > badCharScore);
1559	}
1560	}
1561	#endif
1562
1563	#if defined(QT_DEBUG)
1564	void QRegExpEngine::dump() const
1565	{
1566	int i, j;
1567	qDebug("Case %ssensitive engine", cs ? "" : "in");
1568	qDebug(" States");
1569	for (i = 0; i < s.size(); i++) {
1570	qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");
1571	#ifndef QT_NO_REGEXP_CAPTURE
1572	if (nf > 0)
1573	qDebug(" in atom %d", s[i].atom);
1574	#endif
1575	int m = s[i].match;
1576	if ((m & CharClassBit) != 0) {
1577	qDebug(" match character class %d", m ^ CharClassBit);
1578	#ifndef QT_NO_REGEXP_CCLASS
1579	cl[m ^ CharClassBit].dump();
1580	#else
1581	qDebug(" negative character class");
1582	#endif
1583	} else if ((m & BackRefBit) != 0) {
1584	qDebug(" match back-reference %d", m ^ BackRefBit);
1585	} else if (m >= 0x20 && m <= 0x7e) {
1586	qDebug(" match 0x%.4x (%c)", m, m);
1587	} else {
1588	qDebug(" match 0x%.4x", m);
1589	}
1590	for (j = 0; j < s[i].outs.size(); j++) {
1591	int next = s[i].outs[j];
1592	qDebug(" -> %d", next);
1593	if (s[i].reenter.contains(next))
1594	qDebug(" [reenter %d]", s[i].reenter[next]);
1595	if (s[i].anchors.value(next) != 0)
1596	qDebug(" [anchors 0x%.8x]", s[i].anchors[next]);
1597	}
1598	}
1599	#ifndef QT_NO_REGEXP_CAPTURE
1600	if (nf > 0) {
1601	qDebug(" Atom Parent Capture");
1602	for (i = 0; i < nf; i++) {
1603	if (f[i].capture == QRegExpAtom::NoCapture) {
1604	qDebug(" %6d %6d nil", i, f[i].parent);
1605	} else {
1606	int cap = f[i].capture;
1607	bool official = captureForOfficialCapture.contains(cap);
1608	qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture,
1609	official ? "official" : "");
1610	}
1611	}
1612	}
1613	#endif
1614	#ifndef QT_NO_REGEXP_ANCHOR_ALT
1615	for (i = 0; i < aa.size(); i++)
1616	qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.9x", i, aa[i].a, aa[i].b);
1617	#endif
1618	}
1619	#endif
1620
1621	void QRegExpEngine::setup()
1622	{
1623	ref = 1;
1624	#ifndef QT_NO_REGEXP_CAPTURE
1625	f.resize(32);
1626	nf = 0;
1627	cf = -1;
1628	#endif
1629	officialncap = 0;
1630	ncap = 0;
1631	#ifndef QT_NO_REGEXP_OPTIM
1632	caretAnchored = true;
1633	trivial = true;
1634	#endif
1635	valid = false;
1636	#ifndef QT_NO_REGEXP_BACKREF
1637	nbrefs = 0;
1638	#endif
1639	#ifndef QT_NO_REGEXP_OPTIM
1640	useGoodStringHeuristic = true;
1641	minl = 0;
1642	occ1.fill(0, NumBadChars);
1643	#endif
1644	}
1645
1646	int QRegExpEngine::setupState(int match)
1647	{
1648	#ifndef QT_NO_REGEXP_CAPTURE
1649	s += QRegExpAutomatonState(cf, match);
1650	#else
1651	s += QRegExpAutomatonState(match);
1652	#endif
1653	return s.size() - 1;
1654	}
1655
1656	#ifndef QT_NO_REGEXP_CAPTURE
1657	/*
1658	Functions startAtom() and finishAtom() should be called to delimit
1659	atoms. When a state is created, it is assigned to the current atom.
1660	The information is later used for capturing.
1661	*/
1662	int QRegExpEngine::startAtom(bool officialCapture)
1663	{
1664	if ((nf & (nf + 1)) == 0 && nf + 1 >= f.size())
1665	f.resize((nf + 1) << 1);
1666	f[nf].parent = cf;
1667	cf = nf++;
1668	f[cf].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture;
1669	return cf;
1670	}
1671
1672	void QRegExpEngine::finishAtom(int atom, bool needCapture)
1673	{
1674	if (greedyQuantifiers && needCapture && f[atom].capture == QRegExpAtom::NoCapture)
1675	f[atom].capture = QRegExpAtom::UnofficialCapture;
1676	cf = f.at(atom).parent;
1677	}
1678	#endif
1679
1680	#ifndef QT_NO_REGEXP_LOOKAHEAD
1681	/*
1682	Creates a lookahead anchor.
1683	*/
1684	int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative)
1685	{
1686	int n = ahead.size();
1687	if (n == MaxLookaheads) {
1688	error(RXERR_LIMIT);
1689	return 0;
1690	}
1691	ahead += new QRegExpLookahead(eng, negative);
1692	return Anchor_FirstLookahead << n;
1693	}
1694	#endif
1695
1696	#ifndef QT_NO_REGEXP_CAPTURE
/*
  We want the longest leftmost captures.

  Compares two capture sets of ncap entries each, given as parallel
  begin/end arrays, and returns true if set 1 is better than set 2.
  The first capture in which the sets differ decides: the set whose
  capture begins earlier wins, and for equal begins the set whose
  capture ends later wins. Identical sets are not "better".
*/
static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2,
                            const int *end2)
{
    for (int i = 0; i < ncap; i++) {
        int delta = begin2[i] - begin1[i]; // it has to start early...
        if (delta == 0)
            delta = end1[i] - end2[i]; // ...and end late

        if (delta != 0)
            return delta > 0;
    }
    return false;
}
1713	#endif
1714
1715	/*
1716	Returns true if anchor a matches at position pos + i in the input
1717	string, otherwise false.
1718	*/
1719	bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin)
1720	{
1721	int j;
1722
1723	#ifndef QT_NO_REGEXP_ANCHOR_ALT
1724	if ((a & QRegExpEngine::Anchor_Alternation) != 0)
1725	return testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).a, capBegin)
1726	\|\| testAnchor(i, eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation).b, capBegin);
1727	#endif
1728
1729	if ((a & QRegExpEngine::Anchor_Caret) != 0) {
1730	if (pos + i != caretPos)
1731	return false;
1732	}
1733	if ((a & QRegExpEngine::Anchor_Dollar) != 0) {
1734	if (pos + i != len)
1735	return false;
1736	}
1737	#ifndef QT_NO_REGEXP_ESCAPE
1738	if ((a & (QRegExpEngine::Anchor_Word \| QRegExpEngine::Anchor_NonWord)) != 0) {
1739	bool before = false;
1740	bool after = false;
1741	if (pos + i != 0)
1742	before = isWord(in[pos + i - 1]);
1743	if (pos + i != len)
1744	after = isWord(in[pos + i]);
1745	if ((a & QRegExpEngine::Anchor_Word) != 0 && (before == after))
1746	return false;
1747	if ((a & QRegExpEngine::Anchor_NonWord) != 0 && (before != after))
1748	return false;
1749	}
1750	#endif
1751	#ifndef QT_NO_REGEXP_LOOKAHEAD
1752	if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) {
1753	const QVector<QRegExpLookahead *> &ahead = eng->ahead;
1754	for (j = 0; j < ahead.size(); j++) {
1755	if ((a & (QRegExpEngine::Anchor_FirstLookahead << j)) != 0) {
1756	QRegExpMatchState matchState;
1757	matchState.prepareForMatch(ahead[j]->eng);
1758	matchState.match(in + pos + i, len - pos - i, 0,
1759	true, true, matchState.caretPos - matchState.pos - i);
1760	if ((matchState.captured[0] == 0) == ahead[j]->neg)
1761	return false;
1762	}
1763	}
1764	}
1765	#endif
1766	#ifndef QT_NO_REGEXP_CAPTURE
1767	#ifndef QT_NO_REGEXP_BACKREF
1768	for (j = 0; j < eng->nbrefs; j++) {
1769	if ((a & (QRegExpEngine::Anchor_BackRef1Empty << j)) != 0) {
1770	int i = eng->captureForOfficialCapture.at(j);
1771	if (capBegin[i] != EmptyCapture)
1772	return false;
1773	}
1774	}
1775	#endif
1776	#endif
1777	return true;
1778	}
1779
1780	#ifndef QT_NO_REGEXP_OPTIM
1781	/*
1782	The three following functions are what Jeffrey Friedl would call
1783	transmissions (or bump-alongs). Using one or the other should make
1784	no difference except in performance.
1785	*/
1786
1787	bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const
1788	{
1789	int k = matchState.pos + goodEarlyStart;
1790	QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs);
1791	while ((k = matcher.indexIn(matchState.in, matchState.len, k)) != -1) {
1792	int from = k - goodLateStart;
1793	int to = k - goodEarlyStart;
1794	if (from > matchState.pos)
1795	matchState.pos = from;
1796
1797	while (matchState.pos <= to) {
1798	if (matchState.matchHere())
1799	return true;
1800	++matchState.pos;
1801	}
1802	++k;
1803	}
1804	return false;
1805	}
1806
1807	bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const
1808	{
1809	int slideHead = 0;
1810	int slideNext = 0;
1811	int i;
1812	int lastPos = matchState.len - minl;
1813	memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int));
1814
1815	/*
1816	Set up the slide table, used for the bad-character heuristic,
1817	using the table of first occurrence of each character.
1818	*/
1819	for (i = 0; i < minl; i++) {
1820	int sk = occ1[BadChar(matchState.in[matchState.pos + i])];
1821	if (sk == NoOccurrence)
1822	sk = i + 1;
1823	if (sk > 0) {
1824	int k = i + 1 - sk;
1825	if (k < 0) {
1826	sk = i + 1;
1827	k = 0;
1828	}
1829	if (sk > matchState.slideTab[k])
1830	matchState.slideTab[k] = sk;
1831	}
1832	}
1833
1834	if (matchState.pos > lastPos)
1835	return false;
1836
1837	for (;;) {
1838	if (++slideNext >= matchState.slideTabSize)
1839	slideNext = 0;
1840	if (matchState.slideTab[slideHead] > 0) {
1841	if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext])
1842	matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1;
1843	matchState.slideTab[slideHead] = 0;
1844	} else {
1845	if (matchState.matchHere())
1846	return true;
1847	}
1848
1849	if (matchState.pos == lastPos)
1850	break;
1851
1852	/*
1853	Update the slide table. This code has much in common with
1854	the initialization code.
1855	*/
1856	int sk = occ1[BadChar(matchState.in[matchState.pos + minl])];
1857	if (sk == NoOccurrence) {
1858	matchState.slideTab[slideNext] = minl;
1859	} else if (sk > 0) {
1860	int k = slideNext + minl - sk;
1861	if (k >= matchState.slideTabSize)
1862	k -= matchState.slideTabSize;
1863	if (sk > matchState.slideTab[k])
1864	matchState.slideTab[k] = sk;
1865	}
1866	slideHead = slideNext;
1867	++matchState.pos;
1868	}
1869	return false;
1870	}
1871	#else
1872	bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const
1873	{
1874	while (matchState.pos <= matchState.len) {
1875	if (matchState.matchHere())
1876	return true;
1877	++matchState.pos;
1878	}
1879	return false;
1880	}
1881	#endif
1882
1883	/*
1884	Here's the core of the engine. It tries to do a match here and now.
1885	*/
1886	bool QRegExpMatchState::matchHere()
1887	{
1888	int ncur = 1, nnext = 0;
1889	int i = 0, j, k, m;
1890	bool stop = false;
1891
1892	matchLen = -1;
1893	oneTestMatchedLen = -1;
1894	curStack[0] = QRegExpEngine::InitialState;
1895
1896	int ncap = eng->ncap;
1897	#ifndef QT_NO_REGEXP_CAPTURE
1898	if (ncap > 0) {
1899	for (j = 0; j < ncap; j++) {
1900	curCapBegin[j] = EmptyCapture;
1901	curCapEnd[j] = EmptyCapture;
1902	}
1903	}
1904	#endif
1905
1906	#ifndef QT_NO_REGEXP_BACKREF
1907	while ((ncur > 0 \|\| !sleeping.isEmpty()) && i <= len - pos && !stop)
1908	#else
1909	while (ncur > 0 && i <= len - pos && !stop)
1910	#endif
1911	{
1912	int ch = (i < len - pos) ? in[pos + i].unicode() : 0;
1913	for (j = 0; j < ncur; j++) {
1914	int cur = curStack[j];
1915	const QRegExpAutomatonState &scur = eng->s.at(cur);
1916	const QVector<int> &outs = scur.outs;
1917	for (k = 0; k < outs.size(); k++) {
1918	int next = outs.at(k);
1919	const QRegExpAutomatonState &snext = eng->s.at(next);
1920	bool inside = true;
1921	#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
1922	int needSomeSleep = 0;
1923	#endif
1924
1925	/*
1926	First, check if the anchors are anchored properly.
1927	*/
1928	int a = scur.anchors.value(next);
1929	if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap))
1930	inside = false;
1931
1932	/*
1933	If indeed they are, check if the input character is
1934	correct for this transition.
1935	*/
1936	if (inside) {
1937	m = snext.match;
1938	if ((m & (QRegExpEngine::CharClassBit \| QRegExpEngine::BackRefBit)) == 0) {
1939	if (eng->cs)
1940	inside = (m == ch);
1941	else
1942	inside = (QChar(m).toLower() == QChar(ch).toLower());
1943	} else if (next == QRegExpEngine::FinalState) {
1944	matchLen = i;
1945	stop = minimal;
1946	inside = true;
1947	} else if ((m & QRegExpEngine::CharClassBit) != 0) {
1948	#ifndef QT_NO_REGEXP_CCLASS
1949	const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit);
1950	if (eng->cs)
1951	inside = cc.in(ch);
1952	else if (cc.negative())
1953	inside = cc.in(QChar(ch).toLower()) &&
1954	cc.in(QChar(ch).toUpper());
1955	else
1956	inside = cc.in(QChar(ch).toLower()) \|\|
1957	cc.in(QChar(ch).toUpper());
1958	#endif
1959	#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
1960	} else { /* ((m & QRegExpEngine::BackRefBit) != 0) */
1961	int bref = m ^ QRegExpEngine::BackRefBit;
1962	int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1);
1963
1964	inside = bref <= ncap && curCapBegin[ell] != EmptyCapture;
1965	if (inside) {
1966	if (eng->cs)
1967	inside = (in[pos + curCapBegin[ell]] == QChar(ch));
1968	else
1969	inside = (in[pos + curCapBegin[ell]].toLower()
1970	== QChar(ch).toLower());
1971	}
1972
1973	if (inside) {
1974	int delta;
1975	if (curCapEnd[ell] == EmptyCapture)
1976	delta = i - curCapBegin[ell];
1977	else
1978	delta = curCapEnd[ell] - curCapBegin[ell];
1979
1980	inside = (delta <= len - (pos + i));
1981	if (inside && delta > 1) {
1982	int n = 1;
1983	if (eng->cs) {
1984	while (n < delta) {
1985	if (in[pos + curCapBegin[ell] + n]
1986	!= in[pos + i + n])
1987	break;
1988	++n;
1989	}
1990	} else {
1991	while (n < delta) {
1992	QChar a = in[pos + curCapBegin[ell] + n];
1993	QChar b = in[pos + i + n];
1994	if (a.toLower() != b.toLower())
1995	break;
1996	++n;
1997	}
1998	}
1999	inside = (n == delta);
2000	if (inside)
2001	needSomeSleep = delta - 1;
2002	}
2003	}
2004	#endif
2005	}
2006	}
2007
2008	/*
2009	We must now update our data structures.
2010	*/
2011	if (inside) {
2012	#ifndef QT_NO_REGEXP_CAPTURE
2013	int capBegin, capEnd;
2014	#endif
2015	/*
2016	If the next state was not encountered yet, all
2017	is fine.
2018	*/
2019	if ((m = inNextStack[next]) == -1) {
2020	m = nnext++;
2021	nextStack[m] = next;
2022	inNextStack[next] = m;
2023	#ifndef QT_NO_REGEXP_CAPTURE
2024	capBegin = nextCapBegin + m * ncap;
2025	capEnd = nextCapEnd + m * ncap;
2026
2027	/*
2028	Otherwise, we'll first maintain captures in
2029	temporary arrays, and decide at the end whether
2030	it's best to keep the previous capture zones or
2031	the new ones.
2032	*/
2033	} else {
2034	capBegin = tempCapBegin;
2035	capEnd = tempCapEnd;
2036	#endif
2037	}
2038
2039	#ifndef QT_NO_REGEXP_CAPTURE
2040	/*
2041	Updating the capture zones is much of a task.
2042	*/
2043	if (ncap > 0) {
2044	memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int));
2045	memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int));
2046	int c = scur.atom, n = snext.atom;
2047	int p = -1, q = -1;
2048	int cap;
2049
2050	/*
2051	Lemma 1. For any x in the range [0..nf), we
2052	have f[x].parent < x.
2053
2054	Proof. By looking at startAtom(), it is
2055	clear that cf < nf holds all the time, and
2056	thus that f[nf].parent < nf.
2057	*/
2058
2059	/*
2060	If we are reentering an atom, we empty all
2061	capture zones inside it.
2062	*/
2063	if ((q = scur.reenter.value(next)) != 0) {
2064	QBitArray b(eng->nf, false);
2065	b.setBit(q, true);
2066	for (int ell = q + 1; ell < eng->nf; ell++) {
2067	if (b.testBit(eng->f.at(ell).parent)) {
2068	b.setBit(ell, true);
2069	cap = eng->f.at(ell).capture;
2070	if (cap >= 0) {
2071	capBegin[cap] = EmptyCapture;
2072	capEnd[cap] = EmptyCapture;
2073	}
2074	}
2075	}
2076	p = eng->f.at(q).parent;
2077
2078	/*
2079	Otherwise, close the capture zones we are
2080	leaving. We are leaving f[c].capture,
2081	f[f[c].parent].capture,
2082	f[f[f[c].parent].parent].capture, ...,
2083	until f[x].capture, with x such that
2084	f[x].parent is the youngest common ancestor
2085	for c and n.
2086
2087	We go up along c's and n's ancestry until
2088	we find x.
2089	*/
2090	} else {
2091	p = c;
2092	q = n;
2093	while (p != q) {
2094	if (p > q) {
2095	cap = eng->f.at(p).capture;
2096	if (cap >= 0) {
2097	if (capBegin[cap] == i) {
2098	capBegin[cap] = EmptyCapture;
2099	capEnd[cap] = EmptyCapture;
2100	} else {
2101	capEnd[cap] = i;
2102	}
2103	}
2104	p = eng->f.at(p).parent;
2105	} else {
2106	q = eng->f.at(q).parent;
2107	}
2108	}
2109	}
2110
2111	/*
2112	In any case, we now open the capture zones
2113	we are entering. We work upwards from n
2114	until we reach p (the parent of the atom we
2115	reenter or the youngest common ancestor).
2116	*/
2117	while (n > p) {
2118	cap = eng->f.at(n).capture;
2119	if (cap >= 0) {
2120	capBegin[cap] = i;
2121	capEnd[cap] = EmptyCapture;
2122	}
2123	n = eng->f.at(n).parent;
2124	}
2125	/*
2126	If the next state was already in
2127	nextStack, we must choose carefully which
2128	capture zones we want to keep.
2129	*/
2130	if (capBegin == tempCapBegin &&
2131	isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap,
2132	nextCapEnd + m * ncap)) {
2133	memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
2134	memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
2135	}
2136	}
2137	#ifndef QT_NO_REGEXP_BACKREF
2138	/*
2139	We are done with updating the capture zones.
2140	It's now time to put the next state to sleep,
2141	if it needs to, and to remove it from
2142	nextStack.
2143	*/
2144	if (needSomeSleep > 0) {
2145	QVector<int> zzZ(2 + 2 * ncap);
2146	zzZ[0] = i + needSomeSleep;
2147	zzZ[1] = next;
2148	if (ncap > 0) {
2149	memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int));
2150	memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int));
2151	}
2152	inNextStack[nextStack[--nnext]] = -1;
2153	sleeping.append(zzZ);
2154	}
2155	#endif
2156	#endif
2157	}
2158	}
2159	}
2160	#ifndef QT_NO_REGEXP_CAPTURE
2161	/*
2162	If we reached the final state, hurray! Copy the captured
2163	zone.
2164	*/
2165	if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) {
2166	memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int));
2167	memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int));
2168	}
2169	#ifndef QT_NO_REGEXP_BACKREF
2170	/*
2171	It's time to wake up the sleepers.
2172	*/
2173	j = 0;
2174	while (j < sleeping.count()) {
2175	if (sleeping.at(j)[0] == i) {
2176	const QVector<int> &zzZ = sleeping.at(j);
2177	int next = zzZ[1];
2178	const int *capBegin = zzZ.data() + 2;
2179	const int *capEnd = zzZ.data() + 2 + ncap;
2180	bool copyOver = true;
2181
2182	if ((m = inNextStack[next]) == -1) {
2183	m = nnext++;
2184	nextStack[m] = next;
2185	inNextStack[next] = m;
2186	} else {
2187	copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap,
2188	capBegin, capEnd);
2189	}
2190	if (copyOver) {
2191	memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
2192	memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
2193	}
2194
2195	sleeping.removeAt(j);
2196	} else {
2197	++j;
2198	}
2199	}
2200	#endif
2201	#endif
2202	for (j = 0; j < nnext; j++)
2203	inNextStack[nextStack[j]] = -1;
2204
2205	// avoid needless iteration that confuses oneTestMatchedLen
2206	if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState
2207	#ifndef QT_NO_REGEXP_BACKREF
2208	&& sleeping.isEmpty()
2209	#endif
2210	)
2211	stop = true;
2212
2213	qSwap(curStack, nextStack);
2214	#ifndef QT_NO_REGEXP_CAPTURE
2215	qSwap(curCapBegin, nextCapBegin);
2216	qSwap(curCapEnd, nextCapEnd);
2217	#endif
2218	ncur = nnext;
2219	nnext = 0;
2220	++i;
2221	}
2222
2223	#ifndef QT_NO_REGEXP_BACKREF
2224	/*
2225	If minimal matching is enabled, we might have some sleepers
2226	left.
2227	*/
2228	if (!sleeping.isEmpty())
2229	sleeping.clear();
2230	#endif
2231
2232	oneTestMatchedLen = i - 1;
2233	return (matchLen >= 0);
2234	}
2235
2236	#ifndef QT_NO_REGEXP_CCLASS
2237
2238	QRegExpCharClass::QRegExpCharClass()
2239	: c(0), n(false)
2240	{
2241	#ifndef QT_NO_REGEXP_OPTIM
2242	occ1.fill(NoOccurrence, NumBadChars);
2243	#endif
2244	}
2245
2246	QRegExpCharClass &QRegExpCharClass::operator=(const QRegExpCharClass &cc)
2247	{
2248	c = cc.c;
2249	r = cc.r;
2250	n = cc.n;
2251	#ifndef QT_NO_REGEXP_OPTIM
2252	occ1 = cc.occ1;
2253	#endif
2254	return *this;
2255	}
2256
2257	void QRegExpCharClass::clear()
2258	{
2259	c = 0;
2260	r.resize(0);
2261	n = false;
2262	}
2263
2264	void QRegExpCharClass::setNegative(bool negative)
2265	{
2266	n = negative;
2267	#ifndef QT_NO_REGEXP_OPTIM
2268	occ1.fill(0, NumBadChars);
2269	#endif
2270	}
2271
2272	void QRegExpCharClass::addCategories(int cats)
2273	{
2274	c \|= cats;
2275	#ifndef QT_NO_REGEXP_OPTIM
2276	occ1.fill(0, NumBadChars);
2277	#endif
2278	}
2279
2280	void QRegExpCharClass::addRange(ushort from, ushort to)
2281	{
2282	if (from > to)
2283	qSwap(from, to);
2284	int m = r.size();
2285	r.resize(m + 1);
2286	r[m].from = from;
2287	r[m].len = to - from + 1;
2288
2289	#ifndef QT_NO_REGEXP_OPTIM
2290	int i;
2291
2292	if (to - from < NumBadChars) {
2293	if (from % NumBadChars <= to % NumBadChars) {
2294	for (i = from % NumBadChars; i <= to % NumBadChars; i++)
2295	occ1[i] = 0;
2296	} else {
2297	for (i = 0; i <= to % NumBadChars; i++)
2298	occ1[i] = 0;
2299	for (i = from % NumBadChars; i < NumBadChars; i++)
2300	occ1[i] = 0;
2301	}
2302	} else {
2303	occ1.fill(0, NumBadChars);
2304	}
2305	#endif
2306	}
2307
2308	bool QRegExpCharClass::in(QChar ch) const
2309	{
2310	#ifndef QT_NO_REGEXP_OPTIM
2311	if (occ1.at(BadChar(ch)) == NoOccurrence)
2312	return n;
2313	#endif
2314
2315	if (c != 0 && (c & (1 << (int)ch.category())) != 0)
2316	return !n;
2317
2318	const int uc = ch.unicode();
2319	int size = r.size();
2320
2321	for (int i = 0; i < size; ++i) {
2322	const QRegExpCharClassRange &range = r.at(i);
2323	if (uint(uc - range.from) < uint(r.at(i).len))
2324	return !n;
2325	}
2326	return n;
2327	}
2328
2329	#if defined(QT_DEBUG)
2330	void QRegExpCharClass::dump() const
2331	{
2332	int i;
2333	qDebug(" %stive character class", n ? "nega" : "posi");
2334	#ifndef QT_NO_REGEXP_CCLASS
2335	if (c != 0)
2336	qDebug(" categories 0x%.8x", c);
2337	#endif
2338	for (i = 0; i < r.size(); i++)
2339	qDebug(" 0x%.4x through 0x%.4x", r[i].from, r[i].from + r[i].len - 1);
2340	}
2341	#endif
2342	#endif
2343
2344	QRegExpEngine::Box::Box(QRegExpEngine *engine)
2345	: eng(engine), skipanchors(0)
2346	#ifndef QT_NO_REGEXP_OPTIM
2347	, earlyStart(0), lateStart(0), maxl(0)
2348	#endif
2349	{
2350	#ifndef QT_NO_REGEXP_OPTIM
2351	occ1.fill(NoOccurrence, NumBadChars);
2352	#endif
2353	minl = 0;
2354	}
2355
2356	QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b)
2357	{
2358	eng = b.eng;
2359	ls = b.ls;
2360	rs = b.rs;
2361	lanchors = b.lanchors;
2362	ranchors = b.ranchors;
2363	skipanchors = b.skipanchors;
2364	#ifndef QT_NO_REGEXP_OPTIM
2365	earlyStart = b.earlyStart;
2366	lateStart = b.lateStart;
2367	str = b.str;
2368	leftStr = b.leftStr;
2369	rightStr = b.rightStr;
2370	maxl = b.maxl;
2371	occ1 = b.occ1;
2372	#endif
2373	minl = b.minl;
2374	return *this;
2375	}
2376
2377	void QRegExpEngine::Box::set(QChar ch)
2378	{
2379	ls.resize(1);
2380	ls[0] = eng->createState(ch);
2381	rs = ls;
2382	#ifndef QT_NO_REGEXP_OPTIM
2383	str = ch;
2384	leftStr = ch;
2385	rightStr = ch;
2386	maxl = 1;
2387	occ1[BadChar(ch)] = 0;
2388	#endif
2389	minl = 1;
2390	}
2391
2392	void QRegExpEngine::Box::set(const QRegExpCharClass &cc)
2393	{
2394	ls.resize(1);
2395	ls[0] = eng->createState(cc);
2396	rs = ls;
2397	#ifndef QT_NO_REGEXP_OPTIM
2398	maxl = 1;
2399	occ1 = cc.firstOccurrence();
2400	#endif
2401	minl = 1;
2402	}
2403
2404	#ifndef QT_NO_REGEXP_BACKREF
2405	void QRegExpEngine::Box::set(int bref)
2406	{
2407	ls.resize(1);
2408	ls[0] = eng->createState(bref);
2409	rs = ls;
2410	if (bref >= 1 && bref <= MaxBackRefs)
2411	skipanchors = Anchor_BackRef0Empty << bref;
2412	#ifndef QT_NO_REGEXP_OPTIM
2413	maxl = InftyLen;
2414	#endif
2415	minl = 0;
2416	}
2417	#endif
2418
2419	void QRegExpEngine::Box::cat(const Box &b)
2420	{
2421	eng->addCatTransitions(rs, b.ls);
2422	addAnchorsToEngine(b);
2423	if (minl == 0) {
2424	lanchors.unite(b.lanchors);
2425	if (skipanchors != 0) {
2426	for (int i = 0; i < b.ls.size(); i++) {
2427	int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors);
2428	lanchors.insert(b.ls.at(i), a);
2429	}
2430	}
2431	mergeInto(&ls, b.ls);
2432	}
2433	if (b.minl == 0) {
2434	ranchors.unite(b.ranchors);
2435	if (b.skipanchors != 0) {
2436	for (int i = 0; i < rs.size(); i++) {
2437	int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors);
2438	ranchors.insert(rs.at(i), a);
2439	}
2440	}
2441	mergeInto(&rs, b.rs);
2442	} else {
2443	ranchors = b.ranchors;
2444	rs = b.rs;
2445	}
2446
2447	#ifndef QT_NO_REGEXP_OPTIM
2448	if (maxl != InftyLen) {
2449	if (rightStr.length() + b.leftStr.length() >
2450	qMax(str.length(), b.str.length())) {
2451	earlyStart = minl - rightStr.length();
2452	lateStart = maxl - rightStr.length();
2453	str = rightStr + b.leftStr;
2454	} else if (b.str.length() > str.length()) {
2455	earlyStart = minl + b.earlyStart;
2456	lateStart = maxl + b.lateStart;
2457	str = b.str;
2458	}
2459	}
2460
2461	if (leftStr.length() == maxl)
2462	leftStr += b.leftStr;
2463
2464	if (b.rightStr.length() == b.maxl) {
2465	rightStr += b.rightStr;
2466	} else {
2467	rightStr = b.rightStr;
2468	}
2469
2470	if (maxl == InftyLen \|\| b.maxl == InftyLen) {
2471	maxl = InftyLen;
2472	} else {
2473	maxl += b.maxl;
2474	}
2475
2476	for (int i = 0; i < NumBadChars; i++) {
2477	if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i))
2478	occ1[i] = minl + b.occ1.at(i);
2479	}
2480	#endif
2481
2482	minl += b.minl;
2483	if (minl == 0)
2484	skipanchors = eng->anchorConcatenation(skipanchors, b.skipanchors);
2485	else
2486	skipanchors = 0;
2487	}
2488
2489	void QRegExpEngine::Box::orx(const Box &b)
2490	{
2491	mergeInto(&ls, b.ls);
2492	lanchors.unite(b.lanchors);
2493	mergeInto(&rs, b.rs);
2494	ranchors.unite(b.ranchors);
2495
2496	if (b.minl == 0) {
2497	if (minl == 0)
2498	skipanchors = eng->anchorAlternation(skipanchors, b.skipanchors);
2499	else
2500	skipanchors = b.skipanchors;
2501	}
2502
2503	#ifndef QT_NO_REGEXP_OPTIM
2504	for (int i = 0; i < NumBadChars; i++) {
2505	if (occ1.at(i) > b.occ1.at(i))
2506	occ1[i] = b.occ1.at(i);
2507	}
2508	earlyStart = 0;
2509	lateStart = 0;
2510	str = QString();
2511	leftStr = QString();
2512	rightStr = QString();
2513	if (b.maxl > maxl)
2514	maxl = b.maxl;
2515	#endif
2516	if (b.minl < minl)
2517	minl = b.minl;
2518	}
2519
2520	void QRegExpEngine::Box::plus(int atom)
2521	{
2522	#ifndef QT_NO_REGEXP_CAPTURE
2523	eng->addPlusTransitions(rs, ls, atom);
2524	#else
2525	Q_UNUSED(atom);
2526	eng->addCatTransitions(rs, ls);
2527	#endif
2528	addAnchorsToEngine(*this);
2529	#ifndef QT_NO_REGEXP_OPTIM
2530	maxl = InftyLen;
2531	#endif
2532	}
2533
2534	void QRegExpEngine::Box::opt()
2535	{
2536	#ifndef QT_NO_REGEXP_OPTIM
2537	earlyStart = 0;
2538	lateStart = 0;
2539	str = QString();
2540	leftStr = QString();
2541	rightStr = QString();
2542	#endif
2543	skipanchors = 0;
2544	minl = 0;
2545	}
2546
2547	void QRegExpEngine::Box::catAnchor(int a)
2548	{
2549	if (a != 0) {
2550	for (int i = 0; i < rs.size(); i++) {
2551	a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a);
2552	ranchors.insert(rs.at(i), a);
2553	}
2554	if (minl == 0)
2555	skipanchors = eng->anchorConcatenation(skipanchors, a);
2556	}
2557	}
2558
2559	#ifndef QT_NO_REGEXP_OPTIM
2560	void QRegExpEngine::Box::setupHeuristics()
2561	{
2562	eng->goodEarlyStart = earlyStart;
2563	eng->goodLateStart = lateStart;
2564	eng->goodStr = eng->cs ? str : str.toLower();
2565
2566	eng->minl = minl;
2567	if (eng->cs) {
2568	/*
2569	A regular expression such as 112\|1 has occ1['2'] = 2 and minl =
2570	1 at this point. An entry of occ1 has to be at most minl or
2571	infinity for the rest of the algorithm to go well.
2572
2573	We waited until here before normalizing these cases (instead of
2574	doing it in Box::orx()) because sometimes things improve by
2575	themselves. Consider for example (112\|1)34.
2576	*/
2577	for (int i = 0; i < NumBadChars; i++) {
2578	if (occ1.at(i) != NoOccurrence && occ1.at(i) >= minl)
2579	occ1[i] = minl;
2580	}
2581	eng->occ1 = occ1;
2582	} else {
2583	eng->occ1.fill(0, NumBadChars);
2584	}
2585
2586	eng->heuristicallyChooseHeuristic();
2587	}
2588	#endif
2589
2590	#if defined(QT_DEBUG)
2591	void QRegExpEngine::Box::dump() const
2592	{
2593	int i;
2594	qDebug("Box of at least %d character%s", minl, minl == 1 ? "" : "s");
2595	qDebug(" Left states:");
2596	for (i = 0; i < ls.size(); i++) {
2597	if (lanchors.value(ls[i], 0) == 0)
2598	qDebug(" %d", ls[i]);
2599	else
2600	qDebug(" %d [anchors 0x%.8x]", ls[i], lanchors[ls[i]]);
2601	}
2602	qDebug(" Right states:");
2603	for (i = 0; i < rs.size(); i++) {
2604	if (ranchors.value(rs[i], 0) == 0)
2605	qDebug(" %d", rs[i]);
2606	else
2607	qDebug(" %d [anchors 0x%.8x]", rs[i], ranchors[rs[i]]);
2608	}
2609	qDebug(" Skip anchors: 0x%.8x", skipanchors);
2610	}
2611	#endif
2612
2613	void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const
2614	{
2615	for (int i = 0; i < to.ls.size(); i++) {
2616	for (int j = 0; j < rs.size(); j++) {
2617	int a = eng->anchorConcatenation(ranchors.value(rs.at(j), 0),
2618	to.lanchors.value(to.ls.at(i), 0));
2619	eng->addAnchors(rs[j], to.ls[i], a);
2620	}
2621	}
2622	}
2623
2624	int QRegExpEngine::getChar()
2625	{
2626	return (yyPos == yyLen) ? EOS : yyIn[yyPos++].unicode();
2627	}
2628
2629	int QRegExpEngine::getEscape()
2630	{
2631	#ifndef QT_NO_REGEXP_ESCAPE
2632	const char tab[] = "afnrtv"; // no b, as \b means word boundary
2633	const char backTab[] = "\a\f\n\r\t\v";
2634	ushort low;
2635	int i;
2636	#endif
2637	ushort val;
2638	int prevCh = yyCh;
2639
2640	if (prevCh == EOS) {
2641	error(RXERR_END);
2642	return Tok_Char \| '\\';
2643	}
2644	yyCh = getChar();
2645	#ifndef QT_NO_REGEXP_ESCAPE
2646	if ((prevCh & ~0xff) == 0) {
2647	const char *p = strchr(tab, prevCh);
2648	if (p != 0)
2649	return Tok_Char \| backTab[p - tab];
2650	}
2651	#endif
2652
2653	switch (prevCh) {
2654	#ifndef QT_NO_REGEXP_ESCAPE
2655	case '0':
2656	val = 0;
2657	for (i = 0; i < 3; i++) {
2658	if (yyCh >= '0' && yyCh <= '7')
2659	val = (val << 3) \| (yyCh - '0');
2660	else
2661	break;
2662	yyCh = getChar();
2663	}
2664	if ((val & ~0377) != 0)
2665	error(RXERR_OCTAL);
2666	return Tok_Char \| val;
2667	#endif
2668	#ifndef QT_NO_REGEXP_ESCAPE
2669	case 'B':
2670	return Tok_NonWord;
2671	#endif
2672	#ifndef QT_NO_REGEXP_CCLASS
2673	case 'D':
2674	// see QChar::isDigit()
2675	yyCharClass->addCategories(0x7fffffef);
2676	return Tok_CharClass;
2677	case 'S':
2678	// see QChar::isSpace()
2679	yyCharClass->addCategories(0x7ffff87f);
2680	yyCharClass->addRange(0x0000, 0x0008);
2681	yyCharClass->addRange(0x000e, 0x001f);
2682	yyCharClass->addRange(0x007f, 0x009f);
2683	return Tok_CharClass;
2684	case 'W':
2685	// see QChar::isLetterOrNumber() and QChar::isMark()
2686	yyCharClass->addCategories(0x7fe07f81);
2687	yyCharClass->addRange(0x203f, 0x2040);
2688	yyCharClass->addSingleton(0x2040);
2689	yyCharClass->addSingleton(0x2054);
2690	yyCharClass->addSingleton(0x30fb);
2691	yyCharClass->addRange(0xfe33, 0xfe34);
2692	yyCharClass->addRange(0xfe4d, 0xfe4f);
2693	yyCharClass->addSingleton(0xff3f);
2694	yyCharClass->addSingleton(0xff65);
2695	return Tok_CharClass;
2696	#endif
2697	#ifndef QT_NO_REGEXP_ESCAPE
2698	case 'b':
2699	return Tok_Word;
2700	#endif
2701	#ifndef QT_NO_REGEXP_CCLASS
2702	case 'd':
2703	// see QChar::isDigit()
2704	yyCharClass->addCategories(0x00000010);
2705	return Tok_CharClass;
2706	case 's':
2707	// see QChar::isSpace()
2708	yyCharClass->addCategories(0x00000380);
2709	yyCharClass->addRange(0x0009, 0x000d);
2710	return Tok_CharClass;
2711	case 'w':
2712	// see QChar::isLetterOrNumber() and QChar::isMark()
2713	yyCharClass->addCategories(0x000f807e);
2714	yyCharClass->addSingleton(0x005f); // '_'
2715	return Tok_CharClass;
2716	#endif
2717	#ifndef QT_NO_REGEXP_ESCAPE
2718	case 'x':
2719	val = 0;
2720	for (i = 0; i < 4; i++) {
2721	low = QChar(yyCh).toLower().unicode();
2722	if (low >= '0' && low <= '9')
2723	val = (val << 4) \| (low - '0');
2724	else if (low >= 'a' && low <= 'f')
2725	val = (val << 4) \| (low - 'a' + 10);
2726	else
2727	break;
2728	yyCh = getChar();
2729	}
2730	return Tok_Char \| val;
2731	#endif
2732	default:
2733	if (prevCh >= '1' && prevCh <= '9') {
2734	#ifndef QT_NO_REGEXP_BACKREF
2735	val = prevCh - '0';
2736	while (yyCh >= '0' && yyCh <= '9') {
2737	val = (val * 10) + (yyCh - '0');
2738	yyCh = getChar();
2739	}
2740	return Tok_BackRef \| val;
2741	#else
2742	error(RXERR_DISABLED);
2743	#endif
2744	}
2745	return Tok_Char \| prevCh;
2746	}
2747	}
2748
2749	#ifndef QT_NO_REGEXP_INTERVAL
2750	int QRegExpEngine::getRep(int def)
2751	{
2752	if (yyCh >= '0' && yyCh <= '9') {
2753	int rep = 0;
2754	do {
2755	rep = 10 * rep + yyCh - '0';
2756	if (rep >= InftyRep) {
2757	error(RXERR_REPETITION);
2758	rep = def;
2759	}
2760	yyCh = getChar();
2761	} while (yyCh >= '0' && yyCh <= '9');
2762	return rep;
2763	} else {
2764	return def;
2765	}
2766	}
2767	#endif
2768
2769	#ifndef QT_NO_REGEXP_LOOKAHEAD
2770	void QRegExpEngine::skipChars(int n)
2771	{
2772	if (n > 0) {
2773	yyPos += n - 1;
2774	yyCh = getChar();
2775	}
2776	}
2777	#endif
2778
2779	void QRegExpEngine::error(const char *msg)
2780	{
2781	if (yyError.isEmpty())
2782	yyError = QLatin1String(msg);
2783	}
2784
2785	void QRegExpEngine::startTokenizer(const QChar *rx, int len)
2786	{
2787	yyIn = rx;
2788	yyPos0 = 0;
2789	yyPos = 0;
2790	yyLen = len;
2791	yyCh = getChar();
2792	yyCharClass = new QRegExpCharClass;
2793	yyMinRep = 0;
2794	yyMaxRep = 0;
2795	yyError = QString();
2796	}
2797
2798	int QRegExpEngine::getToken()
2799	{
2800	#ifndef QT_NO_REGEXP_CCLASS
2801	ushort pendingCh = 0;
2802	bool charPending;
2803	bool rangePending;
2804	int tok;
2805	#endif
2806	int prevCh = yyCh;
2807
2808	yyPos0 = yyPos - 1;
2809	#ifndef QT_NO_REGEXP_CCLASS
2810	yyCharClass->clear();
2811	#endif
2812	yyMinRep = 0;
2813	yyMaxRep = 0;
2814	yyCh = getChar();
2815
2816	switch (prevCh) {
2817	case EOS:
2818	yyPos0 = yyPos;
2819	return Tok_Eos;
2820	case '$':
2821	return Tok_Dollar;
2822	case '(':
2823	if (yyCh == '?') {
2824	prevCh = getChar();
2825	yyCh = getChar();
2826	switch (prevCh) {
2827	#ifndef QT_NO_REGEXP_LOOKAHEAD
2828	case '!':
2829	return Tok_NegLookahead;
2830	case '=':
2831	return Tok_PosLookahead;
2832	#endif
2833	case ':':
2834	return Tok_MagicLeftParen;
2835	default:
2836	error(RXERR_LOOKAHEAD);
2837	return Tok_MagicLeftParen;
2838	}
2839	} else {
2840	return Tok_LeftParen;
2841	}
2842	case ')':
2843	return Tok_RightParen;
2844	case '*':
2845	yyMinRep = 0;
2846	yyMaxRep = InftyRep;
2847	return Tok_Quantifier;
2848	case '+':
2849	yyMinRep = 1;
2850	yyMaxRep = InftyRep;
2851	return Tok_Quantifier;
2852	case '.':
2853	#ifndef QT_NO_REGEXP_CCLASS
2854	yyCharClass->setNegative(true);
2855	#endif
2856	return Tok_CharClass;
2857	case '?':
2858	yyMinRep = 0;
2859	yyMaxRep = 1;
2860	return Tok_Quantifier;
2861	case '[':
2862	#ifndef QT_NO_REGEXP_CCLASS
2863	if (yyCh == '^') {
2864	yyCharClass->setNegative(true);
2865	yyCh = getChar();
2866	}
2867	charPending = false;
2868	rangePending = false;
2869	do {
2870	if (yyCh == '-' && charPending && !rangePending) {
2871	rangePending = true;
2872	yyCh = getChar();
2873	} else {
2874	if (charPending && !rangePending) {
2875	yyCharClass->addSingleton(pendingCh);
2876	charPending = false;
2877	}
2878	if (yyCh == '\\') {
2879	yyCh = getChar();
2880	tok = getEscape();
2881	if (tok == Tok_Word)
2882	tok = '\b';
2883	} else {
2884	tok = Tok_Char \| yyCh;
2885	yyCh = getChar();
2886	}
2887	if (tok == Tok_CharClass) {
2888	if (rangePending) {
2889	yyCharClass->addSingleton('-');
2890	yyCharClass->addSingleton(pendingCh);
2891	charPending = false;
2892	rangePending = false;
2893	}
2894	} else if ((tok & Tok_Char) != 0) {
2895	if (rangePending) {
2896	yyCharClass->addRange(pendingCh, tok ^ Tok_Char);
2897	charPending = false;
2898	rangePending = false;
2899	} else {
2900	pendingCh = tok ^ Tok_Char;
2901	charPending = true;
2902	}
2903	} else {
2904	error(RXERR_CHARCLASS);
2905	}
2906	}
2907	} while (yyCh != ']' && yyCh != EOS);
2908	if (rangePending)
2909	yyCharClass->addSingleton('-');
2910	if (charPending)
2911	yyCharClass->addSingleton(pendingCh);
2912	if (yyCh == EOS)
2913	error(RXERR_END);
2914	else
2915	yyCh = getChar();
2916	return Tok_CharClass;
2917	#else
2918	error(RXERR_END);
2919	return Tok_Char \| '[';
2920	#endif
2921	case '\\':
2922	return getEscape();
2923	case ']':
2924	error(RXERR_LEFTDELIM);
2925	return Tok_Char \| ']';
2926	case '^':
2927	return Tok_Caret;
2928	case '{':
2929	#ifndef QT_NO_REGEXP_INTERVAL
2930	yyMinRep = getRep(0);
2931	yyMaxRep = yyMinRep;
2932	if (yyCh == ',') {
2933	yyCh = getChar();
2934	yyMaxRep = getRep(InftyRep);
2935	}
2936	if (yyMaxRep < yyMinRep)
2937	qSwap(yyMinRep, yyMaxRep);
2938	if (yyCh != '}')
2939	error(RXERR_REPETITION);
2940	yyCh = getChar();
2941	return Tok_Quantifier;
2942	#else
2943	error(RXERR_DISABLED);
2944	return Tok_Char \| '{';
2945	#endif
2946	case '\|':
2947	return Tok_Bar;
2948	case '}':
2949	error(RXERR_LEFTDELIM);
2950	return Tok_Char \| '}';
2951	default:
2952	return Tok_Char \| prevCh;
2953	}
2954	}
2955
2956	int QRegExpEngine::parse(const QChar *pattern, int len)
2957	{
2958	valid = true;
2959	startTokenizer(pattern, len);
2960	yyTok = getToken();
2961	#ifndef QT_NO_REGEXP_CAPTURE
2962	yyMayCapture = true;
2963	#else
2964	yyMayCapture = false;
2965	#endif
2966
2967	#ifndef QT_NO_REGEXP_CAPTURE
2968	int atom = startAtom(false);
2969	#endif
2970	QRegExpCharClass anything;
2971	Box box(this); // create InitialState
2972	box.set(anything);
2973	Box rightBox(this); // create FinalState
2974	rightBox.set(anything);
2975
2976	Box middleBox(this);
2977	parseExpression(&middleBox);
2978	#ifndef QT_NO_REGEXP_CAPTURE
2979	finishAtom(atom, false);
2980	#endif
2981	#ifndef QT_NO_REGEXP_OPTIM
2982	middleBox.setupHeuristics();
2983	#endif
2984	box.cat(middleBox);
2985	box.cat(rightBox);
2986	delete yyCharClass;
2987	yyCharClass = 0;
2988
2989	#ifndef QT_NO_REGEXP_CAPTURE
2990	for (int i = 0; i < nf; ++i) {
2991	switch (f[i].capture) {
2992	case QRegExpAtom::NoCapture:
2993	break;
2994	case QRegExpAtom::OfficialCapture:
2995	f[i].capture = ncap;
2996	captureForOfficialCapture.append(ncap);
2997	++ncap;
2998	++officialncap;
2999	break;
3000	case QRegExpAtom::UnofficialCapture:
3001	f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture;
3002	}
3003	}
3004
3005	#ifndef QT_NO_REGEXP_BACKREF
3006	#ifndef QT_NO_REGEXP_OPTIM
3007	if (officialncap == 0 && nbrefs == 0) {
3008	ncap = nf = 0;
3009	f.clear();
3010	}
3011	#endif
3012	// handle the case where there's a \5 with no corresponding capture
3013	// (captureForOfficialCapture.size() != officialncap)
3014	for (int i = 0; i < nbrefs - officialncap; ++i) {
3015	captureForOfficialCapture.append(ncap);
3016	++ncap;
3017	}
3018	#endif
3019	#endif
3020
3021	if (!yyError.isEmpty())
3022	return -1;
3023
3024	#ifndef QT_NO_REGEXP_OPTIM
3025	const QRegExpAutomatonState &sinit = s.at(InitialState);
3026	caretAnchored = !sinit.anchors.isEmpty();
3027	if (caretAnchored) {
3028	const QMap<int, int> &anchors = sinit.anchors;
3029	QMap<int, int>::const_iterator a;
3030	for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) {
3031	if (
3032	#ifndef QT_NO_REGEXP_ANCHOR_ALT
3033	(*a & Anchor_Alternation) != 0 \|\|
3034	#endif
3035	(*a & Anchor_Caret) == 0)
3036	{
3037	caretAnchored = false;
3038	break;
3039	}
3040	}
3041	}
3042	#endif
3043
3044	// cleanup anchors
3045	int numStates = s.count();
3046	for (int i = 0; i < numStates; ++i) {
3047	QRegExpAutomatonState &state = s[i];
3048	if (!state.anchors.isEmpty()) {
3049	QMap<int, int>::iterator a = state.anchors.begin();
3050	while (a != state.anchors.end()) {
3051	if (a.value() == 0)
3052	a = state.anchors.erase(a);
3053	else
3054	++a;
3055	}
3056	}
3057	}
3058
3059	return yyPos0;
3060	}
3061
3062	void QRegExpEngine::parseAtom(Box *box)
3063	{
3064	#ifndef QT_NO_REGEXP_LOOKAHEAD
3065	QRegExpEngine *eng = 0;
3066	bool neg;
3067	int len;
3068	#endif
3069
3070	if ((yyTok & Tok_Char) != 0) {
3071	box->set(QChar(yyTok ^ Tok_Char));
3072	} else {
3073	#ifndef QT_NO_REGEXP_OPTIM
3074	trivial = false;
3075	#endif
3076	switch (yyTok) {
3077	case Tok_Dollar:
3078	box->catAnchor(Anchor_Dollar);
3079	break;
3080	case Tok_Caret:
3081	box->catAnchor(Anchor_Caret);
3082	break;
3083	#ifndef QT_NO_REGEXP_LOOKAHEAD
3084	case Tok_PosLookahead:
3085	case Tok_NegLookahead:
3086	neg = (yyTok == Tok_NegLookahead);
3087	eng = new QRegExpEngine(cs, greedyQuantifiers);
3088	len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1);
3089	if (len >= 0)
3090	skipChars(len);
3091	else
3092	error(RXERR_LOOKAHEAD);
3093	box->catAnchor(addLookahead(eng, neg));
3094	yyTok = getToken();
3095	if (yyTok != Tok_RightParen)
3096	error(RXERR_LOOKAHEAD);
3097	break;
3098	#endif
3099	#ifndef QT_NO_REGEXP_ESCAPE
3100	case Tok_Word:
3101	box->catAnchor(Anchor_Word);
3102	break;
3103	case Tok_NonWord:
3104	box->catAnchor(Anchor_NonWord);
3105	break;
3106	#endif
3107	case Tok_LeftParen:
3108	case Tok_MagicLeftParen:
3109	yyTok = getToken();
3110	parseExpression(box);
3111	if (yyTok != Tok_RightParen)
3112	error(RXERR_END);
3113	break;
3114	case Tok_CharClass:
3115	box->set(*yyCharClass);
3116	break;
3117	case Tok_Quantifier:
3118	error(RXERR_REPETITION);
3119	break;
3120	default:
3121	#ifndef QT_NO_REGEXP_BACKREF
3122	if ((yyTok & Tok_BackRef) != 0)
3123	box->set(yyTok ^ Tok_BackRef);
3124	else
3125	#endif
3126	error(RXERR_DISABLED);
3127	}
3128	}
3129	yyTok = getToken();
3130	}
3131
/*
  Parses one factor, i.e. an atom optionally followed by a quantifier,
  into \a box.

  When a quantifier with a repetition interval follows the atom, the
  atom is re-parsed from the saved tokenizer state (see YYREDO()) once
  for every additional copy that is needed, and the copies are
  concatenated in front of the box built for the first parse.
*/
void QRegExpEngine::parseFactor(Box *box)
{
#ifndef QT_NO_REGEXP_CAPTURE
    // outerAtom is only started (and finished, at the end of this
    // function) when greedyQuantifiers is set
    int outerAtom = greedyQuantifiers ? startAtom(false) : -1;
    int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen);
    bool magicLeftParen = (yyTok == Tok_MagicLeftParen);
#else
    const int innerAtom = -1;
#endif

#ifndef QT_NO_REGEXP_INTERVAL
// Rewinds the tokenizer to the state saved in the locals below, so
// that the same atom can be parsed again.
#define YYREDO() \
    yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
    *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok

    // snapshot of the tokenizer state at the start of the atom
    const QChar *in = yyIn;
    int pos0 = yyPos0;
    int pos = yyPos;
    int len = yyLen;
    int ch = yyCh;
    QRegExpCharClass charClass;
    if (yyTok == Tok_CharClass)
        charClass = *yyCharClass;
    int tok = yyTok;
    bool mayCapture = yyMayCapture;
#endif

    parseAtom(box);
#ifndef QT_NO_REGEXP_CAPTURE
    finishAtom(innerAtom, magicLeftParen);
#endif

    bool hasQuantifier = (yyTok == Tok_Quantifier);
    if (hasQuantifier) {
#ifndef QT_NO_REGEXP_OPTIM
        trivial = false;
#endif
        // yyMinRep/yyMaxRep hold the repetition bounds of the quantifier
        if (yyMaxRep == InftyRep) {
            box->plus(innerAtom);
#ifndef QT_NO_REGEXP_INTERVAL
        } else if (yyMaxRep == 0) {
            box->clear();
#endif
        }
        if (yyMinRep == 0)
            box->opt();

#ifndef QT_NO_REGEXP_INTERVAL
        // captures are switched off while the atom is replayed; the
        // saved value is restored after the quantifier is consumed
        yyMayCapture = false;
        // alpha: additional mandatory copies of the atom
        // beta: additional optional copies of the atom
        int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1;
        int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1);

        Box rightBox(this);
        int i;

        // build the optional copies, each wrapping the ones built so far
        for (i = 0; i < beta; i++) {
            YYREDO();
            Box leftBox(this);
            parseAtom(&leftBox);
            leftBox.cat(rightBox);
            leftBox.opt();
            rightBox = leftBox;
        }
        // prepend the mandatory copies
        for (i = 0; i < alpha; i++) {
            YYREDO();
            Box leftBox(this);
            parseAtom(&leftBox);
            leftBox.cat(rightBox);
            rightBox = leftBox;
        }
        // the box from the first parse goes last
        rightBox.cat(*box);
        *box = rightBox;
#endif
        // step past the quantifier token
        yyTok = getToken();
#ifndef QT_NO_REGEXP_INTERVAL
        yyMayCapture = mayCapture;
#endif
    }
#undef YYREDO
#ifndef QT_NO_REGEXP_CAPTURE
    if (greedyQuantifiers)
        finishAtom(outerAtom, hasQuantifier);
#endif
}
3216
3217	void QRegExpEngine::parseTerm(Box *box)
3218	{
3219	#ifndef QT_NO_REGEXP_OPTIM
3220	if (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar)
3221	parseFactor(box);
3222	#endif
3223	while (yyTok != Tok_Eos && yyTok != Tok_RightParen && yyTok != Tok_Bar) {
3224	Box rightBox(this);
3225	parseFactor(&rightBox);
3226	box->cat(rightBox);
3227	}
3228	}
3229
3230	void QRegExpEngine::parseExpression(Box *box)
3231	{
3232	parseTerm(box);
3233	while (yyTok == Tok_Bar) {
3234	#ifndef QT_NO_REGEXP_OPTIM
3235	trivial = false;
3236	#endif
3237	Box rightBox(this);
3238	yyTok = getToken();
3239	parseTerm(&rightBox);
3240	box->orx(rightBox);
3241	}
3242	}
3243
/*
  The struct QRegExpPrivate contains the private data of a regular
  expression other than the automaton. It makes it possible for many
  QRegExp objects to use the same QRegExpEngine object with different
  QRegExpPrivate objects.
*/
struct QRegExpPrivate
{
    QRegExpEngine *eng; // engine in use; 0 until one has been prepared
    QRegExpEngineKey engineKey; // pattern, pattern syntax and case sensitivity
    bool minimal; // minimal (non-greedy) matching requested?
#ifndef QT_NO_REGEXP_CAPTURE
    QString t; // last string passed to QRegExp::indexIn() or lastIndexIn()
    QStringList capturedCache; // what QRegExp::capturedTexts() returned last
#endif
    QRegExpMatchState matchState; // per-object matching state, including the capture positions

    // Default state: empty RegExp pattern, case sensitive, greedy, no engine yet.
    inline QRegExpPrivate()
        : eng(0), engineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive), minimal(false) { }
    // Same defaults, but for the given engine key.
    inline QRegExpPrivate(const QRegExpEngineKey &key)
        : eng(0), engineKey(key), minimal(false) {}
};
3266
3267	#if !defined(QT_NO_REGEXP_OPTIM)
3268	uint qHash(const QRegExpEngineKey &key)
3269	{
3270	return qHash(key.pattern);
3271	}
3272
3273	typedef QCache<QRegExpEngineKey, QRegExpEngine> EngineCache;
3274	Q_GLOBAL_STATIC(EngineCache, globalEngineCache)
3275	Q_GLOBAL_STATIC(QMutex, mutex)
3276	#endif // QT_NO_REGEXP_OPTIM
3277
3278	static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key)
3279	{
3280	if (!eng->ref.deref()) {
3281	#if !defined(QT_NO_REGEXP_OPTIM)
3282	if (globalEngineCache()) {
3283	QMutexLocker locker(mutex());
3284	globalEngineCache()->insert(key, eng, 4 + key.pattern.length() / 4);
3285	}
3286	else
3287	delete eng;
3288	#else
3289	Q_UNUSED(key);
3290	delete eng;
3291	#endif
3292	}
3293	}
3294
3295	static void prepareEngine_helper(QRegExpPrivate *priv)
3296	{
3297	bool initMatchState = !priv->eng;
3298	#if !defined(QT_NO_REGEXP_OPTIM)
3299	if (!priv->eng) {
3300	QMutexLocker locker(mutex());
3301	priv->eng = globalEngineCache()->take(priv->engineKey);
3302	if (priv->eng != 0)
3303	priv->eng->ref.ref();
3304	}
3305	#endif // QT_NO_REGEXP_OPTIM
3306
3307	if (!priv->eng)
3308	priv->eng = new QRegExpEngine(priv->engineKey);
3309
3310	if (initMatchState)
3311	priv->matchState.prepareForMatch(priv->eng);
3312	}
3313
3314	inline static void prepareEngine(QRegExpPrivate *priv)
3315	{
3316	if (priv->eng)
3317	return;
3318	prepareEngine_helper(priv);
3319	}
3320
3321	static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str)
3322	{
3323	prepareEngine(priv);
3324	priv->matchState.prepareForMatch(priv->eng);
3325	#ifndef QT_NO_REGEXP_CAPTURE
3326	priv->t = str;
3327	priv->capturedCache.clear();
3328	#else
3329	Q_UNUSED(str);
3330	#endif
3331	}
3332
3333	static void invalidateEngine(QRegExpPrivate *priv)
3334	{
3335	if (priv->eng != 0) {
3336	derefEngine(priv->eng, priv->engineKey);
3337	priv->eng = 0;
3338	priv->matchState.drain();
3339	}
3340	}
3341
3342	/*!
3343	\enum QRegExp::CaretMode
3344
3345	The CaretMode enum defines the different meanings of the caret
3346	(\bold{^}) in a regular expression. The possible values are:
3347
3348	\value CaretAtZero
3349	The caret corresponds to index 0 in the searched string.
3350
3351	\value CaretAtOffset
3352	The caret corresponds to the start offset of the search.
3353
3354	\value CaretWontMatch
3355	The caret never matches.
3356	*/
3357
3358	/*!
3359	\enum QRegExp::PatternSyntax
3360
3361	The syntax used to interpret the meaning of the pattern.
3362
3363	\value RegExp A rich Perl-like pattern matching syntax. This is
3364	the default.
3365
3366	\value RegExp2 Like RegExp, but with \l{greedy quantifiers}. This
3367	will be the default in Qt 5. (Introduced in Qt 4.2.)
3368
3369	\value Wildcard This provides a simple pattern matching syntax
3370	similar to that used by shells (command interpreters) for "file
3371	globbing". See \l{Wildcard Matching}.
3372
3373	\value FixedString The pattern is a fixed string. This is
3374	equivalent to using the RegExp pattern on a string in
3375	which all metacharacters are escaped using escape().
3376
3377	\sa setPatternSyntax()
3378	*/
3379
3380	/*!
3381	Constructs an empty regexp.
3382
3383	\sa isValid(), errorString()
3384	*/
3385	QRegExp::QRegExp()
3386	{
3387	priv = new QRegExpPrivate;
3388	}
3389
3390	/*!
3391	Constructs a regular expression object for the given \a pattern
3392	string. The pattern must be given using wildcard notation if \a
3393	syntax is \l Wildcard; the default is \l RegExp. The pattern is
3394	case sensitive, unless \a cs is Qt::CaseInsensitive. Matching is
3395	greedy (maximal), but can be changed by calling
3396	setMinimal().
3397
3398	\sa setPattern(), setCaseSensitivity(), setPatternSyntax()
3399	*/
3400	QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax)
3401	{
3402	priv = new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs));
3403	}
3404
3405	/*!
3406	Constructs a regular expression as a copy of \a rx.
3407
3408	\sa operator=()
3409	*/
3410	QRegExp::QRegExp(const QRegExp &rx)
3411	{
3412	priv = new QRegExpPrivate;
3413	operator=(rx);
3414	}
3415
3416	/*!
3417	Destroys the regular expression and cleans up its internal data.
3418	*/
3419	QRegExp::~QRegExp()
3420	{
3421	invalidateEngine(priv);
3422	delete priv;
3423	}
3424
3425	/*!
3426	Copies the regular expression \a rx and returns a reference to the
3427	copy. The case sensitivity, wildcard, and minimal matching options
3428	are also copied.
3429	*/
3430	QRegExp &QRegExp::operator=(const QRegExp &rx)
3431	{
3432	prepareEngine(rx.priv); // to allow sharing
3433	QRegExpEngine *otherEng = rx.priv->eng;
3434	if (otherEng)
3435	otherEng->ref.ref();
3436	invalidateEngine(priv);
3437	priv->eng = otherEng;
3438	priv->engineKey = rx.priv->engineKey;
3439	priv->minimal = rx.priv->minimal;
3440	#ifndef QT_NO_REGEXP_CAPTURE
3441	priv->t = rx.priv->t;
3442	priv->capturedCache = rx.priv->capturedCache;
3443	#endif
3444	if (priv->eng)
3445	priv->matchState.prepareForMatch(priv->eng);
3446	priv->matchState.captured = rx.priv->matchState.captured;
3447	return *this;
3448	}
3449
3450	/*!
3451	Returns true if this regular expression is equal to \a rx;
3452	otherwise returns false.
3453
3454	Two QRegExp objects are equal if they have the same pattern
3455	strings and the same settings for case sensitivity, wildcard and
3456	minimal matching.
3457	*/
3458	bool QRegExp::operator==(const QRegExp &rx) const
3459	{
3460	return priv->engineKey == rx.priv->engineKey && priv->minimal == rx.priv->minimal;
3461	}
3462
3463	/*!
3464	\fn bool QRegExp::operator!=(const QRegExp &rx) const
3465
3466	Returns true if this regular expression is not equal to \a rx;
3467	otherwise returns false.
3468
3469	\sa operator==()
3470	*/
3471
3472	/*!
3473	Returns true if the pattern string is empty; otherwise returns
3474	false.
3475
3476	If you call exactMatch() with an empty pattern on an empty string
3477	it will return true; otherwise it returns false since it operates
3478	over the whole string. If you call indexIn() with an empty pattern
3479	on \e any string it will return the start offset (0 by default)
3480	because the empty pattern matches the 'emptiness' at the start of
3481	the string. In this case the length of the match returned by
3482	matchedLength() will be 0.
3483
3484	See QString::isEmpty().
3485	*/
3486
3487	bool QRegExp::isEmpty() const
3488	{
3489	return priv->engineKey.pattern.isEmpty();
3490	}
3491
3492	/*!
3493	Returns true if the regular expression is valid; otherwise returns
3494	false. An invalid regular expression never matches.
3495
3496	The pattern \bold{[a-z} is an example of an invalid pattern, since
3497	it lacks a closing square bracket.
3498
3499	Note that the validity of a regexp may also depend on the setting
3500	of the wildcard flag, for example \bold{*.html} is a valid
3501	wildcard regexp but an invalid full regexp.
3502
3503	\sa errorString()
3504	*/
3505	bool QRegExp::isValid() const
3506	{
3507	if (priv->engineKey.pattern.isEmpty()) {
3508	return true;
3509	} else {
3510	prepareEngine(priv);
3511	return priv->eng->isValid();
3512	}
3513	}
3514
3515	/*!
3516	Returns the pattern string of the regular expression. The pattern
3517	has either regular expression syntax or wildcard syntax, depending
3518	on patternSyntax().
3519
3520	\sa patternSyntax(), caseSensitivity()
3521	*/
3522	QString QRegExp::pattern() const
3523	{
3524	return priv->engineKey.pattern;
3525	}
3526
3527	/*!
3528	Sets the pattern string to \a pattern. The case sensitivity,
3529	wildcard, and minimal matching options are not changed.
3530
3531	\sa setPatternSyntax(), setCaseSensitivity()
3532	*/
3533	void QRegExp::setPattern(const QString &pattern)
3534	{
3535	if (priv->engineKey.pattern != pattern) {
3536	invalidateEngine(priv);
3537	priv->engineKey.pattern = pattern;
3538	}
3539	}
3540
3541	/*!
3542	Returns Qt::CaseSensitive if the regexp is matched case
3543	sensitively; otherwise returns Qt::CaseInsensitive.
3544
3545	\sa patternSyntax(), pattern(), isMinimal()
3546	*/
3547	Qt::CaseSensitivity QRegExp::caseSensitivity() const
3548	{
3549	return priv->engineKey.cs;
3550	}
3551
3552	/*!
3553	Sets case sensitive matching to \a cs.
3554
3555	If \a cs is Qt::CaseSensitive, \bold{\\.txt$} matches
3556	\c{readme.txt} but not \c{README.TXT}.
3557
3558	\sa setPatternSyntax(), setPattern(), setMinimal()
3559	*/
3560	void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs)
3561	{
3562	if ((bool)cs != (bool)priv->engineKey.cs) {
3563	invalidateEngine(priv);
3564	priv->engineKey.cs = cs;
3565	}
3566	}
3567
3568	/*!
3569	Returns the syntax used by the regular expression. The default is
3570	QRegExp::RegExp.
3571
3572	\sa pattern(), caseSensitivity()
3573	*/
3574	QRegExp::PatternSyntax QRegExp::patternSyntax() const
3575	{
3576	return priv->engineKey.patternSyntax;
3577	}
3578
3579	/*!
3580	Sets the syntax mode for the regular expression. The default is
3581	QRegExp::RegExp.
3582
3583	Setting \a syntax to QRegExp::Wildcard enables simple shell-like
3584	\l{wildcard matching}. For example, \bold{r*.txt} matches the
3585	string \c{readme.txt} in wildcard mode, but does not match
3586	\c{readme}.
3587
3588	Setting \a syntax to QRegExp::FixedString means that the pattern
3589	is interpreted as a plain string. Special characters (e.g.,
3590	backslash) don't need to be escaped then.
3591
3592	\sa setPattern(), setCaseSensitivity(), escape()
3593	*/
3594	void QRegExp::setPatternSyntax(PatternSyntax syntax)
3595	{
3596	if (syntax != priv->engineKey.patternSyntax) {
3597	invalidateEngine(priv);
3598	priv->engineKey.patternSyntax = syntax;
3599	}
3600	}
3601
3602	/*!
3603	Returns true if minimal (non-greedy) matching is enabled;
3604	otherwise returns false.
3605
3606	\sa caseSensitivity(), setMinimal()
3607	*/
3608	bool QRegExp::isMinimal() const
3609	{
3610	return priv->minimal;
3611	}
3612
3613	/*!
3614	Enables or disables minimal matching. If \a minimal is false,
3615	matching is greedy (maximal) which is the default.
3616
3617	For example, suppose we have the input string "We must be
3618	<b>bold</b>, very <b>bold</b>!" and the pattern
3619	\bold{<b>.*</b>}. With the default greedy (maximal) matching,
3620	the match is "We must be \underline{<b>bold</b>, very
3621	<b>bold</b>}!". But with minimal (non-greedy) matching, the
3622	first match is: "We must be \underline{<b>bold</b>}, very
3623	<b>bold</b>!" and the second match is "We must be <b>bold</b>,
3624	very \underline{<b>bold</b>}!". In practice we might use the pattern
3625	\bold{<b>[^<]*\</b>} instead, although this will still fail for
3626	nested tags.
3627
3628	\sa setCaseSensitivity()
3629	*/
3630	void QRegExp::setMinimal(bool minimal)
3631	{
3632	priv->minimal = minimal;
3633	}
3634
3635	// ### Qt 5: make non-const
3636	/*!
3637	Returns true if \a str is matched exactly by this regular
3638	expression; otherwise returns false. You can determine how much of
3639	the string was matched by calling matchedLength().
3640
3641	For a given regexp string R, exactMatch("R") is the equivalent of
3642	indexIn("^R$") since exactMatch() effectively encloses the regexp
3643	in the start of string and end of string anchors, except that it
3644	sets matchedLength() differently.
3645
3646	For example, if the regular expression is \bold{blue}, then
3647	exactMatch() returns true only for input \c blue. For inputs \c
3648	bluebell, \c blutak and \c lightblue, exactMatch() returns false
3649	and matchedLength() will return 4, 3 and 0 respectively.
3650
3651	Although const, this function sets matchedLength(),
3652	capturedTexts(), and pos().
3653
3654	\sa indexIn(), lastIndexIn()
3655	*/
3656	bool QRegExp::exactMatch(const QString &str) const
3657	{
3658	prepareEngineForMatch(priv, str);
3659	priv->matchState.match(str.unicode(), str.length(), 0, priv->minimal, true, 0);
3660	if (priv->matchState.captured[1] == str.length()) {
3661	return true;
3662	} else {
3663	priv->matchState.captured[0] = 0;
3664	priv->matchState.captured[1] = priv->matchState.oneTestMatchedLen;
3665	return false;
3666	}
3667	}
3668
3669	// ### Qt 5: make non-const
3670	/*!
3671	Attempts to find a match in \a str from position \a offset (0 by
3672	default). If \a offset is -1, the search starts at the last
3673	character; if -2, at the next to last character; etc.
3674
3675	Returns the position of the first match, or -1 if there was no
3676	match.
3677
3678	The \a caretMode parameter can be used to instruct whether \bold{^}
3679	should match at index 0 or at \a offset.
3680
3681	You might prefer to use QString::indexOf(), QString::contains(),
3682	or even QStringList::filter(). To replace matches use
3683	QString::replace().
3684
3685	Example:
3686	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 13
3687
3688	Although const, this function sets matchedLength(),
3689	capturedTexts() and pos().
3690
3691	If the QRegExp is a wildcard expression (see setPatternSyntax())
3692	and you want to test a string against the whole wildcard expression,
3693	use exactMatch() instead of this function.
3694
3695	\sa lastIndexIn(), exactMatch()
3696	*/
3697
3698	int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const
3699	{
3700	prepareEngineForMatch(priv, str);
3701	if (offset < 0)
3702	offset += str.length();
3703	priv->matchState.match(str.unicode(), str.length(), offset,
3704	priv->minimal, false, caretIndex(offset, caretMode));
3705	return priv->matchState.captured[0];
3706	}
3707
3708	// ### Qt 5: make non-const
3709	/*!
3710	Attempts to find a match backwards in \a str from position \a
3711	offset. If \a offset is -1 (the default), the search starts at the
3712	last character; if -2, at the next to last character; etc.
3713
3714	Returns the position of the first match, or -1 if there was no
3715	match.
3716
3717	The \a caretMode parameter can be used to instruct whether \bold{^}
3718	should match at index 0 or at \a offset.
3719
3720	Although const, this function sets matchedLength(),
3721	capturedTexts() and pos().
3722
3723	\warning Searching backwards is much slower than searching
3724	forwards.
3725
3726	\sa indexIn(), exactMatch()
3727	*/
3728
3729	int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const
3730	{
3731	prepareEngineForMatch(priv, str);
3732	if (offset < 0)
3733	offset += str.length();
3734	if (offset < 0 \|\| offset > str.length()) {
3735	memset(priv->matchState.captured, -1, priv->matchState.capturedSize*sizeof(int));
3736	return -1;
3737	}
3738
3739	while (offset >= 0) {
3740	priv->matchState.match(str.unicode(), str.length(), offset,
3741	priv->minimal, true, caretIndex(offset, caretMode));
3742	if (priv->matchState.captured[0] == offset)
3743	return offset;
3744	--offset;
3745	}
3746	return -1;
3747	}
3748
3749	/*!
3750	Returns the length of the last matched string, or -1 if there was
3751	no match.
3752
3753	\sa exactMatch(), indexIn(), lastIndexIn()
3754	*/
3755	int QRegExp::matchedLength() const
3756	{
3757	return priv->matchState.captured[1];
3758	}
3759
3760	#ifndef QT_NO_REGEXP_CAPTURE
3761	/*!
3762	Returns the number of captures contained in the regular expression.
3763	*/
3764	int QRegExp::numCaptures() const
3765	{
3766	prepareEngine(priv);
3767	return priv->eng->numCaptures();
3768	}
3769
3770	/*!
3771	Returns a list of the captured text strings.
3772
3773	The first string in the list is the entire matched string. Each
3774	subsequent list element contains a string that matched a
3775	(capturing) subexpression of the regexp.
3776
3777	For example:
3778	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 14
3779
3780	The above example also captures elements that may be present but
3781	which we have no interest in. This problem can be solved by using
3782	non-capturing parentheses:
3783
3784	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 15
3785
3786	Note that if you want to iterate over the list, you should iterate
3787	over a copy, e.g.
3788	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 16
3789
3790	Some regexps can match an indeterminate number of times. For
3791	example if the input string is "Offsets: 12 14 99 231 7" and the
3792	regexp, \c{rx}, is \bold{(\\d+)+}, we would hope to get a list of
3793	all the numbers matched. However, after calling
3794	\c{rx.indexIn(str)}, capturedTexts() will return the list ("12",
3795	"12"), i.e. the entire match was "12" and the first subexpression
3796	matched was "12". The correct approach is to use cap() in a
3797	\l{QRegExp#cap_in_a_loop}{loop}.
3798
3799	The order of elements in the string list is as follows. The first
3800	element is the entire matching string. Each subsequent element
3801	corresponds to the next capturing open left parentheses. Thus
3802	capturedTexts()[1] is the text of the first capturing parentheses,
3803	capturedTexts()[2] is the text of the second and so on
3804	(corresponding to $1, $2, etc., in some other regexp languages).
3805
3806	\sa cap(), pos()
3807	*/
3808	QStringList QRegExp::capturedTexts() const
3809	{
3810	if (priv->capturedCache.isEmpty()) {
3811	prepareEngine(priv);
3812	const int *captured = priv->matchState.captured;
3813	int n = priv->matchState.capturedSize;
3814
3815	for (int i = 0; i < n; i += 2) {
3816	QString m;
3817	if (captured[i + 1] == 0)
3818	m = QLatin1String(""); // ### Qt 5: don't distinguish between null and empty
3819	else if (captured[i] >= 0)
3820	m = priv->t.mid(captured[i], captured[i + 1]);
3821	priv->capturedCache.append(m);
3822	}
3823	priv->t.clear();
3824	}
3825	return priv->capturedCache;
3826	}
3827
3828	/*!
3829	\internal
3830	*/
3831	QStringList QRegExp::capturedTexts()
3832	{
3833	return const_cast<const QRegExp *>(this)->capturedTexts();
3834	}
3835
3836	/*!
3837	Returns the text captured by the \a nth subexpression. The entire
3838	match has index 0 and the parenthesized subexpressions have
3839	indexes starting from 1 (excluding non-capturing parentheses).
3840
3841	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 17
3842
3843	The order of elements matched by cap() is as follows. The first
3844	element, cap(0), is the entire matching string. Each subsequent
3845	element corresponds to the next capturing open left parentheses.
3846	Thus cap(1) is the text of the first capturing parentheses, cap(2)
3847	is the text of the second, and so on.
3848
3849	\sa capturedTexts(), pos()
3850	*/
3851	QString QRegExp::cap(int nth) const
3852	{
3853	return capturedTexts().value(nth);
3854	}
3855
3856	/*!
3857	\internal
3858	*/
3859	QString QRegExp::cap(int nth)
3860	{
3861	return const_cast<const QRegExp *>(this)->cap(nth);
3862	}
3863
3864	/*!
3865	Returns the position of the \a nth captured text in the searched
3866	string. If \a nth is 0 (the default), pos() returns the position
3867	of the whole match.
3868
3869	Example:
3870	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 18
3871
3872	For zero-length matches, pos() always returns -1. (For example, if
3873	cap(4) would return an empty string, pos(4) returns -1.) This is
3874	a feature of the implementation.
3875
3876	\sa cap(), capturedTexts()
3877	*/
3878	int QRegExp::pos(int nth) const
3879	{
3880	if (nth < 0 \|\| nth >= priv->matchState.capturedSize / 2)
3881	return -1;
3882	else
3883	return priv->matchState.captured[2 * nth];
3884	}
3885
3886	/*!
3887	\internal
3888	*/
3889	int QRegExp::pos(int nth)
3890	{
3891	return const_cast<const QRegExp *>(this)->pos(nth);
3892	}
3893
3894	/*!
3895	Returns a text string that explains why a regexp pattern is
3896	invalid, if that is the case; otherwise returns "no error occurred".
3897
3898	\sa isValid()
3899	*/
3900	QString QRegExp::errorString() const
3901	{
3902	if (isValid()) {
3903	return QString::fromLatin1(RXERR_OK);
3904	} else {
3905	return priv->eng->errorString();
3906	}
3907	}
3908
3909	/*!
3910	\internal
3911	*/
3912	QString QRegExp::errorString()
3913	{
3914	return const_cast<const QRegExp *>(this)->errorString();
3915	}
3916	#endif
3917
3918	/*!
3919	Returns the string \a str with every regexp special character
3920	escaped with a backslash. The special characters are $, (, ), *, +,
3921	., ?, [, \, ], ^, {, | and }.
3922
3923	Example:
3924
3925	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 19
3926
3927	This function is useful to construct regexp patterns dynamically:
3928
3929	\snippet doc/src/snippets/code/src_corelib_tools_qregexp.cpp 20
3930
3931	\sa setPatternSyntax()
3932	*/
3933	QString QRegExp::escape(const QString &str)
3934	{
3935	QString quoted;
3936	const int count = str.count();
3937	quoted.reserve(count * 2);
3938	const QLatin1Char backslash('\\');
3939	for (int i = 0; i < count; i++) {
3940	switch (str.at(i).toLatin1()) {
3941	case '$':
3942	case '(':
3943	case ')':
3944	case '*':
3945	case '+':
3946	case '.':
3947	case '?':
3948	case '[':
3949	case '\\':
3950	case ']':
3951	case '^':
3952	case '{':
3953	case '\|':
3954	case '}':
3955	quoted.append(backslash);
3956	}
3957	quoted.append(str.at(i));
3958	}
3959	return quoted;
3960	}
3961
3962	/*!
3963	\fn bool QRegExp::caseSensitive() const
3964
3965	Use \l caseSensitivity() instead.
3966	*/
3967
3968	/*!
3969	\fn void QRegExp::setCaseSensitive(bool sensitive)
3970
3971	Use \l setCaseSensitivity() instead.
3972	*/
3973
3974	/*!
3975	\fn bool QRegExp::wildcard() const
3976
3977	Use \l patternSyntax() instead.
3978
3979	\oldcode
3980	bool wc = rx.wildcard();
3981	\newcode
3982	bool wc = (rx.patternSyntax() == QRegExp::Wildcard);
3983	\endcode
3984	*/
3985
3986	/*!
3987	\fn void QRegExp::setWildcard(bool wildcard)
3988
3989	Use \l setPatternSyntax() instead.
3990
3991	\oldcode
3992	rx.setWildcard(wc);
3993	\newcode
3994	rx.setPatternSyntax(wc ? QRegExp::Wildcard : QRegExp::RegExp);
3995	\endcode
3996	*/
3997
3998	/*!
3999	\fn bool QRegExp::minimal() const
4000
4001	Use \l isMinimal() instead.
4002	*/
4003
4004	/*!
4005	\fn int QRegExp::search(const QString &str, int from = 0,
4006	CaretMode caretMode = CaretAtZero) const
4007
4008	Use \l indexIn() instead.
4009	*/
4010
4011	/*!
4012	\fn int QRegExp::searchRev(const QString &str, int from = -1, \
4013	CaretMode caretMode = CaretAtZero) const
4014
4015	Use \l lastIndexIn() instead.
4016	*/
4017
4018	/*!
4019	\fn QRegExp::QRegExp(const QString &pattern, bool cs, bool wildcard = false)
4020
4021	Use another constructor instead.
4022
4023	\oldcode
4024	QRegExp rx("*.txt", false, true);
4025	\newcode
4026	QRegExp rx("*.txt", Qt::CaseInsensitive, QRegExp::Wildcard);
4027	\endcode
4028	*/
4029
4030	#ifndef QT_NO_DATASTREAM
4031	/*!
4032	\relates QRegExp
4033
4034	Writes the regular expression \a regExp to stream \a out.
4035
4036	\sa {Format of the QDataStream Operators}
4037	*/
4038	QDataStream &operator<<(QDataStream &out, const QRegExp &regExp)
4039	{
4040	return out << regExp.pattern() << (quint8)regExp.caseSensitivity()
4041	<< (quint8)regExp.patternSyntax()
4042	<< (quint8)!!regExp.isMinimal();
4043	}
4044
4045	/*!
4046	\relates QRegExp
4047
4048	Reads a regular expression from stream \a in into \a regExp.
4049
4050	\sa {Format of the QDataStream Operators}
4051	*/
4052	QDataStream &operator>>(QDataStream &in, QRegExp &regExp)
4053	{
4054	QString pattern;
4055	quint8 cs;
4056	quint8 patternSyntax;
4057	quint8 isMinimal;
4058
4059	in >> pattern >> cs >> patternSyntax >> isMinimal;
4060
4061	QRegExp newRegExp(pattern, Qt::CaseSensitivity(cs),
4062	QRegExp::PatternSyntax(patternSyntax));
4063
4064	newRegExp.setMinimal(isMinimal);
4065	regExp = newRegExp;
4066	return in;
4067	}
4068	#endif
4069
4070	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/corelib/tools/qregexp.cpp@ 240

Download in other formats: