source: trunk/src/corelib/tools/qstringmatcher.cpp@ 788

Last change on this file since 788 was 651, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 10.1 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qstringmatcher.h"
43#include "qunicodetables_p.h"
44
45QT_BEGIN_NAMESPACE
46
47static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs)
48{
49 int l = qMin(len, 255);
50 memset(skiptable, l, 256*sizeof(uchar));
51 uc += len - l;
52 if (cs == Qt::CaseSensitive) {
53 while (l--) {
54 skiptable[*uc & 0xff] = l;
55 uc++;
56 }
57 } else {
58 const ushort *start = uc;
59 while (l--) {
60 skiptable[foldCase(uc, start) & 0xff] = l;
61 uc++;
62 }
63 }
64}
65
66static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl,
67 const uchar *skiptable, Qt::CaseSensitivity cs)
68{
69 if (pl == 0)
70 return index > (int)l ? -1 : index;
71 const uint pl_minus_one = pl - 1;
72
73 register const ushort *current = uc + index + pl_minus_one;
74 const ushort *end = uc + l;
75 if (cs == Qt::CaseSensitive) {
76 while (current < end) {
77 uint skip = skiptable[*current & 0xff];
78 if (!skip) {
79 // possible match
80 while (skip < pl) {
81 if (*(current - skip) != puc[pl_minus_one-skip])
82 break;
83 skip++;
84 }
85 if (skip > pl_minus_one) // we have a match
86 return (current - uc) - pl_minus_one;
87
88 // in case we don't have a match we are a bit inefficient as we only skip by one
89 // when we have the non matching char in the string.
90 if (skiptable[*(current - skip) & 0xff] == pl)
91 skip = pl - skip;
92 else
93 skip = 1;
94 }
95 if (current > end - skip)
96 break;
97 current += skip;
98 }
99 } else {
100 while (current < end) {
101 uint skip = skiptable[foldCase(current, uc) & 0xff];
102 if (!skip) {
103 // possible match
104 while (skip < pl) {
105 if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
106 break;
107 skip++;
108 }
109 if (skip > pl_minus_one) // we have a match
110 return (current - uc) - pl_minus_one;
111 // in case we don't have a match we are a bit inefficient as we only skip by one
112 // when we have the non matching char in the string.
113 if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
114 skip = pl - skip;
115 else
116 skip = 1;
117 }
118 if (current > end - skip)
119 break;
120 current += skip;
121 }
122 }
123 return -1; // not found
124}
125
126/*!
127 \class QStringMatcher
128 \brief The QStringMatcher class holds a sequence of characters that
129 can be quickly matched in a Unicode string.
130
131 \ingroup tools
132 \ingroup string-processing
133
134 This class is useful when you have a sequence of \l{QChar}s that
135 you want to repeatedly match against some strings (perhaps in a
136 loop), or when you want to search for the same sequence of
137 characters multiple times in the same string. Using a matcher
138 object and indexIn() is faster than matching a plain QString with
139 QString::indexOf() if repeated matching takes place. This class
140 offers no benefit if you are doing one-off string matches.
141
142 Create the QStringMatcher with the QString you want to search
143 for. Then call indexIn() on the QString that you want to search.
144
145 \sa QString, QByteArrayMatcher, QRegExp
146*/
147
148/*!
149 Constructs an empty string matcher that won't match anything.
150 Call setPattern() to give it a pattern to match.
151*/
152QStringMatcher::QStringMatcher()
153 : d_ptr(0), q_cs(Qt::CaseSensitive)
154{
155 qMemSet(q_data, 0, sizeof(q_data));
156}
157
158/*!
159 Constructs a string matcher that will search for \a pattern, with
160 case sensitivity \a cs.
161
162 Call indexIn() to perform a search.
163*/
164QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
165 : d_ptr(0), q_pattern(pattern), q_cs(cs)
166{
167 p.uc = pattern.unicode();
168 p.len = pattern.size();
169 bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs);
170}
171
172/*!
173 \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
174 \since 4.5
175
176 Constructs a string matcher that will search for the pattern referred to
177 by \a uc with the given \a length and case sensitivity specified by \a cs.
178*/
179QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
180 : d_ptr(0), q_cs(cs)
181{
182 p.uc = uc;
183 p.len = len;
184 bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs);
185}
186
187/*!
188 Copies the \a other string matcher to this string matcher.
189*/
190QStringMatcher::QStringMatcher(const QStringMatcher &other)
191 : d_ptr(0)
192{
193 operator=(other);
194}
195
196/*!
197 Destroys the string matcher.
198*/
199QStringMatcher::~QStringMatcher()
200{
201}
202
203/*!
204 Assigns the \a other string matcher to this string matcher.
205*/
206QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
207{
208 if (this != &other) {
209 q_pattern = other.q_pattern;
210 q_cs = other.q_cs;
211 qMemCopy(q_data, other.q_data, sizeof(q_data));
212 }
213 return *this;
214}
215
216/*!
217 Sets the string that this string matcher will search for to \a
218 pattern.
219
220 \sa pattern(), setCaseSensitivity(), indexIn()
221*/
222void QStringMatcher::setPattern(const QString &pattern)
223{
224 q_pattern = pattern;
225 p.uc = pattern.unicode();
226 p.len = pattern.size();
227 bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs);
228}
229
230/*!
231 \fn QString QStringMatcher::pattern() const
232
233 Returns the string pattern that this string matcher will search
234 for.
235
236 \sa setPattern()
237*/
238
239QString QStringMatcher::pattern() const
240{
241 if (!q_pattern.isEmpty())
242 return q_pattern;
243 return QString(p.uc, p.len);
244}
245
246/*!
247 Sets the case sensitivity setting of this string matcher to \a
248 cs.
249
250 \sa caseSensitivity(), setPattern(), indexIn()
251*/
252void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
253{
254 if (cs == q_cs)
255 return;
256 bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs);
257 q_cs = cs;
258}
259
260/*!
261 Searches the string \a str from character position \a from
262 (default 0, i.e. from the first character), for the string
263 pattern() that was set in the constructor or in the most recent
264 call to setPattern(). Returns the position where the pattern()
265 matched in \a str, or -1 if no match was found.
266
267 \sa setPattern(), setCaseSensitivity()
268*/
269int QStringMatcher::indexIn(const QString &str, int from) const
270{
271 if (from < 0)
272 from = 0;
273 return bm_find((const ushort *)str.unicode(), str.size(), from,
274 (const ushort *)p.uc, p.len,
275 p.q_skiptable, q_cs);
276}
277
278/*!
279 \since 4.5
280
281 Searches the string starting at \a str (of length \a length) from
282 character position \a from (default 0, i.e. from the first
283 character), for the string pattern() that was set in the
284 constructor or in the most recent call to setPattern(). Returns
285 the position where the pattern() matched in \a str, or -1 if no
286 match was found.
287
288 \sa setPattern(), setCaseSensitivity()
289*/
290int QStringMatcher::indexIn(const QChar *str, int length, int from) const
291{
292 if (from < 0)
293 from = 0;
294 return bm_find((const ushort *)str, length, from,
295 (const ushort *)p.uc, p.len,
296 p.q_skiptable, q_cs);
297}
298
299/*!
300 \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
301
302 Returns the case sensitivity setting for this string matcher.
303
304 \sa setCaseSensitivity()
305*/
306
307/*!
308 \internal
309*/
310
311int qFindStringBoyerMoore(
312 const QChar *haystack, int haystackLen, int haystackOffset,
313 const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
314{
315 uchar skiptable[256];
316 bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs);
317 if (haystackOffset < 0)
318 haystackOffset = 0;
319 return bm_find((const ushort *)haystack, haystackLen, haystackOffset,
320 (const ushort *)needle, needleLen, skiptable, cs);
321}
322
323QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.