1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation ([email protected])
|
---|
6 | **
|
---|
7 | ** This file is part of the QtCore module of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include "qstringmatcher.h"
|
---|
43 | #include "qunicodetables_p.h"
|
---|
44 |
|
---|
45 | QT_BEGIN_NAMESPACE
|
---|
46 |
|
---|
47 | static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs)
|
---|
48 | {
|
---|
49 | int l = qMin(len, 255);
|
---|
50 | memset(skiptable, l, 256*sizeof(uchar));
|
---|
51 | uc += len - l;
|
---|
52 | if (cs == Qt::CaseSensitive) {
|
---|
53 | while (l--) {
|
---|
54 | skiptable[*uc & 0xff] = l;
|
---|
55 | uc++;
|
---|
56 | }
|
---|
57 | } else {
|
---|
58 | const ushort *start = uc;
|
---|
59 | while (l--) {
|
---|
60 | skiptable[foldCase(uc, start) & 0xff] = l;
|
---|
61 | uc++;
|
---|
62 | }
|
---|
63 | }
|
---|
64 | }
|
---|
65 |
|
---|
66 | static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl,
|
---|
67 | const uchar *skiptable, Qt::CaseSensitivity cs)
|
---|
68 | {
|
---|
69 | if (pl == 0)
|
---|
70 | return index > (int)l ? -1 : index;
|
---|
71 | const uint pl_minus_one = pl - 1;
|
---|
72 |
|
---|
73 | register const ushort *current = uc + index + pl_minus_one;
|
---|
74 | const ushort *end = uc + l;
|
---|
75 | if (cs == Qt::CaseSensitive) {
|
---|
76 | while (current < end) {
|
---|
77 | uint skip = skiptable[*current & 0xff];
|
---|
78 | if (!skip) {
|
---|
79 | // possible match
|
---|
80 | while (skip < pl) {
|
---|
81 | if (*(current - skip) != puc[pl_minus_one-skip])
|
---|
82 | break;
|
---|
83 | skip++;
|
---|
84 | }
|
---|
85 | if (skip > pl_minus_one) // we have a match
|
---|
86 | return (current - uc) - pl_minus_one;
|
---|
87 |
|
---|
88 | // in case we don't have a match we are a bit inefficient as we only skip by one
|
---|
89 | // when we have the non matching char in the string.
|
---|
90 | if (skiptable[*(current - skip) & 0xff] == pl)
|
---|
91 | skip = pl - skip;
|
---|
92 | else
|
---|
93 | skip = 1;
|
---|
94 | }
|
---|
95 | if (current > end - skip)
|
---|
96 | break;
|
---|
97 | current += skip;
|
---|
98 | }
|
---|
99 | } else {
|
---|
100 | while (current < end) {
|
---|
101 | uint skip = skiptable[foldCase(current, uc) & 0xff];
|
---|
102 | if (!skip) {
|
---|
103 | // possible match
|
---|
104 | while (skip < pl) {
|
---|
105 | if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
|
---|
106 | break;
|
---|
107 | skip++;
|
---|
108 | }
|
---|
109 | if (skip > pl_minus_one) // we have a match
|
---|
110 | return (current - uc) - pl_minus_one;
|
---|
111 | // in case we don't have a match we are a bit inefficient as we only skip by one
|
---|
112 | // when we have the non matching char in the string.
|
---|
113 | if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
|
---|
114 | skip = pl - skip;
|
---|
115 | else
|
---|
116 | skip = 1;
|
---|
117 | }
|
---|
118 | if (current > end - skip)
|
---|
119 | break;
|
---|
120 | current += skip;
|
---|
121 | }
|
---|
122 | }
|
---|
123 | return -1; // not found
|
---|
124 | }
|
---|
125 |
|
---|
126 | /*!
|
---|
127 | \class QStringMatcher
|
---|
128 | \brief The QStringMatcher class holds a sequence of characters that
|
---|
129 | can be quickly matched in a Unicode string.
|
---|
130 |
|
---|
131 | \ingroup tools
|
---|
132 | \ingroup string-processing
|
---|
133 |
|
---|
134 | This class is useful when you have a sequence of \l{QChar}s that
|
---|
135 | you want to repeatedly match against some strings (perhaps in a
|
---|
136 | loop), or when you want to search for the same sequence of
|
---|
137 | characters multiple times in the same string. Using a matcher
|
---|
138 | object and indexIn() is faster than matching a plain QString with
|
---|
139 | QString::indexOf() if repeated matching takes place. This class
|
---|
140 | offers no benefit if you are doing one-off string matches.
|
---|
141 |
|
---|
142 | Create the QStringMatcher with the QString you want to search
|
---|
143 | for. Then call indexIn() on the QString that you want to search.
|
---|
144 |
|
---|
145 | \sa QString, QByteArrayMatcher, QRegExp
|
---|
146 | */
|
---|
147 |
|
---|
148 | /*!
|
---|
149 | Constructs an empty string matcher that won't match anything.
|
---|
150 | Call setPattern() to give it a pattern to match.
|
---|
151 | */
|
---|
152 | QStringMatcher::QStringMatcher()
|
---|
153 | : d_ptr(0), q_cs(Qt::CaseSensitive)
|
---|
154 | {
|
---|
155 | qMemSet(q_data, 0, sizeof(q_data));
|
---|
156 | }
|
---|
157 |
|
---|
158 | /*!
|
---|
159 | Constructs a string matcher that will search for \a pattern, with
|
---|
160 | case sensitivity \a cs.
|
---|
161 |
|
---|
162 | Call indexIn() to perform a search.
|
---|
163 | */
|
---|
164 | QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
|
---|
165 | : d_ptr(0), q_pattern(pattern), q_cs(cs)
|
---|
166 | {
|
---|
167 | p.uc = pattern.unicode();
|
---|
168 | p.len = pattern.size();
|
---|
169 | bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs);
|
---|
170 | }
|
---|
171 |
|
---|
172 | /*!
|
---|
173 | \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
|
---|
174 | \since 4.5
|
---|
175 |
|
---|
176 | Constructs a string matcher that will search for the pattern referred to
|
---|
177 | by \a uc with the given \a length and case sensitivity specified by \a cs.
|
---|
178 | */
|
---|
179 | QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
|
---|
180 | : d_ptr(0), q_cs(cs)
|
---|
181 | {
|
---|
182 | p.uc = uc;
|
---|
183 | p.len = len;
|
---|
184 | bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs);
|
---|
185 | }
|
---|
186 |
|
---|
187 | /*!
|
---|
188 | Copies the \a other string matcher to this string matcher.
|
---|
189 | */
|
---|
190 | QStringMatcher::QStringMatcher(const QStringMatcher &other)
|
---|
191 | : d_ptr(0)
|
---|
192 | {
|
---|
193 | operator=(other);
|
---|
194 | }
|
---|
195 |
|
---|
196 | /*!
|
---|
197 | Destroys the string matcher.
|
---|
198 | */
|
---|
199 | QStringMatcher::~QStringMatcher()
|
---|
200 | {
|
---|
201 | }
|
---|
202 |
|
---|
203 | /*!
|
---|
204 | Assigns the \a other string matcher to this string matcher.
|
---|
205 | */
|
---|
206 | QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
|
---|
207 | {
|
---|
208 | if (this != &other) {
|
---|
209 | q_pattern = other.q_pattern;
|
---|
210 | q_cs = other.q_cs;
|
---|
211 | qMemCopy(q_data, other.q_data, sizeof(q_data));
|
---|
212 | }
|
---|
213 | return *this;
|
---|
214 | }
|
---|
215 |
|
---|
216 | /*!
|
---|
217 | Sets the string that this string matcher will search for to \a
|
---|
218 | pattern.
|
---|
219 |
|
---|
220 | \sa pattern(), setCaseSensitivity(), indexIn()
|
---|
221 | */
|
---|
222 | void QStringMatcher::setPattern(const QString &pattern)
|
---|
223 | {
|
---|
224 | q_pattern = pattern;
|
---|
225 | p.uc = pattern.unicode();
|
---|
226 | p.len = pattern.size();
|
---|
227 | bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs);
|
---|
228 | }
|
---|
229 |
|
---|
230 | /*!
|
---|
231 | \fn QString QStringMatcher::pattern() const
|
---|
232 |
|
---|
233 | Returns the string pattern that this string matcher will search
|
---|
234 | for.
|
---|
235 |
|
---|
236 | \sa setPattern()
|
---|
237 | */
|
---|
238 |
|
---|
239 | QString QStringMatcher::pattern() const
|
---|
240 | {
|
---|
241 | if (!q_pattern.isEmpty())
|
---|
242 | return q_pattern;
|
---|
243 | return QString(p.uc, p.len);
|
---|
244 | }
|
---|
245 |
|
---|
246 | /*!
|
---|
247 | Sets the case sensitivity setting of this string matcher to \a
|
---|
248 | cs.
|
---|
249 |
|
---|
250 | \sa caseSensitivity(), setPattern(), indexIn()
|
---|
251 | */
|
---|
252 | void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
|
---|
253 | {
|
---|
254 | if (cs == q_cs)
|
---|
255 | return;
|
---|
256 | bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs);
|
---|
257 | q_cs = cs;
|
---|
258 | }
|
---|
259 |
|
---|
260 | /*!
|
---|
261 | Searches the string \a str from character position \a from
|
---|
262 | (default 0, i.e. from the first character), for the string
|
---|
263 | pattern() that was set in the constructor or in the most recent
|
---|
264 | call to setPattern(). Returns the position where the pattern()
|
---|
265 | matched in \a str, or -1 if no match was found.
|
---|
266 |
|
---|
267 | \sa setPattern(), setCaseSensitivity()
|
---|
268 | */
|
---|
269 | int QStringMatcher::indexIn(const QString &str, int from) const
|
---|
270 | {
|
---|
271 | if (from < 0)
|
---|
272 | from = 0;
|
---|
273 | return bm_find((const ushort *)str.unicode(), str.size(), from,
|
---|
274 | (const ushort *)p.uc, p.len,
|
---|
275 | p.q_skiptable, q_cs);
|
---|
276 | }
|
---|
277 |
|
---|
278 | /*!
|
---|
279 | \since 4.5
|
---|
280 |
|
---|
281 | Searches the string starting at \a str (of length \a length) from
|
---|
282 | character position \a from (default 0, i.e. from the first
|
---|
283 | character), for the string pattern() that was set in the
|
---|
284 | constructor or in the most recent call to setPattern(). Returns
|
---|
285 | the position where the pattern() matched in \a str, or -1 if no
|
---|
286 | match was found.
|
---|
287 |
|
---|
288 | \sa setPattern(), setCaseSensitivity()
|
---|
289 | */
|
---|
290 | int QStringMatcher::indexIn(const QChar *str, int length, int from) const
|
---|
291 | {
|
---|
292 | if (from < 0)
|
---|
293 | from = 0;
|
---|
294 | return bm_find((const ushort *)str, length, from,
|
---|
295 | (const ushort *)p.uc, p.len,
|
---|
296 | p.q_skiptable, q_cs);
|
---|
297 | }
|
---|
298 |
|
---|
299 | /*!
|
---|
300 | \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
|
---|
301 |
|
---|
302 | Returns the case sensitivity setting for this string matcher.
|
---|
303 |
|
---|
304 | \sa setCaseSensitivity()
|
---|
305 | */
|
---|
306 |
|
---|
307 | /*!
|
---|
308 | \internal
|
---|
309 | */
|
---|
310 |
|
---|
311 | int qFindStringBoyerMoore(
|
---|
312 | const QChar *haystack, int haystackLen, int haystackOffset,
|
---|
313 | const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
|
---|
314 | {
|
---|
315 | uchar skiptable[256];
|
---|
316 | bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs);
|
---|
317 | if (haystackOffset < 0)
|
---|
318 | haystackOffset = 0;
|
---|
319 | return bm_find((const ushort *)haystack, haystackLen, haystackOffset,
|
---|
320 | (const ushort *)needle, needleLen, skiptable, cs);
|
---|
321 | }
|
---|
322 |
|
---|
323 | QT_END_NAMESPACE
|
---|