source: trunk/src/corelib/tools/qbytearraymatcher.cpp@ 17

Last change on this file since 17 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 9.9 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information ([email protected])
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qbytearraymatcher.h"
43
44#include <limits.h>
45
46QT_BEGIN_NAMESPACE
47
48static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
49{
50 int l = qMin(len, 255);
51 memset(skiptable, l, 256*sizeof(uchar));
52 cc += len - l;
53 while (l--)
54 skiptable[*cc++] = l;
55}
56
57static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl,
58 const uchar *skiptable)
59{
60 if (pl == 0)
61 return index > l ? -1 : index;
62 const uint pl_minus_one = pl - 1;
63
64 register const uchar *current = cc + index + pl_minus_one;
65 const uchar *end = cc + l;
66 while (current < end) {
67 uint skip = skiptable[*current];
68 if (!skip) {
69 // possible match
70 while (skip < pl) {
71 if (*(current - skip) != puc[pl_minus_one - skip])
72 break;
73 skip++;
74 }
75 if (skip > pl_minus_one) // we have a match
76 return (current - cc) - skip + 1;
77
78 // in case we don't have a match we are a bit inefficient as we only skip by one
79 // when we have the non matching char in the string.
80 if (skiptable[*(current - skip)] == pl)
81 skip = pl - skip;
82 else
83 skip = 1;
84 }
85 if (current > end - skip)
86 break;
87 current += skip;
88 }
89 return -1; // not found
90}
91
92/*! \class QByteArrayMatcher
93 \brief The QByteArrayMatcher class holds a sequence of bytes that
94 can be quickly matched in a byte array.
95
96 \ingroup tools
97 \ingroup text
98
99 This class is useful when you have a sequence of bytes that you
100 want to repeatedly match against some byte arrays (perhaps in a
101 loop), or when you want to search for the same sequence of bytes
102 multiple times in the same byte array. Using a matcher object and
103 indexIn() is faster than matching a plain QByteArray with
104 QByteArray::indexOf() if repeated matching takes place. This
105 class offers no benefit if you are doing one-off byte array
106 matches.
107
108 Create the QByteArrayMatcher with the QByteArray you want to
109 search for. Then call indexIn() on the QByteArray that you want to
110 search.
111
112 \sa QByteArray, QStringMatcher
113*/
114
115/*!
116 Constructs an empty byte array matcher that won't match anything.
117 Call setPattern() to give it a pattern to match.
118*/
119QByteArrayMatcher::QByteArrayMatcher()
120 : d(0)
121{
122 p.p = 0;
123 qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable));
124}
125
126/*!
127 Constructs a byte array matcher from \a pattern. \a pattern
128 has the given \a length. \a pattern must remain in scope, but
129 the destructor does not delete \a pattern.
130 */
131QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length)
132 : d(0)
133{
134 p.p = reinterpret_cast<const uchar *>(pattern);
135 p.l = length;
136 bm_init_skiptable(p.p, p.l, p.q_skiptable);
137}
138
139/*!
140 Constructs a byte array matcher that will search for \a pattern.
141 Call indexIn() to perform a search.
142*/
143QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
144 : d(0), q_pattern(pattern)
145{
146 p.p = reinterpret_cast<const uchar *>(pattern.constData());
147 p.l = pattern.size();
148 bm_init_skiptable(p.p, p.l, p.q_skiptable);
149}
150
151/*!
152 Copies the \a other byte array matcher to this byte array matcher.
153*/
154QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
155 : d(0)
156{
157 operator=(other);
158}
159
160/*!
161 Destroys the byte array matcher.
162*/
163QByteArrayMatcher::~QByteArrayMatcher()
164{
165}
166
167/*!
168 Assigns the \a other byte array matcher to this byte array matcher.
169*/
170QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
171{
172 q_pattern = other.q_pattern;
173 qMemCopy(p.q_skiptable, other.p.q_skiptable, sizeof(p.q_skiptable));
174 return *this;
175}
176
177/*!
178 Sets the byte array that this byte array matcher will search for
179 to \a pattern.
180
181 \sa pattern(), indexIn()
182*/
183void QByteArrayMatcher::setPattern(const QByteArray &pattern)
184{
185 q_pattern = pattern;
186 p.p = reinterpret_cast<const uchar *>(pattern.constData());
187 p.l = pattern.size();
188 bm_init_skiptable(p.p, p.l, p.q_skiptable);
189}
190
191/*!
192 Searches the byte array \a ba, from byte position \a from (default
193 0, i.e. from the first byte), for the byte array pattern() that
194 was set in the constructor or in the most recent call to
195 setPattern(). Returns the position where the pattern() matched in
196 \a ba, or -1 if no match was found.
197*/
198int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const
199{
200 if (from < 0)
201 from = 0;
202 return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from,
203 p.p, p.l, p.q_skiptable);
204}
205
206/*!
207 Searches the char string \a str, which has length \a len, from
208 byte position \a from (default 0, i.e. from the first byte), for
209 the byte array pattern() that was set in the constructor or in the
210 most recent call to setPattern(). Returns the position where the
211 pattern() matched in \a str, or -1 if no match was found.
212*/
213int QByteArrayMatcher::indexIn(const char *str, int len, int from) const
214{
215 if (from < 0)
216 from = 0;
217 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
218 p.p, p.l, p.q_skiptable);
219}
220
221/*!
222 \fn QByteArray QByteArrayMatcher::pattern() const
223
224 Returns the byte array pattern that this byte array matcher will
225 search for.
226
227 \sa setPattern()
228*/
229
230
231static int findChar(const char *str, int len, char ch, int from)
232{
233 const uchar *s = (const uchar *)str;
234 uchar c = (uchar)ch;
235 if (from < 0)
236 from = qMax(from + len, 0);
237 if (from < len) {
238 const uchar *n = s + from - 1;
239 const uchar *e = s + len;
240 while (++n != e)
241 if (*n == c)
242 return n - s;
243 }
244 return -1;
245}
246
247/*! \internal
248 */
249static int qFindByteArrayBoyerMoore(
250 const char *haystack, int haystackLen, int haystackOffset,
251 const char *needle, int needleLen)
252{
253 uchar skiptable[256];
254 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
255 if (haystackOffset < 0)
256 haystackOffset = 0;
257 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
258 (const uchar *)needle, needleLen, skiptable);
259}
260
261#define REHASH(a) \
262 if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \
263 hashHaystack -= (a) << sl_minus_1; \
264 hashHaystack <<= 1
265
266/*! \internal
267 */
268int qFindByteArray(
269 const char *haystack0, int haystackLen, int from,
270 const char *needle, int needleLen)
271{
272 const int l = haystackLen;
273 const int sl = needleLen;
274 if (from < 0)
275 from += l;
276 if (uint(sl + from) > (uint)l)
277 return -1;
278 if (!sl)
279 return from;
280 if (!l)
281 return -1;
282
283 if (sl == 1)
284 return findChar(haystack0, haystackLen, needle[0], from);
285
286 /*
287 We use the Boyer-Moore algorithm in cases where the overhead
288 for the skip table should pay off, otherwise we use a simple
289 hash function.
290 */
291 if (l > 500 && sl > 5)
292 return qFindByteArrayBoyerMoore(haystack0, haystackLen, from,
293 needle, needleLen);
294
295 /*
296 We use some hashing for efficiency's sake. Instead of
297 comparing strings, we compare the hash value of str with that
298 of a part of this QString. Only if that matches, we call memcmp().
299 */
300 const char *haystack = haystack0 + from;
301 const char *end = haystack0 + (l - sl);
302 const uint sl_minus_1 = sl - 1;
303 uint hashNeedle = 0, hashHaystack = 0;
304 int idx;
305 for (idx = 0; idx < sl; ++idx) {
306 hashNeedle = ((hashNeedle<<1) + needle[idx]);
307 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
308 }
309 hashHaystack -= *(haystack + sl_minus_1);
310
311 while (haystack <= end) {
312 hashHaystack += *(haystack + sl_minus_1);
313 if (hashHaystack == hashNeedle && *needle == *haystack
314 && memcmp(needle, haystack, sl) == 0)
315 return haystack - haystack0;
316
317 REHASH(*haystack);
318 ++haystack;
319 }
320 return -1;
321}
322
323QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.