1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation (qt-info@nokia.com)
|
---|
6 | **
|
---|
7 | ** This file is part of the Qt Assistant of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at qt-info@nokia.com.
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include "qhelpenginecore.h"
|
---|
43 | #include "qhelp_global.h"
|
---|
44 | #include "fulltextsearch/qhits_p.h"
|
---|
45 | #include "fulltextsearch/qquery_p.h"
|
---|
46 | #include "fulltextsearch/qanalyzer_p.h"
|
---|
47 | #include "fulltextsearch/qdocument_p.h"
|
---|
48 | #include "fulltextsearch/qsearchable_p.h"
|
---|
49 | #include "fulltextsearch/qindexreader_p.h"
|
---|
50 | #include "fulltextsearch/qindexwriter_p.h"
|
---|
51 | #include "qhelpsearchindexwriter_clucene_p.h"
|
---|
52 |
|
---|
53 | #include <QtCore/QDir>
|
---|
54 | #include <QtCore/QString>
|
---|
55 | #include <QtCore/QFileInfo>
|
---|
56 | #include <QtCore/QTextCodec>
|
---|
57 | #include <QtCore/QTextStream>
|
---|
58 |
|
---|
59 | #include <QtNetwork/QLocalSocket>
|
---|
60 | #include <QtNetwork/QLocalServer>
|
---|
61 |
|
---|
62 | #include "private/qfunctions_p.h"
|
---|
63 |
|
---|
64 | QT_BEGIN_NAMESPACE
|
---|
65 |
|
---|
66 | namespace qt {
|
---|
67 | namespace fulltextsearch {
|
---|
68 | namespace clucene {
|
---|
69 |
|
---|
70 | // taken from qtexthtmlparser
|
---|
71 | static const struct QTextHtmlEntity
|
---|
72 | {
|
---|
73 | const char *name;
|
---|
74 | quint16 code;
|
---|
75 | } entities[] = {
|
---|
76 | { "AElig", 0x00c6 },
|
---|
77 | { "AMP", 38 },
|
---|
78 | { "Aacute", 0x00c1 },
|
---|
79 | { "Acirc", 0x00c2 },
|
---|
80 | { "Agrave", 0x00c0 },
|
---|
81 | { "Alpha", 0x0391 },
|
---|
82 | { "Aring", 0x00c5 },
|
---|
83 | { "Atilde", 0x00c3 },
|
---|
84 | { "Auml", 0x00c4 },
|
---|
85 | { "Beta", 0x0392 },
|
---|
86 | { "Ccedil", 0x00c7 },
|
---|
87 | { "Chi", 0x03a7 },
|
---|
88 | { "Dagger", 0x2021 },
|
---|
89 | { "Delta", 0x0394 },
|
---|
90 | { "ETH", 0x00d0 },
|
---|
91 | { "Eacute", 0x00c9 },
|
---|
92 | { "Ecirc", 0x00ca },
|
---|
93 | { "Egrave", 0x00c8 },
|
---|
94 | { "Epsilon", 0x0395 },
|
---|
95 | { "Eta", 0x0397 },
|
---|
96 | { "Euml", 0x00cb },
|
---|
97 | { "GT", 62 },
|
---|
98 | { "Gamma", 0x0393 },
|
---|
99 | { "Iacute", 0x00cd },
|
---|
100 | { "Icirc", 0x00ce },
|
---|
101 | { "Igrave", 0x00cc },
|
---|
102 | { "Iota", 0x0399 },
|
---|
103 | { "Iuml", 0x00cf },
|
---|
104 | { "Kappa", 0x039a },
|
---|
105 | { "LT", 60 },
|
---|
106 | { "Lambda", 0x039b },
|
---|
107 | { "Mu", 0x039c },
|
---|
108 | { "Ntilde", 0x00d1 },
|
---|
109 | { "Nu", 0x039d },
|
---|
110 | { "OElig", 0x0152 },
|
---|
111 | { "Oacute", 0x00d3 },
|
---|
112 | { "Ocirc", 0x00d4 },
|
---|
113 | { "Ograve", 0x00d2 },
|
---|
114 | { "Omega", 0x03a9 },
|
---|
115 | { "Omicron", 0x039f },
|
---|
116 | { "Oslash", 0x00d8 },
|
---|
117 | { "Otilde", 0x00d5 },
|
---|
118 | { "Ouml", 0x00d6 },
|
---|
119 | { "Phi", 0x03a6 },
|
---|
120 | { "Pi", 0x03a0 },
|
---|
121 | { "Prime", 0x2033 },
|
---|
122 | { "Psi", 0x03a8 },
|
---|
123 | { "QUOT", 34 },
|
---|
124 | { "Rho", 0x03a1 },
|
---|
125 | { "Scaron", 0x0160 },
|
---|
126 | { "Sigma", 0x03a3 },
|
---|
127 | { "THORN", 0x00de },
|
---|
128 | { "Tau", 0x03a4 },
|
---|
129 | { "Theta", 0x0398 },
|
---|
130 | { "Uacute", 0x00da },
|
---|
131 | { "Ucirc", 0x00db },
|
---|
132 | { "Ugrave", 0x00d9 },
|
---|
133 | { "Upsilon", 0x03a5 },
|
---|
134 | { "Uuml", 0x00dc },
|
---|
135 | { "Xi", 0x039e },
|
---|
136 | { "Yacute", 0x00dd },
|
---|
137 | { "Yuml", 0x0178 },
|
---|
138 | { "Zeta", 0x0396 },
|
---|
139 | { "aacute", 0x00e1 },
|
---|
140 | { "acirc", 0x00e2 },
|
---|
141 | { "acute", 0x00b4 },
|
---|
142 | { "aelig", 0x00e6 },
|
---|
143 | { "agrave", 0x00e0 },
|
---|
144 | { "alefsym", 0x2135 },
|
---|
145 | { "alpha", 0x03b1 },
|
---|
146 | { "amp", 38 },
|
---|
147 | { "and", 0x22a5 },
|
---|
148 | { "ang", 0x2220 },
|
---|
149 | { "apos", 0x0027 },
|
---|
150 | { "aring", 0x00e5 },
|
---|
151 | { "asymp", 0x2248 },
|
---|
152 | { "atilde", 0x00e3 },
|
---|
153 | { "auml", 0x00e4 },
|
---|
154 | { "bdquo", 0x201e },
|
---|
155 | { "beta", 0x03b2 },
|
---|
156 | { "brvbar", 0x00a6 },
|
---|
157 | { "bull", 0x2022 },
|
---|
158 | { "cap", 0x2229 },
|
---|
159 | { "ccedil", 0x00e7 },
|
---|
160 | { "cedil", 0x00b8 },
|
---|
161 | { "cent", 0x00a2 },
|
---|
162 | { "chi", 0x03c7 },
|
---|
163 | { "circ", 0x02c6 },
|
---|
164 | { "clubs", 0x2663 },
|
---|
165 | { "cong", 0x2245 },
|
---|
166 | { "copy", 0x00a9 },
|
---|
167 | { "crarr", 0x21b5 },
|
---|
168 | { "cup", 0x222a },
|
---|
169 | { "curren", 0x00a4 },
|
---|
170 | { "dArr", 0x21d3 },
|
---|
171 | { "dagger", 0x2020 },
|
---|
172 | { "darr", 0x2193 },
|
---|
173 | { "deg", 0x00b0 },
|
---|
174 | { "delta", 0x03b4 },
|
---|
175 | { "diams", 0x2666 },
|
---|
176 | { "divide", 0x00f7 },
|
---|
177 | { "eacute", 0x00e9 },
|
---|
178 | { "ecirc", 0x00ea },
|
---|
179 | { "egrave", 0x00e8 },
|
---|
180 | { "empty", 0x2205 },
|
---|
181 | { "emsp", 0x2003 },
|
---|
182 | { "ensp", 0x2002 },
|
---|
183 | { "epsilon", 0x03b5 },
|
---|
184 | { "equiv", 0x2261 },
|
---|
185 | { "eta", 0x03b7 },
|
---|
186 | { "eth", 0x00f0 },
|
---|
187 | { "euml", 0x00eb },
|
---|
188 | { "euro", 0x20ac },
|
---|
189 | { "exist", 0x2203 },
|
---|
190 | { "fnof", 0x0192 },
|
---|
191 | { "forall", 0x2200 },
|
---|
192 | { "frac12", 0x00bd },
|
---|
193 | { "frac14", 0x00bc },
|
---|
194 | { "frac34", 0x00be },
|
---|
195 | { "frasl", 0x2044 },
|
---|
196 | { "gamma", 0x03b3 },
|
---|
197 | { "ge", 0x2265 },
|
---|
198 | { "gt", 62 },
|
---|
199 | { "hArr", 0x21d4 },
|
---|
200 | { "harr", 0x2194 },
|
---|
201 | { "hearts", 0x2665 },
|
---|
202 | { "hellip", 0x2026 },
|
---|
203 | { "iacute", 0x00ed },
|
---|
204 | { "icirc", 0x00ee },
|
---|
205 | { "iexcl", 0x00a1 },
|
---|
206 | { "igrave", 0x00ec },
|
---|
207 | { "image", 0x2111 },
|
---|
208 | { "infin", 0x221e },
|
---|
209 | { "int", 0x222b },
|
---|
210 | { "iota", 0x03b9 },
|
---|
211 | { "iquest", 0x00bf },
|
---|
212 | { "isin", 0x2208 },
|
---|
213 | { "iuml", 0x00ef },
|
---|
214 | { "kappa", 0x03ba },
|
---|
215 | { "lArr", 0x21d0 },
|
---|
216 | { "lambda", 0x03bb },
|
---|
217 | { "lang", 0x2329 },
|
---|
218 | { "laquo", 0x00ab },
|
---|
219 | { "larr", 0x2190 },
|
---|
220 | { "lceil", 0x2308 },
|
---|
221 | { "ldquo", 0x201c },
|
---|
222 | { "le", 0x2264 },
|
---|
223 | { "lfloor", 0x230a },
|
---|
224 | { "lowast", 0x2217 },
|
---|
225 | { "loz", 0x25ca },
|
---|
226 | { "lrm", 0x200e },
|
---|
227 | { "lsaquo", 0x2039 },
|
---|
228 | { "lsquo", 0x2018 },
|
---|
229 | { "lt", 60 },
|
---|
230 | { "macr", 0x00af },
|
---|
231 | { "mdash", 0x2014 },
|
---|
232 | { "micro", 0x00b5 },
|
---|
233 | { "middot", 0x00b7 },
|
---|
234 | { "minus", 0x2212 },
|
---|
235 | { "mu", 0x03bc },
|
---|
236 | { "nabla", 0x2207 },
|
---|
237 | { "nbsp", 0x00a0 },
|
---|
238 | { "ndash", 0x2013 },
|
---|
239 | { "ne", 0x2260 },
|
---|
240 | { "ni", 0x220b },
|
---|
241 | { "not", 0x00ac },
|
---|
242 | { "notin", 0x2209 },
|
---|
243 | { "nsub", 0x2284 },
|
---|
244 | { "ntilde", 0x00f1 },
|
---|
245 | { "nu", 0x03bd },
|
---|
246 | { "oacute", 0x00f3 },
|
---|
247 | { "ocirc", 0x00f4 },
|
---|
248 | { "oelig", 0x0153 },
|
---|
249 | { "ograve", 0x00f2 },
|
---|
250 | { "oline", 0x203e },
|
---|
251 | { "omega", 0x03c9 },
|
---|
252 | { "omicron", 0x03bf },
|
---|
253 | { "oplus", 0x2295 },
|
---|
254 | { "or", 0x22a6 },
|
---|
255 | { "ordf", 0x00aa },
|
---|
256 | { "ordm", 0x00ba },
|
---|
257 | { "oslash", 0x00f8 },
|
---|
258 | { "otilde", 0x00f5 },
|
---|
259 | { "otimes", 0x2297 },
|
---|
260 | { "ouml", 0x00f6 },
|
---|
261 | { "para", 0x00b6 },
|
---|
262 | { "part", 0x2202 },
|
---|
263 | { "percnt", 0x0025 },
|
---|
264 | { "permil", 0x2030 },
|
---|
265 | { "perp", 0x22a5 },
|
---|
266 | { "phi", 0x03c6 },
|
---|
267 | { "pi", 0x03c0 },
|
---|
268 | { "piv", 0x03d6 },
|
---|
269 | { "plusmn", 0x00b1 },
|
---|
270 | { "pound", 0x00a3 },
|
---|
271 | { "prime", 0x2032 },
|
---|
272 | { "prod", 0x220f },
|
---|
273 | { "prop", 0x221d },
|
---|
274 | { "psi", 0x03c8 },
|
---|
275 | { "quot", 34 },
|
---|
276 | { "rArr", 0x21d2 },
|
---|
277 | { "radic", 0x221a },
|
---|
278 | { "rang", 0x232a },
|
---|
279 | { "raquo", 0x00bb },
|
---|
280 | { "rarr", 0x2192 },
|
---|
281 | { "rceil", 0x2309 },
|
---|
282 | { "rdquo", 0x201d },
|
---|
283 | { "real", 0x211c },
|
---|
284 | { "reg", 0x00ae },
|
---|
285 | { "rfloor", 0x230b },
|
---|
286 | { "rho", 0x03c1 },
|
---|
287 | { "rlm", 0x200f },
|
---|
288 | { "rsaquo", 0x203a },
|
---|
289 | { "rsquo", 0x2019 },
|
---|
290 | { "sbquo", 0x201a },
|
---|
291 | { "scaron", 0x0161 },
|
---|
292 | { "sdot", 0x22c5 },
|
---|
293 | { "sect", 0x00a7 },
|
---|
294 | { "shy", 0x00ad },
|
---|
295 | { "sigma", 0x03c3 },
|
---|
296 | { "sigmaf", 0x03c2 },
|
---|
297 | { "sim", 0x223c },
|
---|
298 | { "spades", 0x2660 },
|
---|
299 | { "sub", 0x2282 },
|
---|
300 | { "sube", 0x2286 },
|
---|
301 | { "sum", 0x2211 },
|
---|
302 | { "sup", 0x2283 },
|
---|
303 | { "sup1", 0x00b9 },
|
---|
304 | { "sup2", 0x00b2 },
|
---|
305 | { "sup3", 0x00b3 },
|
---|
306 | { "supe", 0x2287 },
|
---|
307 | { "szlig", 0x00df },
|
---|
308 | { "tau", 0x03c4 },
|
---|
309 | { "there4", 0x2234 },
|
---|
310 | { "theta", 0x03b8 },
|
---|
311 | { "thetasym", 0x03d1 },
|
---|
312 | { "thinsp", 0x2009 },
|
---|
313 | { "thorn", 0x00fe },
|
---|
314 | { "tilde", 0x02dc },
|
---|
315 | { "times", 0x00d7 },
|
---|
316 | { "trade", 0x2122 },
|
---|
317 | { "uArr", 0x21d1 },
|
---|
318 | { "uacute", 0x00fa },
|
---|
319 | { "uarr", 0x2191 },
|
---|
320 | { "ucirc", 0x00fb },
|
---|
321 | { "ugrave", 0x00f9 },
|
---|
322 | { "uml", 0x00a8 },
|
---|
323 | { "upsih", 0x03d2 },
|
---|
324 | { "upsilon", 0x03c5 },
|
---|
325 | { "uuml", 0x00fc },
|
---|
326 | { "weierp", 0x2118 },
|
---|
327 | { "xi", 0x03be },
|
---|
328 | { "yacute", 0x00fd },
|
---|
329 | { "yen", 0x00a5 },
|
---|
330 | { "yuml", 0x00ff },
|
---|
331 | { "zeta", 0x03b6 },
|
---|
332 | { "zwj", 0x200d },
|
---|
333 | { "zwnj", 0x200c }
|
---|
334 | };
|
---|
335 |
|
---|
336 | Q_STATIC_GLOBAL_OPERATOR bool operator<(const QString &entityStr, const QTextHtmlEntity &entity)
|
---|
337 | {
|
---|
338 | return entityStr < QLatin1String(entity.name);
|
---|
339 | }
|
---|
340 |
|
---|
341 | Q_STATIC_GLOBAL_OPERATOR bool operator<(const QTextHtmlEntity &entity, const QString &entityStr)
|
---|
342 | {
|
---|
343 | return QLatin1String(entity.name) < entityStr;
|
---|
344 | }
|
---|
345 |
|
---|
346 | static QChar resolveEntity(const QString &entity)
|
---|
347 | {
|
---|
348 | const QTextHtmlEntity *start = &entities[0];
|
---|
349 | const QTextHtmlEntity *end = &entities[(sizeof(entities) / sizeof(entities[0]))];
|
---|
350 | const QTextHtmlEntity *e = qBinaryFind(start, end, entity);
|
---|
351 | if (e == end)
|
---|
352 | return QChar();
|
---|
353 | return e->code;
|
---|
354 | }
|
---|
355 |
|
---|
356 | static const uint latin1Extended[0xA0 - 0x80] = {
|
---|
357 | 0x20ac, // 0x80
|
---|
358 | 0x0081, // 0x81 direct mapping
|
---|
359 | 0x201a, // 0x82
|
---|
360 | 0x0192, // 0x83
|
---|
361 | 0x201e, // 0x84
|
---|
362 | 0x2026, // 0x85
|
---|
363 | 0x2020, // 0x86
|
---|
364 | 0x2021, // 0x87
|
---|
365 | 0x02C6, // 0x88
|
---|
366 | 0x2030, // 0x89
|
---|
367 | 0x0160, // 0x8A
|
---|
368 | 0x2039, // 0x8B
|
---|
369 | 0x0152, // 0x8C
|
---|
370 | 0x008D, // 0x8D direct mapping
|
---|
371 | 0x017D, // 0x8E
|
---|
372 | 0x008F, // 0x8F directmapping
|
---|
373 | 0x0090, // 0x90 directmapping
|
---|
374 | 0x2018, // 0x91
|
---|
375 | 0x2019, // 0x92
|
---|
376 | 0x201C, // 0x93
|
---|
377 | 0X201D, // 0x94
|
---|
378 | 0x2022, // 0x95
|
---|
379 | 0x2013, // 0x96
|
---|
380 | 0x2014, // 0x97
|
---|
381 | 0x02DC, // 0x98
|
---|
382 | 0x2122, // 0x99
|
---|
383 | 0x0161, // 0x9A
|
---|
384 | 0x203A, // 0x9B
|
---|
385 | 0x0153, // 0x9C
|
---|
386 | 0x009D, // 0x9D direct mapping
|
---|
387 | 0x017E, // 0x9E
|
---|
388 | 0x0178 // 0x9F
|
---|
389 | };
|
---|
390 | // end taken from qtexthtmlparser
|
---|
391 |
|
---|
392 | class DocumentHelper
|
---|
393 | {
|
---|
394 | public:
|
---|
395 | DocumentHelper(const QString &fileName, const QByteArray &data)
|
---|
396 | : fileName(fileName) , data(readData(data)) {}
|
---|
397 | ~DocumentHelper() {}
|
---|
398 |
|
---|
399 | bool addFieldsToDocument(QCLuceneDocument *document,
|
---|
400 | const QString &namespaceName, const QString &attributes = QString())
|
---|
401 | {
|
---|
402 | if (!document)
|
---|
403 | return false;
|
---|
404 |
|
---|
405 | if(!data.isEmpty()) {
|
---|
406 | QString parsedData = parseData();
|
---|
407 | QString parsedTitle = QHelpGlobal::documentTitle(data);
|
---|
408 |
|
---|
409 | if(!parsedData.isEmpty()) {
|
---|
410 | document->add(new QCLuceneField(QLatin1String("content"),
|
---|
411 | parsedData,QCLuceneField::INDEX_TOKENIZED));
|
---|
412 | document->add(new QCLuceneField(QLatin1String("path"), fileName,
|
---|
413 | QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
|
---|
414 | document->add(new QCLuceneField(QLatin1String("title"), parsedTitle,
|
---|
415 | QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
|
---|
416 | document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle,
|
---|
417 | QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
|
---|
418 | document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName,
|
---|
419 | QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
|
---|
420 | document->add(new QCLuceneField(QLatin1String("attribute"), attributes,
|
---|
421 | QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
|
---|
422 | return true;
|
---|
423 | }
|
---|
424 | }
|
---|
425 |
|
---|
426 | return false;
|
---|
427 | }
|
---|
428 |
|
---|
429 | private:
|
---|
430 | QString readData(const QByteArray &data)
|
---|
431 | {
|
---|
432 | QTextStream textStream(data);
|
---|
433 | const QByteArray &codec = QHelpGlobal::codecFromData(data).toLatin1();
|
---|
434 | textStream.setCodec(QTextCodec::codecForName(codec.constData()));
|
---|
435 |
|
---|
436 | QString stream = textStream.readAll();
|
---|
437 | if (stream.isNull() || stream.isEmpty())
|
---|
438 | return QString();
|
---|
439 |
|
---|
440 | return stream;
|
---|
441 | }
|
---|
442 |
|
---|
443 | QString parseData() const
|
---|
444 | {
|
---|
445 | const int length = data.length();
|
---|
446 | const QChar *buf = data.unicode();
|
---|
447 |
|
---|
448 | QString parsedContent;
|
---|
449 | parsedContent.reserve(length);
|
---|
450 |
|
---|
451 | bool valid = true;
|
---|
452 | int j = 0, count = 0;
|
---|
453 |
|
---|
454 | QChar c;
|
---|
455 | while (j < length) {
|
---|
456 | c = buf[j++];
|
---|
457 | if (c == QLatin1Char('<') || c == QLatin1Char('&')) {
|
---|
458 | if (count > 1 && c != QLatin1Char('&'))
|
---|
459 | parsedContent.append(QLatin1Char(' '));
|
---|
460 | else if (c == QLatin1Char('&')) {
|
---|
461 | // Note: this will modify the counter j, in case we sucessful parsed the entity
|
---|
462 | // we will have modified the counter to stay 1 before the closing ';', so
|
---|
463 | // the following if condition will be met with if (c == QLatin1Char(';'))
|
---|
464 | parsedContent.append(parseEntity(length, buf, j));
|
---|
465 | }
|
---|
466 |
|
---|
467 | count = 0;
|
---|
468 | valid = false;
|
---|
469 | continue;
|
---|
470 | }
|
---|
471 | if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) {
|
---|
472 | valid = true;
|
---|
473 | continue;
|
---|
474 | }
|
---|
475 | if (!valid)
|
---|
476 | continue;
|
---|
477 |
|
---|
478 | if (c.isLetterOrNumber() || c.isPrint()) {
|
---|
479 | ++count;
|
---|
480 | parsedContent.append(c.toLower());
|
---|
481 | } else {
|
---|
482 | if (count > 1)
|
---|
483 | parsedContent.append(QLatin1Char(' '));
|
---|
484 | count = 0;
|
---|
485 | }
|
---|
486 | }
|
---|
487 |
|
---|
488 | return parsedContent;
|
---|
489 | }
|
---|
490 |
|
---|
491 | // taken from qtexthtmlparser
|
---|
492 | // parses an entity after "&", and returns it
|
---|
493 | QString parseEntity(int len, const QChar *buf, int &pos) const
|
---|
494 | {
|
---|
495 | int recover = pos;
|
---|
496 | QString entity;
|
---|
497 | while (pos < len) {
|
---|
498 | QChar c = buf[pos++];
|
---|
499 | if (c.isSpace() || pos - recover > 9) {
|
---|
500 | goto error;
|
---|
501 | }
|
---|
502 | if (c == QLatin1Char(';')) {
|
---|
503 | pos--;
|
---|
504 | break;
|
---|
505 | }
|
---|
506 | entity += c;
|
---|
507 | }
|
---|
508 | {
|
---|
509 | QChar resolved = resolveEntity(entity);
|
---|
510 | if (!resolved.isNull())
|
---|
511 | return QString(resolved);
|
---|
512 | }
|
---|
513 | if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) {
|
---|
514 | entity.remove(0, 1); // removing leading #
|
---|
515 |
|
---|
516 | int base = 10;
|
---|
517 | bool ok = false;
|
---|
518 |
|
---|
519 | if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity?
|
---|
520 | entity.remove(0, 1);
|
---|
521 | base = 16;
|
---|
522 | }
|
---|
523 |
|
---|
524 | uint uc = entity.toUInt(&ok, base);
|
---|
525 | if (ok) {
|
---|
526 | if (uc >= 0x80 && uc < 0x80 + (sizeof(latin1Extended) / sizeof(latin1Extended[0])))
|
---|
527 | uc = latin1Extended[uc - 0x80]; // windows latin 1 extended
|
---|
528 | QString str;
|
---|
529 | if (uc > 0xffff) {
|
---|
530 | // surrogate pair
|
---|
531 | uc -= 0x10000;
|
---|
532 | ushort high = uc/0x400 + 0xd800;
|
---|
533 | ushort low = uc%0x400 + 0xdc00;
|
---|
534 | str.append(QChar(high));
|
---|
535 | str.append(QChar(low));
|
---|
536 | } else {
|
---|
537 | str.append(QChar(uc));
|
---|
538 | }
|
---|
539 | return str;
|
---|
540 | }
|
---|
541 | }
|
---|
542 | error:
|
---|
543 | pos = recover;
|
---|
544 | return QLatin1String(" ");
|
---|
545 | }
|
---|
546 | // end taken from qtexthtmlparser
|
---|
547 |
|
---|
548 | private:
|
---|
549 | QString fileName;
|
---|
550 | QString data;
|
---|
551 | };
|
---|
552 |
|
---|
553 |
|
---|
554 | QHelpSearchIndexWriter::QHelpSearchIndexWriter()
|
---|
555 | : QThread(0)
|
---|
556 | , m_cancel(false)
|
---|
557 | {
|
---|
558 | // nothing todo
|
---|
559 | }
|
---|
560 |
|
---|
561 | QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
|
---|
562 | {
|
---|
563 | mutex.lock();
|
---|
564 | this->m_cancel = true;
|
---|
565 | waitCondition.wakeOne();
|
---|
566 | mutex.unlock();
|
---|
567 |
|
---|
568 | wait();
|
---|
569 | }
|
---|
570 |
|
---|
571 | void QHelpSearchIndexWriter::cancelIndexing()
|
---|
572 | {
|
---|
573 | mutex.lock();
|
---|
574 | this->m_cancel = true;
|
---|
575 | mutex.unlock();
|
---|
576 | }
|
---|
577 |
|
---|
578 | void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
|
---|
579 | const QString &indexFilesFolder, bool reindex)
|
---|
580 | {
|
---|
581 | wait();
|
---|
582 | mutex.lock();
|
---|
583 | this->m_cancel = false;
|
---|
584 | this->m_reindex = reindex;
|
---|
585 | this->m_collectionFile = collectionFile;
|
---|
586 | this->m_indexFilesFolder = indexFilesFolder;
|
---|
587 | mutex.unlock();
|
---|
588 |
|
---|
589 | start(QThread::NormalPriority);
|
---|
590 | }
|
---|
591 |
|
---|
592 | void QHelpSearchIndexWriter::optimizeIndex()
|
---|
593 | {
|
---|
594 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
595 | try {
|
---|
596 | #endif
|
---|
597 | if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) {
|
---|
598 | if (QCLuceneIndexReader::isLocked(m_indexFilesFolder))
|
---|
599 | return;
|
---|
600 |
|
---|
601 | QCLuceneStandardAnalyzer analyzer;
|
---|
602 | QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false);
|
---|
603 | writer.optimize();
|
---|
604 | writer.close();
|
---|
605 | }
|
---|
606 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
607 | } catch (...) {
|
---|
608 | qWarning("Full Text Search, could not optimize index.");
|
---|
609 | return;
|
---|
610 | }
|
---|
611 | #endif
|
---|
612 | }
|
---|
613 |
|
---|
614 | void QHelpSearchIndexWriter::run()
|
---|
615 | {
|
---|
616 | QMutexLocker mutexLocker(&mutex);
|
---|
617 |
|
---|
618 | if (m_cancel)
|
---|
619 | return;
|
---|
620 |
|
---|
621 | const bool reindex = this->m_reindex;
|
---|
622 | const QString collectionFile(this->m_collectionFile);
|
---|
623 |
|
---|
624 | mutexLocker.unlock();
|
---|
625 |
|
---|
626 | QHelpEngineCore engine(collectionFile, 0);
|
---|
627 | if (!engine.setupData())
|
---|
628 | return;
|
---|
629 |
|
---|
630 | const QLatin1String key("CluceneIndexedNamespaces");
|
---|
631 | if (reindex)
|
---|
632 | engine.setCustomValue(key, QLatin1String(""));
|
---|
633 |
|
---|
634 | QMap<QString, QDateTime> indexMap;
|
---|
635 | const QLatin1String oldKey("CluceneSearchNamespaces");
|
---|
636 | if (!engine.customValue(oldKey, QString()).isNull()) {
|
---|
637 | // old style qhc file < 4.4.2, need to convert...
|
---|
638 | const QStringList indexedNamespaces = engine.customValue(oldKey).
|
---|
639 | toString().split(QLatin1String("|"), QString::SkipEmptyParts);
|
---|
640 | foreach (const QString &nameSpace, indexedNamespaces)
|
---|
641 | indexMap.insert(nameSpace, QDateTime());
|
---|
642 | engine.removeCustomValue(oldKey);
|
---|
643 | } else {
|
---|
644 | QDataStream dataStream(engine.customValue(key).toByteArray());
|
---|
645 | dataStream >> indexMap;
|
---|
646 | }
|
---|
647 |
|
---|
648 | QString indexPath = m_indexFilesFolder;
|
---|
649 |
|
---|
650 | QFileInfo fInfo(indexPath);
|
---|
651 | if (fInfo.exists() && !fInfo.isWritable()) {
|
---|
652 | qWarning("Full Text Search, could not create index (missing permissions for '%s').", qPrintable(indexPath));
|
---|
653 | return;
|
---|
654 | }
|
---|
655 |
|
---|
656 | emit indexingStarted();
|
---|
657 |
|
---|
658 | QCLuceneIndexWriter *writer = 0;
|
---|
659 | QCLuceneStandardAnalyzer analyzer;
|
---|
660 | const QStringList registeredDocs = engine.registeredDocumentations();
|
---|
661 |
|
---|
662 | QLocalSocket localSocket;
|
---|
663 | localSocket.connectToServer(QString(QLatin1String("QtAssistant%1"))
|
---|
664 | .arg(QLatin1String(QT_VERSION_STR)));
|
---|
665 |
|
---|
666 | QLocalServer localServer;
|
---|
667 | bool otherInstancesRunning = true;
|
---|
668 | if (!localSocket.waitForConnected()) {
|
---|
669 | otherInstancesRunning = false;
|
---|
670 | localServer.listen(QString(QLatin1String("QtAssistant%1"))
|
---|
671 | .arg(QLatin1String(QT_VERSION_STR)));
|
---|
672 | }
|
---|
673 |
|
---|
674 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
675 | try {
|
---|
676 | #endif
|
---|
677 | // check if it's locked, and if the other instance is running
|
---|
678 | if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath))
|
---|
679 | QCLuceneIndexReader::unlock(indexPath);
|
---|
680 |
|
---|
681 | if (QCLuceneIndexReader::isLocked(indexPath)) {
|
---|
682 | // poll unless indexing finished to fake progress
|
---|
683 | while (QCLuceneIndexReader::isLocked(indexPath)) {
|
---|
684 | mutexLocker.relock();
|
---|
685 | if (m_cancel)
|
---|
686 | break;
|
---|
687 | mutexLocker.unlock();
|
---|
688 | this->sleep(1);
|
---|
689 | }
|
---|
690 | emit indexingFinished();
|
---|
691 | return;
|
---|
692 | }
|
---|
693 |
|
---|
694 | if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) {
|
---|
695 | foreach(const QString &namespaceName, registeredDocs) {
|
---|
696 | mutexLocker.relock();
|
---|
697 | if (m_cancel) {
|
---|
698 | emit indexingFinished();
|
---|
699 | return;
|
---|
700 | }
|
---|
701 | mutexLocker.unlock();
|
---|
702 |
|
---|
703 | if (!indexMap.contains(namespaceName)) {
|
---|
704 | // make sure we remove some partly indexed stuff
|
---|
705 | removeDocuments(indexPath, namespaceName);
|
---|
706 | } else {
|
---|
707 | QString path = engine.documentationFileName(namespaceName);
|
---|
708 | if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
|
---|
709 | // make sure we remove some outdated indexed stuff
|
---|
710 | indexMap.remove(namespaceName);
|
---|
711 | removeDocuments(indexPath, namespaceName);
|
---|
712 | }
|
---|
713 |
|
---|
714 | if (indexMap.contains(namespaceName)) {
|
---|
715 | // make sure we really have content indexed for namespace
|
---|
716 | // NOTE: Extra variable just for GCC 3.3.5
|
---|
717 | QLatin1String key("namespace");
|
---|
718 | QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName));
|
---|
719 | QCLuceneIndexSearcher indexSearcher(indexPath);
|
---|
720 | QCLuceneHits hits = indexSearcher.search(query);
|
---|
721 | if (hits.length() <= 0)
|
---|
722 | indexMap.remove(namespaceName);
|
---|
723 | }
|
---|
724 | }
|
---|
725 | }
|
---|
726 | writer = new QCLuceneIndexWriter(indexPath, analyzer, false);
|
---|
727 | } else {
|
---|
728 | indexMap.clear();
|
---|
729 | writer = new QCLuceneIndexWriter(indexPath, analyzer, true);
|
---|
730 | }
|
---|
731 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
732 | } catch (...) {
|
---|
733 | qWarning("Full Text Search, could not create index writer in '%s'.",
|
---|
734 | qPrintable(indexPath));
|
---|
735 | return;
|
---|
736 | }
|
---|
737 | #endif
|
---|
738 |
|
---|
739 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
740 | try {
|
---|
741 | #endif
|
---|
742 | writer->setMergeFactor(100);
|
---|
743 | writer->setMinMergeDocs(1000);
|
---|
744 | writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH);
|
---|
745 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
746 | } catch (...) {
|
---|
747 | qWarning("Full Text Search, could not set writer properties.");
|
---|
748 | return;
|
---|
749 | }
|
---|
750 | #endif
|
---|
751 |
|
---|
752 | QStringList namespaces;
|
---|
753 | foreach(const QString &namespaceName, registeredDocs) {
|
---|
754 | mutexLocker.relock();
|
---|
755 | if (m_cancel) {
|
---|
756 | closeIndexWriter(writer);
|
---|
757 | emit indexingFinished();
|
---|
758 | return;
|
---|
759 | }
|
---|
760 | mutexLocker.unlock();
|
---|
761 |
|
---|
762 | namespaces.append(namespaceName);
|
---|
763 | if (indexMap.contains(namespaceName))
|
---|
764 | continue;
|
---|
765 |
|
---|
766 | const QList<QStringList> attributeSets =
|
---|
767 | engine.filterAttributeSets(namespaceName);
|
---|
768 |
|
---|
769 | if (attributeSets.isEmpty()) {
|
---|
770 | const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName,
|
---|
771 | QStringList());
|
---|
772 | if (!addDocuments(docFiles, engine, QStringList(), namespaceName,
|
---|
773 | writer, analyzer))
|
---|
774 | break;
|
---|
775 | } else {
|
---|
776 | bool bail = false;
|
---|
777 | foreach (const QStringList &attributes, attributeSets) {
|
---|
778 | const QList<QUrl> docFiles = indexableFiles(&engine,
|
---|
779 | namespaceName, attributes);
|
---|
780 | if (!addDocuments(docFiles, engine, attributes, namespaceName,
|
---|
781 | writer, analyzer)) {
|
---|
782 | bail = true;
|
---|
783 | break;
|
---|
784 | }
|
---|
785 | }
|
---|
786 | if (bail)
|
---|
787 | break;
|
---|
788 | }
|
---|
789 |
|
---|
790 | mutexLocker.relock();
|
---|
791 | if (!m_cancel) {
|
---|
792 | QString path(engine.documentationFileName(namespaceName));
|
---|
793 | indexMap.insert(namespaceName, QFileInfo(path).lastModified());
|
---|
794 | writeIndexMap(engine, indexMap);
|
---|
795 | }
|
---|
796 | mutexLocker.unlock();
|
---|
797 | }
|
---|
798 |
|
---|
799 | closeIndexWriter(writer);
|
---|
800 |
|
---|
801 | mutexLocker.relock();
|
---|
802 | if (!m_cancel) {
|
---|
803 | mutexLocker.unlock();
|
---|
804 |
|
---|
805 | QStringList indexedNamespaces = indexMap.keys();
|
---|
806 | foreach(const QString &namespaceName, indexedNamespaces) {
|
---|
807 | mutexLocker.relock();
|
---|
808 | if (m_cancel)
|
---|
809 | break;
|
---|
810 | mutexLocker.unlock();
|
---|
811 |
|
---|
812 | if (!namespaces.contains(namespaceName)) {
|
---|
813 | indexMap.remove(namespaceName);
|
---|
814 | writeIndexMap(engine, indexMap);
|
---|
815 | removeDocuments(indexPath, namespaceName);
|
---|
816 | }
|
---|
817 | }
|
---|
818 | }
|
---|
819 | emit indexingFinished();
|
---|
820 | }
|
---|
821 |
|
---|
822 | bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles,
|
---|
823 | const QHelpEngineCore &engine, const QStringList &attributes,
|
---|
824 | const QString &namespaceName, QCLuceneIndexWriter *writer,
|
---|
825 | QCLuceneAnalyzer &analyzer)
|
---|
826 | {
|
---|
827 | QMutexLocker locker(&mutex);
|
---|
828 | const QString attrList = attributes.join(QLatin1String(" "));
|
---|
829 |
|
---|
830 | locker.unlock();
|
---|
831 | foreach(const QUrl &url, docFiles) {
|
---|
832 | QCLuceneDocument document;
|
---|
833 | DocumentHelper helper(url.toString(), engine.fileData(url));
|
---|
834 | if (helper.addFieldsToDocument(&document, namespaceName, attrList)) {
|
---|
835 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
836 | try {
|
---|
837 | #endif
|
---|
838 | writer->addDocument(document, analyzer);
|
---|
839 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
840 | } catch (...) {
|
---|
841 | qWarning("Full Text Search, could not properly add documents.");
|
---|
842 | return false;
|
---|
843 | }
|
---|
844 | #endif
|
---|
845 | }
|
---|
846 | locker.relock();
|
---|
847 | if (m_cancel)
|
---|
848 | return false;
|
---|
849 | locker.unlock();
|
---|
850 | }
|
---|
851 | return true;
|
---|
852 | }
|
---|
853 |
|
---|
854 | void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath,
|
---|
855 | const QString &namespaceName)
|
---|
856 | {
|
---|
857 | if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath))
|
---|
858 | return;
|
---|
859 |
|
---|
860 | QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath);
|
---|
861 | reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"),
|
---|
862 | namespaceName));
|
---|
863 |
|
---|
864 | reader.close();
|
---|
865 | }
|
---|
866 |
|
---|
867 | bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore &engine,
|
---|
868 | const QMap<QString, QDateTime> &indexMap)
|
---|
869 | {
|
---|
870 | QByteArray bArray;
|
---|
871 |
|
---|
872 | QDataStream data(&bArray, QIODevice::ReadWrite);
|
---|
873 | data << indexMap;
|
---|
874 |
|
---|
875 | return engine.setCustomValue(QLatin1String("CluceneIndexedNamespaces"),
|
---|
876 | bArray);
|
---|
877 | }
|
---|
878 |
|
---|
879 | QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine,
|
---|
880 | const QString &namespaceName, const QStringList &attributes) const
|
---|
881 | {
|
---|
882 | QList<QUrl> docFiles = helpEngine->files(namespaceName, attributes,
|
---|
883 | QLatin1String("html"));
|
---|
884 | docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("htm"));
|
---|
885 | docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("txt"));
|
---|
886 |
|
---|
887 | return docFiles;
|
---|
888 | }
|
---|
889 |
|
---|
890 | void QHelpSearchIndexWriter::closeIndexWriter(QCLuceneIndexWriter *writer)
|
---|
891 | {
|
---|
892 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
893 | try {
|
---|
894 | #endif
|
---|
895 | writer->close();
|
---|
896 | delete writer;
|
---|
897 | #if !defined(QT_NO_EXCEPTIONS)
|
---|
898 | } catch (...) {
|
---|
899 | qWarning("Full Text Search, could not properly close index writer.");
|
---|
900 | }
|
---|
901 | #endif
|
---|
902 | }
|
---|
903 |
|
---|
904 | } // namespace clucene
|
---|
905 | } // namespace fulltextsearch
|
---|
906 | } // namespace qt
|
---|
907 |
|
---|
908 | QT_END_NAMESPACE
|
---|