source: trunk/tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp@ 651

Last change on this file since 651 was 651, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 26.5 KB
RevLine 
[2]1/****************************************************************************
2**
[651]3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
[561]4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
[2]6**
7** This file is part of the Qt Assistant of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
[561]24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
[2]27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
[561]36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
[2]38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qhelpenginecore.h"
43#include "qhelp_global.h"
44#include "fulltextsearch/qhits_p.h"
45#include "fulltextsearch/qquery_p.h"
46#include "fulltextsearch/qanalyzer_p.h"
47#include "fulltextsearch/qdocument_p.h"
48#include "fulltextsearch/qsearchable_p.h"
49#include "fulltextsearch/qindexreader_p.h"
50#include "fulltextsearch/qindexwriter_p.h"
51#include "qhelpsearchindexwriter_clucene_p.h"
52
53#include <QtCore/QDir>
54#include <QtCore/QString>
55#include <QtCore/QFileInfo>
56#include <QtCore/QTextCodec>
57#include <QtCore/QTextStream>
58
59#include <QtNetwork/QLocalSocket>
60#include <QtNetwork/QLocalServer>
61
62#include "private/qfunctions_p.h"
63
64QT_BEGIN_NAMESPACE
65
66namespace qt {
67 namespace fulltextsearch {
68 namespace clucene {
69
70// taken from qtexthtmlparser
71static const struct QTextHtmlEntity
72{
73 const char *name;
74 quint16 code;
75} entities[] = {
76 { "AElig", 0x00c6 },
77 { "AMP", 38 },
78 { "Aacute", 0x00c1 },
79 { "Acirc", 0x00c2 },
80 { "Agrave", 0x00c0 },
81 { "Alpha", 0x0391 },
82 { "Aring", 0x00c5 },
83 { "Atilde", 0x00c3 },
84 { "Auml", 0x00c4 },
85 { "Beta", 0x0392 },
86 { "Ccedil", 0x00c7 },
87 { "Chi", 0x03a7 },
88 { "Dagger", 0x2021 },
89 { "Delta", 0x0394 },
90 { "ETH", 0x00d0 },
91 { "Eacute", 0x00c9 },
92 { "Ecirc", 0x00ca },
93 { "Egrave", 0x00c8 },
94 { "Epsilon", 0x0395 },
95 { "Eta", 0x0397 },
96 { "Euml", 0x00cb },
97 { "GT", 62 },
98 { "Gamma", 0x0393 },
99 { "Iacute", 0x00cd },
100 { "Icirc", 0x00ce },
101 { "Igrave", 0x00cc },
102 { "Iota", 0x0399 },
103 { "Iuml", 0x00cf },
104 { "Kappa", 0x039a },
105 { "LT", 60 },
106 { "Lambda", 0x039b },
107 { "Mu", 0x039c },
108 { "Ntilde", 0x00d1 },
109 { "Nu", 0x039d },
110 { "OElig", 0x0152 },
111 { "Oacute", 0x00d3 },
112 { "Ocirc", 0x00d4 },
113 { "Ograve", 0x00d2 },
114 { "Omega", 0x03a9 },
115 { "Omicron", 0x039f },
116 { "Oslash", 0x00d8 },
117 { "Otilde", 0x00d5 },
118 { "Ouml", 0x00d6 },
119 { "Phi", 0x03a6 },
120 { "Pi", 0x03a0 },
121 { "Prime", 0x2033 },
122 { "Psi", 0x03a8 },
123 { "QUOT", 34 },
124 { "Rho", 0x03a1 },
125 { "Scaron", 0x0160 },
126 { "Sigma", 0x03a3 },
127 { "THORN", 0x00de },
128 { "Tau", 0x03a4 },
129 { "Theta", 0x0398 },
130 { "Uacute", 0x00da },
131 { "Ucirc", 0x00db },
132 { "Ugrave", 0x00d9 },
133 { "Upsilon", 0x03a5 },
134 { "Uuml", 0x00dc },
135 { "Xi", 0x039e },
136 { "Yacute", 0x00dd },
137 { "Yuml", 0x0178 },
138 { "Zeta", 0x0396 },
139 { "aacute", 0x00e1 },
140 { "acirc", 0x00e2 },
141 { "acute", 0x00b4 },
142 { "aelig", 0x00e6 },
143 { "agrave", 0x00e0 },
144 { "alefsym", 0x2135 },
145 { "alpha", 0x03b1 },
146 { "amp", 38 },
147 { "and", 0x22a5 },
148 { "ang", 0x2220 },
149 { "apos", 0x0027 },
150 { "aring", 0x00e5 },
151 { "asymp", 0x2248 },
152 { "atilde", 0x00e3 },
153 { "auml", 0x00e4 },
154 { "bdquo", 0x201e },
155 { "beta", 0x03b2 },
156 { "brvbar", 0x00a6 },
157 { "bull", 0x2022 },
158 { "cap", 0x2229 },
159 { "ccedil", 0x00e7 },
160 { "cedil", 0x00b8 },
161 { "cent", 0x00a2 },
162 { "chi", 0x03c7 },
163 { "circ", 0x02c6 },
164 { "clubs", 0x2663 },
165 { "cong", 0x2245 },
166 { "copy", 0x00a9 },
167 { "crarr", 0x21b5 },
168 { "cup", 0x222a },
169 { "curren", 0x00a4 },
170 { "dArr", 0x21d3 },
171 { "dagger", 0x2020 },
172 { "darr", 0x2193 },
173 { "deg", 0x00b0 },
174 { "delta", 0x03b4 },
175 { "diams", 0x2666 },
176 { "divide", 0x00f7 },
177 { "eacute", 0x00e9 },
178 { "ecirc", 0x00ea },
179 { "egrave", 0x00e8 },
180 { "empty", 0x2205 },
181 { "emsp", 0x2003 },
182 { "ensp", 0x2002 },
183 { "epsilon", 0x03b5 },
184 { "equiv", 0x2261 },
185 { "eta", 0x03b7 },
186 { "eth", 0x00f0 },
187 { "euml", 0x00eb },
188 { "euro", 0x20ac },
189 { "exist", 0x2203 },
190 { "fnof", 0x0192 },
191 { "forall", 0x2200 },
192 { "frac12", 0x00bd },
193 { "frac14", 0x00bc },
194 { "frac34", 0x00be },
195 { "frasl", 0x2044 },
196 { "gamma", 0x03b3 },
197 { "ge", 0x2265 },
198 { "gt", 62 },
199 { "hArr", 0x21d4 },
200 { "harr", 0x2194 },
201 { "hearts", 0x2665 },
202 { "hellip", 0x2026 },
203 { "iacute", 0x00ed },
204 { "icirc", 0x00ee },
205 { "iexcl", 0x00a1 },
206 { "igrave", 0x00ec },
207 { "image", 0x2111 },
208 { "infin", 0x221e },
209 { "int", 0x222b },
210 { "iota", 0x03b9 },
211 { "iquest", 0x00bf },
212 { "isin", 0x2208 },
213 { "iuml", 0x00ef },
214 { "kappa", 0x03ba },
215 { "lArr", 0x21d0 },
216 { "lambda", 0x03bb },
217 { "lang", 0x2329 },
218 { "laquo", 0x00ab },
219 { "larr", 0x2190 },
220 { "lceil", 0x2308 },
221 { "ldquo", 0x201c },
222 { "le", 0x2264 },
223 { "lfloor", 0x230a },
224 { "lowast", 0x2217 },
225 { "loz", 0x25ca },
226 { "lrm", 0x200e },
227 { "lsaquo", 0x2039 },
228 { "lsquo", 0x2018 },
229 { "lt", 60 },
230 { "macr", 0x00af },
231 { "mdash", 0x2014 },
232 { "micro", 0x00b5 },
233 { "middot", 0x00b7 },
234 { "minus", 0x2212 },
235 { "mu", 0x03bc },
236 { "nabla", 0x2207 },
237 { "nbsp", 0x00a0 },
238 { "ndash", 0x2013 },
239 { "ne", 0x2260 },
240 { "ni", 0x220b },
241 { "not", 0x00ac },
242 { "notin", 0x2209 },
243 { "nsub", 0x2284 },
244 { "ntilde", 0x00f1 },
245 { "nu", 0x03bd },
246 { "oacute", 0x00f3 },
247 { "ocirc", 0x00f4 },
248 { "oelig", 0x0153 },
249 { "ograve", 0x00f2 },
250 { "oline", 0x203e },
251 { "omega", 0x03c9 },
252 { "omicron", 0x03bf },
253 { "oplus", 0x2295 },
254 { "or", 0x22a6 },
255 { "ordf", 0x00aa },
256 { "ordm", 0x00ba },
257 { "oslash", 0x00f8 },
258 { "otilde", 0x00f5 },
259 { "otimes", 0x2297 },
260 { "ouml", 0x00f6 },
261 { "para", 0x00b6 },
262 { "part", 0x2202 },
263 { "percnt", 0x0025 },
264 { "permil", 0x2030 },
265 { "perp", 0x22a5 },
266 { "phi", 0x03c6 },
267 { "pi", 0x03c0 },
268 { "piv", 0x03d6 },
269 { "plusmn", 0x00b1 },
270 { "pound", 0x00a3 },
271 { "prime", 0x2032 },
272 { "prod", 0x220f },
273 { "prop", 0x221d },
274 { "psi", 0x03c8 },
275 { "quot", 34 },
276 { "rArr", 0x21d2 },
277 { "radic", 0x221a },
278 { "rang", 0x232a },
279 { "raquo", 0x00bb },
280 { "rarr", 0x2192 },
281 { "rceil", 0x2309 },
282 { "rdquo", 0x201d },
283 { "real", 0x211c },
284 { "reg", 0x00ae },
285 { "rfloor", 0x230b },
286 { "rho", 0x03c1 },
287 { "rlm", 0x200f },
288 { "rsaquo", 0x203a },
289 { "rsquo", 0x2019 },
290 { "sbquo", 0x201a },
291 { "scaron", 0x0161 },
292 { "sdot", 0x22c5 },
293 { "sect", 0x00a7 },
294 { "shy", 0x00ad },
295 { "sigma", 0x03c3 },
296 { "sigmaf", 0x03c2 },
297 { "sim", 0x223c },
298 { "spades", 0x2660 },
299 { "sub", 0x2282 },
300 { "sube", 0x2286 },
301 { "sum", 0x2211 },
302 { "sup", 0x2283 },
303 { "sup1", 0x00b9 },
304 { "sup2", 0x00b2 },
305 { "sup3", 0x00b3 },
306 { "supe", 0x2287 },
307 { "szlig", 0x00df },
308 { "tau", 0x03c4 },
309 { "there4", 0x2234 },
310 { "theta", 0x03b8 },
311 { "thetasym", 0x03d1 },
312 { "thinsp", 0x2009 },
313 { "thorn", 0x00fe },
314 { "tilde", 0x02dc },
315 { "times", 0x00d7 },
316 { "trade", 0x2122 },
317 { "uArr", 0x21d1 },
318 { "uacute", 0x00fa },
319 { "uarr", 0x2191 },
320 { "ucirc", 0x00fb },
321 { "ugrave", 0x00f9 },
322 { "uml", 0x00a8 },
323 { "upsih", 0x03d2 },
324 { "upsilon", 0x03c5 },
325 { "uuml", 0x00fc },
326 { "weierp", 0x2118 },
327 { "xi", 0x03be },
328 { "yacute", 0x00fd },
329 { "yen", 0x00a5 },
330 { "yuml", 0x00ff },
331 { "zeta", 0x03b6 },
332 { "zwj", 0x200d },
333 { "zwnj", 0x200c }
334};
335
336Q_STATIC_GLOBAL_OPERATOR bool operator<(const QString &entityStr, const QTextHtmlEntity &entity)
337{
338 return entityStr < QLatin1String(entity.name);
339}
340
341Q_STATIC_GLOBAL_OPERATOR bool operator<(const QTextHtmlEntity &entity, const QString &entityStr)
342{
343 return QLatin1String(entity.name) < entityStr;
344}
345
346static QChar resolveEntity(const QString &entity)
347{
348 const QTextHtmlEntity *start = &entities[0];
349 const QTextHtmlEntity *end = &entities[(sizeof(entities) / sizeof(entities[0]))];
350 const QTextHtmlEntity *e = qBinaryFind(start, end, entity);
351 if (e == end)
352 return QChar();
353 return e->code;
354}
355
356static const uint latin1Extended[0xA0 - 0x80] = {
357 0x20ac, // 0x80
358 0x0081, // 0x81 direct mapping
359 0x201a, // 0x82
360 0x0192, // 0x83
361 0x201e, // 0x84
362 0x2026, // 0x85
363 0x2020, // 0x86
364 0x2021, // 0x87
365 0x02C6, // 0x88
366 0x2030, // 0x89
367 0x0160, // 0x8A
368 0x2039, // 0x8B
369 0x0152, // 0x8C
370 0x008D, // 0x8D direct mapping
371 0x017D, // 0x8E
372 0x008F, // 0x8F directmapping
373 0x0090, // 0x90 directmapping
374 0x2018, // 0x91
375 0x2019, // 0x92
376 0x201C, // 0x93
377 0X201D, // 0x94
378 0x2022, // 0x95
379 0x2013, // 0x96
380 0x2014, // 0x97
381 0x02DC, // 0x98
382 0x2122, // 0x99
383 0x0161, // 0x9A
384 0x203A, // 0x9B
385 0x0153, // 0x9C
386 0x009D, // 0x9D direct mapping
387 0x017E, // 0x9E
388 0x0178 // 0x9F
389};
390// end taken from qtexthtmlparser
391
392class DocumentHelper
393{
394public:
395 DocumentHelper(const QString &fileName, const QByteArray &data)
396 : fileName(fileName) , data(readData(data)) {}
397 ~DocumentHelper() {}
398
399 bool addFieldsToDocument(QCLuceneDocument *document,
400 const QString &namespaceName, const QString &attributes = QString())
401 {
402 if (!document)
403 return false;
404
405 if(!data.isEmpty()) {
406 QString parsedData = parseData();
407 QString parsedTitle = QHelpGlobal::documentTitle(data);
408
409 if(!parsedData.isEmpty()) {
410 document->add(new QCLuceneField(QLatin1String("content"),
411 parsedData,QCLuceneField::INDEX_TOKENIZED));
412 document->add(new QCLuceneField(QLatin1String("path"), fileName,
413 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
414 document->add(new QCLuceneField(QLatin1String("title"), parsedTitle,
415 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
416 document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle,
417 QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
418 document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName,
419 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
420 document->add(new QCLuceneField(QLatin1String("attribute"), attributes,
421 QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
422 return true;
423 }
424 }
425
426 return false;
427 }
428
429private:
430 QString readData(const QByteArray &data)
431 {
432 QTextStream textStream(data);
[561]433 const QByteArray &codec = QHelpGlobal::codecFromData(data).toLatin1();
434 textStream.setCodec(QTextCodec::codecForName(codec.constData()));
[2]435
436 QString stream = textStream.readAll();
437 if (stream.isNull() || stream.isEmpty())
438 return QString();
439
440 return stream;
441 }
442
443 QString parseData() const
444 {
445 const int length = data.length();
446 const QChar *buf = data.unicode();
447
448 QString parsedContent;
449 parsedContent.reserve(length);
450
451 bool valid = true;
452 int j = 0, count = 0;
453
454 QChar c;
455 while (j < length) {
456 c = buf[j++];
457 if (c == QLatin1Char('<') || c == QLatin1Char('&')) {
458 if (count > 1 && c != QLatin1Char('&'))
459 parsedContent.append(QLatin1Char(' '));
460 else if (c == QLatin1Char('&')) {
461 // Note: this will modify the counter j, in case we sucessful parsed the entity
462 // we will have modified the counter to stay 1 before the closing ';', so
463 // the following if condition will be met with if (c == QLatin1Char(';'))
464 parsedContent.append(parseEntity(length, buf, j));
465 }
466
467 count = 0;
468 valid = false;
469 continue;
470 }
471 if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) {
472 valid = true;
473 continue;
474 }
475 if (!valid)
476 continue;
477
478 if (c.isLetterOrNumber() || c.isPrint()) {
479 ++count;
480 parsedContent.append(c.toLower());
481 } else {
482 if (count > 1)
483 parsedContent.append(QLatin1Char(' '));
484 count = 0;
485 }
486 }
487
488 return parsedContent;
489 }
490
491 // taken from qtexthtmlparser
492 // parses an entity after "&", and returns it
493 QString parseEntity(int len, const QChar *buf, int &pos) const
494 {
495 int recover = pos;
496 QString entity;
497 while (pos < len) {
498 QChar c = buf[pos++];
499 if (c.isSpace() || pos - recover > 9) {
500 goto error;
501 }
502 if (c == QLatin1Char(';')) {
503 pos--;
504 break;
505 }
506 entity += c;
507 }
508 {
509 QChar resolved = resolveEntity(entity);
510 if (!resolved.isNull())
511 return QString(resolved);
512 }
513 if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) {
514 entity.remove(0, 1); // removing leading #
515
516 int base = 10;
517 bool ok = false;
518
519 if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity?
520 entity.remove(0, 1);
521 base = 16;
522 }
523
524 uint uc = entity.toUInt(&ok, base);
525 if (ok) {
526 if (uc >= 0x80 && uc < 0x80 + (sizeof(latin1Extended) / sizeof(latin1Extended[0])))
527 uc = latin1Extended[uc - 0x80]; // windows latin 1 extended
528 QString str;
529 if (uc > 0xffff) {
530 // surrogate pair
531 uc -= 0x10000;
532 ushort high = uc/0x400 + 0xd800;
533 ushort low = uc%0x400 + 0xdc00;
534 str.append(QChar(high));
535 str.append(QChar(low));
536 } else {
537 str.append(QChar(uc));
538 }
539 return str;
540 }
541 }
542 error:
543 pos = recover;
544 return QLatin1String(" ");
545 }
546 // end taken from qtexthtmlparser
547
548private:
549 QString fileName;
550 QString data;
551};
552
553
554QHelpSearchIndexWriter::QHelpSearchIndexWriter()
555 : QThread(0)
556 , m_cancel(false)
557{
558 // nothing todo
559}
560
561QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
562{
563 mutex.lock();
564 this->m_cancel = true;
565 waitCondition.wakeOne();
566 mutex.unlock();
567
568 wait();
569}
570
571void QHelpSearchIndexWriter::cancelIndexing()
572{
573 mutex.lock();
574 this->m_cancel = true;
575 mutex.unlock();
576}
577
578void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
579 const QString &indexFilesFolder, bool reindex)
580{
[561]581 wait();
[2]582 mutex.lock();
583 this->m_cancel = false;
584 this->m_reindex = reindex;
585 this->m_collectionFile = collectionFile;
586 this->m_indexFilesFolder = indexFilesFolder;
587 mutex.unlock();
588
589 start(QThread::NormalPriority);
590}
591
592void QHelpSearchIndexWriter::optimizeIndex()
593{
[561]594#if !defined(QT_NO_EXCEPTIONS)
595 try {
596#endif
597 if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) {
598 if (QCLuceneIndexReader::isLocked(m_indexFilesFolder))
599 return;
[2]600
[561]601 QCLuceneStandardAnalyzer analyzer;
602 QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false);
603 writer.optimize();
604 writer.close();
605 }
606#if !defined(QT_NO_EXCEPTIONS)
607 } catch (...) {
608 qWarning("Full Text Search, could not optimize index.");
609 return;
[2]610 }
[561]611#endif
[2]612}
613
614void QHelpSearchIndexWriter::run()
615{
616 QMutexLocker mutexLocker(&mutex);
617
618 if (m_cancel)
619 return;
620
621 const bool reindex = this->m_reindex;
622 const QString collectionFile(this->m_collectionFile);
623
624 mutexLocker.unlock();
625
626 QHelpEngineCore engine(collectionFile, 0);
627 if (!engine.setupData())
628 return;
629
630 const QLatin1String key("CluceneIndexedNamespaces");
631 if (reindex)
632 engine.setCustomValue(key, QLatin1String(""));
633
634 QMap<QString, QDateTime> indexMap;
635 const QLatin1String oldKey("CluceneSearchNamespaces");
636 if (!engine.customValue(oldKey, QString()).isNull()) {
637 // old style qhc file < 4.4.2, need to convert...
638 const QStringList indexedNamespaces = engine.customValue(oldKey).
639 toString().split(QLatin1String("|"), QString::SkipEmptyParts);
640 foreach (const QString &nameSpace, indexedNamespaces)
641 indexMap.insert(nameSpace, QDateTime());
642 engine.removeCustomValue(oldKey);
643 } else {
644 QDataStream dataStream(engine.customValue(key).toByteArray());
645 dataStream >> indexMap;
646 }
647
648 QString indexPath = m_indexFilesFolder;
649
650 QFileInfo fInfo(indexPath);
651 if (fInfo.exists() && !fInfo.isWritable()) {
[561]652 qWarning("Full Text Search, could not create index (missing permissions for '%s').", qPrintable(indexPath));
[2]653 return;
654 }
655
656 emit indexingStarted();
657
658 QCLuceneIndexWriter *writer = 0;
659 QCLuceneStandardAnalyzer analyzer;
660 const QStringList registeredDocs = engine.registeredDocumentations();
661
662 QLocalSocket localSocket;
663 localSocket.connectToServer(QString(QLatin1String("QtAssistant%1"))
664 .arg(QLatin1String(QT_VERSION_STR)));
665
666 QLocalServer localServer;
667 bool otherInstancesRunning = true;
668 if (!localSocket.waitForConnected()) {
669 otherInstancesRunning = false;
670 localServer.listen(QString(QLatin1String("QtAssistant%1"))
671 .arg(QLatin1String(QT_VERSION_STR)));
672 }
673
674#if !defined(QT_NO_EXCEPTIONS)
675 try {
676#endif
677 // check if it's locked, and if the other instance is running
678 if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath))
679 QCLuceneIndexReader::unlock(indexPath);
680
681 if (QCLuceneIndexReader::isLocked(indexPath)) {
682 // poll unless indexing finished to fake progress
683 while (QCLuceneIndexReader::isLocked(indexPath)) {
684 mutexLocker.relock();
685 if (m_cancel)
686 break;
687 mutexLocker.unlock();
688 this->sleep(1);
689 }
690 emit indexingFinished();
691 return;
692 }
693
694 if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) {
695 foreach(const QString &namespaceName, registeredDocs) {
696 mutexLocker.relock();
697 if (m_cancel) {
698 emit indexingFinished();
699 return;
700 }
701 mutexLocker.unlock();
702
703 if (!indexMap.contains(namespaceName)) {
704 // make sure we remove some partly indexed stuff
705 removeDocuments(indexPath, namespaceName);
706 } else {
707 QString path = engine.documentationFileName(namespaceName);
708 if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
709 // make sure we remove some outdated indexed stuff
710 indexMap.remove(namespaceName);
711 removeDocuments(indexPath, namespaceName);
712 }
713
714 if (indexMap.contains(namespaceName)) {
715 // make sure we really have content indexed for namespace
716 // NOTE: Extra variable just for GCC 3.3.5
717 QLatin1String key("namespace");
718 QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName));
719 QCLuceneIndexSearcher indexSearcher(indexPath);
720 QCLuceneHits hits = indexSearcher.search(query);
721 if (hits.length() <= 0)
722 indexMap.remove(namespaceName);
723 }
724 }
725 }
726 writer = new QCLuceneIndexWriter(indexPath, analyzer, false);
727 } else {
728 indexMap.clear();
729 writer = new QCLuceneIndexWriter(indexPath, analyzer, true);
730 }
731#if !defined(QT_NO_EXCEPTIONS)
732 } catch (...) {
[561]733 qWarning("Full Text Search, could not create index writer in '%s'.",
734 qPrintable(indexPath));
[2]735 return;
736 }
737#endif
738
[561]739#if !defined(QT_NO_EXCEPTIONS)
740 try {
741#endif
742 writer->setMergeFactor(100);
743 writer->setMinMergeDocs(1000);
744 writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH);
745#if !defined(QT_NO_EXCEPTIONS)
746 } catch (...) {
747 qWarning("Full Text Search, could not set writer properties.");
748 return;
749 }
750#endif
[2]751
752 QStringList namespaces;
753 foreach(const QString &namespaceName, registeredDocs) {
754 mutexLocker.relock();
755 if (m_cancel) {
[561]756 closeIndexWriter(writer);
[2]757 emit indexingFinished();
758 return;
759 }
760 mutexLocker.unlock();
761
762 namespaces.append(namespaceName);
763 if (indexMap.contains(namespaceName))
764 continue;
765
766 const QList<QStringList> attributeSets =
767 engine.filterAttributeSets(namespaceName);
768
769 if (attributeSets.isEmpty()) {
770 const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName,
771 QStringList());
772 if (!addDocuments(docFiles, engine, QStringList(), namespaceName,
773 writer, analyzer))
774 break;
775 } else {
776 bool bail = false;
777 foreach (const QStringList &attributes, attributeSets) {
778 const QList<QUrl> docFiles = indexableFiles(&engine,
779 namespaceName, attributes);
780 if (!addDocuments(docFiles, engine, attributes, namespaceName,
781 writer, analyzer)) {
782 bail = true;
783 break;
784 }
785 }
786 if (bail)
787 break;
788 }
789
790 mutexLocker.relock();
791 if (!m_cancel) {
792 QString path(engine.documentationFileName(namespaceName));
793 indexMap.insert(namespaceName, QFileInfo(path).lastModified());
794 writeIndexMap(engine, indexMap);
795 }
796 mutexLocker.unlock();
797 }
798
[561]799 closeIndexWriter(writer);
[2]800
801 mutexLocker.relock();
802 if (!m_cancel) {
803 mutexLocker.unlock();
804
805 QStringList indexedNamespaces = indexMap.keys();
806 foreach(const QString &namespaceName, indexedNamespaces) {
807 mutexLocker.relock();
808 if (m_cancel)
809 break;
810 mutexLocker.unlock();
811
812 if (!namespaces.contains(namespaceName)) {
813 indexMap.remove(namespaceName);
814 writeIndexMap(engine, indexMap);
815 removeDocuments(indexPath, namespaceName);
816 }
817 }
818 }
819 emit indexingFinished();
820}
821
822bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles,
823 const QHelpEngineCore &engine, const QStringList &attributes,
824 const QString &namespaceName, QCLuceneIndexWriter *writer,
825 QCLuceneAnalyzer &analyzer)
826{
827 QMutexLocker locker(&mutex);
828 const QString attrList = attributes.join(QLatin1String(" "));
829
830 locker.unlock();
831 foreach(const QUrl &url, docFiles) {
832 QCLuceneDocument document;
833 DocumentHelper helper(url.toString(), engine.fileData(url));
[561]834 if (helper.addFieldsToDocument(&document, namespaceName, attrList)) {
835#if !defined(QT_NO_EXCEPTIONS)
836 try {
837#endif
838 writer->addDocument(document, analyzer);
839#if !defined(QT_NO_EXCEPTIONS)
840 } catch (...) {
841 qWarning("Full Text Search, could not properly add documents.");
842 return false;
843 }
844#endif
845 }
[2]846 locker.relock();
847 if (m_cancel)
848 return false;
849 locker.unlock();
850 }
851 return true;
852}
853
854void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath,
855 const QString &namespaceName)
856{
857 if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath))
858 return;
859
860 QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath);
861 reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"),
862 namespaceName));
863
864 reader.close();
865}
866
867bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore &engine,
868 const QMap<QString, QDateTime> &indexMap)
869{
870 QByteArray bArray;
871
872 QDataStream data(&bArray, QIODevice::ReadWrite);
873 data << indexMap;
874
875 return engine.setCustomValue(QLatin1String("CluceneIndexedNamespaces"),
876 bArray);
877}
878
879QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine,
880 const QString &namespaceName, const QStringList &attributes) const
881{
882 QList<QUrl> docFiles = helpEngine->files(namespaceName, attributes,
883 QLatin1String("html"));
884 docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("htm"));
885 docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("txt"));
886
887 return docFiles;
888}
889
[561]890void QHelpSearchIndexWriter::closeIndexWriter(QCLuceneIndexWriter *writer)
891{
892#if !defined(QT_NO_EXCEPTIONS)
893 try {
894#endif
895 writer->close();
896 delete writer;
897#if !defined(QT_NO_EXCEPTIONS)
898 } catch (...) {
899 qWarning("Full Text Search, could not properly close index writer.");
900 }
901#endif
902}
[2]903
904 } // namespace clucene
905 } // namespace fulltextsearch
906} // namespace qt
907
908QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.