source: trunk/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp@ 987

Last change on this file since 987 was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 17.4 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the Qt Assistant of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "fulltextsearch/qindexreader_p.h"
43#include "fulltextsearch/qqueryparser_p.h"
44#include "fulltextsearch/qsearchable_p.h"
45#include "qclucenefieldnames_p.h"
46#include "qhelpenginecore.h"
47
48#include "qhelpsearchindexreader_clucene_p.h"
49
50#include <QtCore/QDir>
51#include <QtCore/QSet>
52#include <QtCore/QString>
53#include <QtCore/QFileInfo>
54#include <QtCore/QSharedPointer>
55#include <QtCore/QStringList>
56#include <QtCore/QTextStream>
57#include <QtCore/QMutexLocker>
58
59QT_BEGIN_NAMESPACE
60
61namespace fulltextsearch {
62namespace clucene {
63
64QHelpSearchIndexReaderClucene::QHelpSearchIndexReaderClucene()
65 : QHelpSearchIndexReader()
66{
67 // nothing todo
68}
69
70QHelpSearchIndexReaderClucene::~QHelpSearchIndexReaderClucene()
71{
72}
73
74
75void QHelpSearchIndexReaderClucene::run()
76{
77 mutex.lock();
78
79 if (m_cancel) {
80 mutex.unlock();
81 return;
82 }
83
84 const QString collectionFile(this->m_collectionFile);
85 const QList<QHelpSearchQuery> &queryList = this->m_query;
86 const QString indexPath(m_indexFilesFolder);
87
88 mutex.unlock();
89
90 QHelpEngineCore engine(collectionFile, 0);
91 if (!engine.setupData())
92 return;
93
94 QFileInfo fInfo(indexPath);
95 if (fInfo.exists() && !fInfo.isWritable()) {
96 qWarning("Full Text Search, could not read index (missing permissions).");
97 return;
98 }
99
100 if(QCLuceneIndexReader::indexExists(indexPath)) {
101 mutex.lock();
102 if (m_cancel) {
103 mutex.unlock();
104 return;
105 }
106 mutex.unlock();
107
108 emit searchingStarted();
109
110#if !defined(QT_NO_EXCEPTIONS)
111 try {
112#endif
113 QCLuceneBooleanQuery booleanQueryTitle;
114 QCLuceneBooleanQuery booleanQueryContent;
115 QCLuceneStandardAnalyzer analyzer;
116 const QStringList& attribList =
117 engine.filterAttributes(engine.currentFilter());
118 bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField,
119 attribList, booleanQueryTitle, analyzer);
120 bool contentQueryIsValid = buildQuery(queryList, ContentField,
121 attribList, booleanQueryContent, analyzer);
122 if (!titleQueryIsValid && !contentQueryIsValid) {
123 emit searchingFinished(0);
124 return;
125 }
126
127 QCLuceneIndexSearcher indexSearcher(indexPath);
128
129 // QCLuceneHits object must be allocated on the heap, because
130 // there is no default constructor.
131 QSharedPointer<QCLuceneHits> titleHits;
132 QSharedPointer<QCLuceneHits> contentHits;
133 if (titleQueryIsValid) {
134 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
135 indexSearcher.search(booleanQueryTitle)));
136 }
137 if (contentQueryIsValid) {
138 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
139 indexSearcher.search(booleanQueryContent)));
140 }
141 bool boost = true;
142 if ((titleHits.isNull() || titleHits->length() == 0)
143 && (contentHits.isNull() || contentHits->length() == 0)) {
144 booleanQueryTitle = QCLuceneBooleanQuery();
145 booleanQueryContent = QCLuceneBooleanQuery();
146 titleQueryIsValid =
147 buildTryHarderQuery(queryList, TitleTokenizedField,
148 attribList, booleanQueryTitle, analyzer);
149 contentQueryIsValid =
150 buildTryHarderQuery(queryList, ContentField, attribList,
151 booleanQueryContent, analyzer);
152 if (!titleQueryIsValid && !contentQueryIsValid) {
153 emit searchingFinished(0);
154 return;
155 }
156 if (titleQueryIsValid) {
157 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
158 indexSearcher.search(booleanQueryTitle)));
159 }
160 if (contentQueryIsValid) {
161 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
162 indexSearcher.search(booleanQueryContent)));
163 }
164 boost = false;
165 }
166 QList<QSharedPointer<QCLuceneHits> > cluceneHitsList;
167 if (!titleHits.isNull())
168 cluceneHitsList.append(titleHits);
169 if (!contentHits.isNull())
170 cluceneHitsList.append(contentHits);
171
172 QSet<QString> pathSet;
173 QCLuceneDocument document;
174 const QStringList namespaceList = engine.registeredDocumentations();
175
176 foreach (const QSharedPointer<QCLuceneHits> &hits, cluceneHitsList) {
177 for (qint32 i = 0; i < hits->length(); i++) {
178 document = hits->document(i);
179 const QString path = document.get(PathField);
180 if (!pathSet.contains(path) && namespaceList.contains(
181 document.get(NamespaceField), Qt::CaseInsensitive)) {
182 pathSet.insert(path);
183 hitList.append(qMakePair(path, document.get(TitleField)));
184 }
185 document.clear();
186
187 mutex.lock();
188 if (m_cancel) {
189 mutex.unlock();
190 emit searchingFinished(0);
191 return;
192 }
193 mutex.unlock();
194 }
195 }
196
197 indexSearcher.close();
198 const int count = hitList.count();
199 if ((count > 0) && boost)
200 boostSearchHits(engine, hitList, queryList);
201 emit searchingFinished(hitList.count());
202
203#if !defined(QT_NO_EXCEPTIONS)
204 } catch(...) {
205 mutex.lock();
206 hitList.clear();
207 mutex.unlock();
208 emit searchingFinished(0);
209 }
210#endif
211 }
212}
213
214bool QHelpSearchIndexReaderClucene::buildQuery(
215 const QList<QHelpSearchQuery> &queries, const QString &fieldName,
216 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
217 QCLuceneAnalyzer &analyzer)
218{
219 bool queryIsValid = false;
220 foreach (const QHelpSearchQuery &query, queries) {
221 if (fieldName != ContentField && isNegativeQuery(query)) {
222 queryIsValid = false;
223 break;
224 }
225 switch (query.fieldName) {
226 case QHelpSearchQuery::FUZZY:
227 if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer))
228 queryIsValid = true;
229 break;
230 case QHelpSearchQuery::WITHOUT:
231 if (fieldName != ContentField)
232 return false;
233 if (addWithoutQuery(query, fieldName, booleanQuery))
234 queryIsValid = true;
235 break;
236 case QHelpSearchQuery::PHRASE:
237 if (addPhraseQuery(query, fieldName, booleanQuery))
238 queryIsValid = true;
239 break;
240 case QHelpSearchQuery::ALL:
241 if (addAllQuery(query, fieldName, booleanQuery))
242 queryIsValid = true;
243 break;
244 case QHelpSearchQuery::DEFAULT:
245 if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer))
246 queryIsValid = true;
247 break;
248 case QHelpSearchQuery::ATLEAST:
249 if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer))
250 queryIsValid = true;
251 break;
252 default:
253 Q_ASSERT(!"Invalid field name");
254 }
255 }
256
257 if (queryIsValid && !filterAttributes.isEmpty()) {
258 queryIsValid =
259 addAttributesQuery(filterAttributes, booleanQuery, analyzer);
260 }
261
262 return queryIsValid;
263}
264
265bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(
266 const QList<QHelpSearchQuery> &queries, const QString &fieldName,
267 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
268 QCLuceneAnalyzer &analyzer)
269{
270 if (queries.isEmpty())
271 return false;
272 const QHelpSearchQuery &query = queries.front();
273 if (query.fieldName != QHelpSearchQuery::DEFAULT)
274 return false;
275 if (isNegativeQuery(query))
276 return false;
277 if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer))
278 return false;
279 if (filterAttributes.isEmpty())
280 return true;
281 return addAttributesQuery(filterAttributes, booleanQuery, analyzer);
282}
283
284bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const
285{
286 const QString &search = query.wordList.join(" ");
287 return search.contains('!') || search.contains('-')
288 || search.contains(QLatin1String(" NOT "));
289}
290
291bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query,
292 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery,
293 QCLuceneAnalyzer &analyzer)
294{
295 bool queryIsValid = false;
296 const QLatin1String fuzzy("~");
297 foreach (const QString &term, query.wordList) {
298 if (!term.isEmpty()) {
299 QCLuceneQuery *lQuery =
300 QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer);
301 if (lQuery != 0) {
302 booleanQuery.add(lQuery, true, false, false);
303 queryIsValid = true;
304 }
305 }
306 }
307 return queryIsValid;
308}
309
310bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query,
311 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
312{
313 bool queryIsValid = false;
314 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
315 foreach (const QString &term, query.wordList) {
316 if (stopWords.contains(term, Qt::CaseInsensitive))
317 continue;
318 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
319 fieldName, term.toLower()));
320 booleanQuery.add(lQuery, true, false, true);
321 queryIsValid = true;
322 }
323 return queryIsValid;
324}
325
326bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query,
327 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
328{
329 bool queryIsValid = false;
330 const QString &term = query.wordList.at(0).toLower();
331 if (term.contains(QLatin1Char(' '))) {
332 const QStringList termList = term.split(QLatin1String(" "));
333 QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
334 const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
335 foreach (const QString &term, termList) {
336 if (!stopWords.contains(term, Qt::CaseInsensitive))
337 q->addTerm(QCLuceneTerm(fieldName, term.toLower()));
338 }
339 if (!q->getTerms().isEmpty()) {
340 booleanQuery.add(q, true, true, false);
341 queryIsValid = true;
342 }
343 } else {
344 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
345 fieldName, term.toLower()));
346 booleanQuery.add(lQuery, true, true, false);
347 queryIsValid = true;
348 }
349 return queryIsValid;
350}
351
352bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query,
353 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
354{
355 bool queryIsValid = false;
356 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords();
357 foreach (const QString &term, query.wordList) {
358 if (stopWords.contains(term, Qt::CaseInsensitive))
359 continue;
360 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm(
361 fieldName, term.toLower()));
362 booleanQuery.add(lQuery, true, true, false);
363 queryIsValid = true;
364 }
365 return queryIsValid;
366}
367
368bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query,
369 const QString &fieldName, bool allTermsRequired,
370 QCLuceneBooleanQuery &booleanQuery,
371 QCLuceneAnalyzer &analyzer)
372{
373 bool queryIsValid = false;
374 foreach (const QString &term, query.wordList) {
375 QCLuceneQuery *lQuery =
376 QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer);
377 if (lQuery) {
378 booleanQuery.add(lQuery, true, allTermsRequired, false);
379 queryIsValid = true;
380 }
381 }
382 return queryIsValid;
383}
384
385bool QHelpSearchIndexReaderClucene::addAtLeastQuery(
386 const QHelpSearchQuery &query, const QString &fieldName,
387 QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer)
388{
389 bool queryIsValid = false;
390 foreach (const QString &term, query.wordList) {
391 if (!term.isEmpty()) {
392 QCLuceneQuery *lQuery =
393 QCLuceneQueryParser::parse(term, fieldName, analyzer);
394 if (lQuery) {
395 booleanQuery.add(lQuery, true, false, false);
396 queryIsValid = true;
397 }
398 }
399 }
400 return queryIsValid;
401}
402
403bool QHelpSearchIndexReaderClucene::addAttributesQuery(
404 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
405 QCLuceneAnalyzer &analyzer)
406{
407 QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+")
408 + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer);
409 if (!lQuery)
410 return false;
411 booleanQuery.add(lQuery, true, true, false);
412 return true;
413}
414
415void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
416 QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList)
417{
418 foreach (const QHelpSearchQuery &query, queryList) {
419 if (query.fieldName != QHelpSearchQuery::DEFAULT)
420 continue;
421
422 QString joinedQuery = query.wordList.join(QLatin1String(" "));
423
424 QCLuceneStandardAnalyzer analyzer;
425 QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
426 joinedQuery, ContentField, analyzer);
427
428 if (parsedQuery) {
429 joinedQuery = parsedQuery->toString();
430 delete parsedQuery;
431 }
432
433 const QString contentString(ContentField + QLatin1String(":"));
434 int length = contentString.length();
435 int index = joinedQuery.indexOf(contentString);
436
437 QString term;
438 int nextIndex = 0;
439 QStringList searchTerms;
440 while (index != -1) {
441 nextIndex = joinedQuery.indexOf(contentString, index + 1);
442 term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
443 if (term.startsWith(QLatin1String("\""))
444 && term.endsWith(QLatin1String("\""))) {
445 searchTerms.append(term.remove(QLatin1String("\"")));
446 } else {
447 searchTerms += term.split(QLatin1Char(' '));
448 }
449 index = nextIndex;
450 }
451 searchTerms.removeDuplicates();
452
453 int count = qMin(75, hitList.count());
454 QMap<int, QHelpSearchEngine::SearchHit> hitMap;
455 for (int i = 0; i < count; ++i) {
456 const QHelpSearchEngine::SearchHit &hit = hitList.at(i);
457 QString data = QString::fromUtf8(engine.fileData(hit.first));
458
459 int counter = 0;
460 foreach (const QString &term, searchTerms)
461 counter += data.count(term, Qt::CaseInsensitive);
462 hitMap.insertMulti(counter, hit);
463 }
464
465 QList<QHelpSearchEngine::SearchHit> boostedList;
466 QMap<int, QHelpSearchEngine::SearchHit>::const_iterator it = hitMap.constEnd();
467 do {
468 --it;
469 boostedList.append(it.value());
470 } while (it != hitMap.constBegin());
471 boostedList += hitList.mid(count, hitList.count());
472 mutex.lock();
473 hitList = boostedList;
474 mutex.unlock();
475 }
476}
477
478} // namespace clucene
479} // namespace fulltextsearch
480
481QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.