source: trunk/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp@ 275

Last change on this file since 275 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 14.9 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the Qt Assistant of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qhelpenginecore.h"
43#include "fulltextsearch/qsearchable_p.h"
44#include "fulltextsearch/qqueryparser_p.h"
45#include "fulltextsearch/qindexreader_p.h"
46#include "qhelpsearchindexreader_clucene_p.h"
47
48#include <QtCore/QDir>
49#include <QtCore/QSet>
50#include <QtCore/QString>
51#include <QtCore/QFileInfo>
52#include <QtCore/QStringList>
53#include <QtCore/QTextStream>
54#include <QtCore/QMutexLocker>
55
56QT_BEGIN_NAMESPACE
57
58namespace qt {
59 namespace fulltextsearch {
60 namespace clucene {
61
62QHelpSearchIndexReader::QHelpSearchIndexReader()
63 : QThread()
64 , m_cancel(false)
65{
66 // nothing todo
67}
68
69QHelpSearchIndexReader::~QHelpSearchIndexReader()
70{
71 mutex.lock();
72 this->m_cancel = true;
73 waitCondition.wakeOne();
74 mutex.unlock();
75
76 wait();
77}
78
79void QHelpSearchIndexReader::cancelSearching()
80{
81 mutex.lock();
82 this->m_cancel = true;
83 mutex.unlock();
84}
85
86void QHelpSearchIndexReader::search(const QString &collectionFile, const QString &indexFilesFolder,
87 const QList<QHelpSearchQuery> &queryList)
88{
89 QMutexLocker lock(&mutex);
90
91 this->hitList.clear();
92 this->m_cancel = false;
93 this->m_query = queryList;
94 this->m_collectionFile = collectionFile;
95 this->m_indexFilesFolder = indexFilesFolder;
96
97 start(QThread::NormalPriority);
98}
99
100int QHelpSearchIndexReader::hitsCount() const
101{
102 return hitList.count();
103}
104
105QHelpSearchEngine::SearchHit QHelpSearchIndexReader::hit(int index) const
106{
107 return hitList.at(index);
108}
109
110void QHelpSearchIndexReader::run()
111{
112 mutex.lock();
113
114 if (m_cancel) {
115 mutex.unlock();
116 return;
117 }
118
119 const QString collectionFile(this->m_collectionFile);
120 const QList<QHelpSearchQuery> &queryList = this->m_query;
121 const QString indexPath(m_indexFilesFolder);
122
123 mutex.unlock();
124
125 QHelpEngineCore engine(collectionFile, 0);
126 if (!engine.setupData())
127 return;
128
129 QFileInfo fInfo(indexPath);
130 if (fInfo.exists() && !fInfo.isWritable()) {
131 qWarning("Full Text Search, could not read index (missing permissions).");
132 return;
133 }
134
135 if(QCLuceneIndexReader::indexExists(indexPath)) {
136 mutex.lock();
137 if (m_cancel) {
138 mutex.unlock();
139 return;
140 }
141 mutex.unlock();
142
143 emit searchingStarted();
144
145#if !defined(QT_NO_EXCEPTIONS)
146 try {
147#endif
148 QCLuceneBooleanQuery booleanQuery;
149 QCLuceneStandardAnalyzer analyzer;
150 if (!buildQuery(booleanQuery, queryList, analyzer)) {
151 emit searchingFinished(0);
152 return;
153 }
154
155 const QStringList attribList = engine.filterAttributes(engine.currentFilter());
156 if (!attribList.isEmpty()) {
157 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
158 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer);
159
160 if (!query) {
161 emit searchingFinished(0);
162 return;
163 }
164 booleanQuery.add(query, true, true, false);
165 }
166
167 QCLuceneIndexSearcher indexSearcher(indexPath);
168 QCLuceneHits hits = indexSearcher.search(booleanQuery);
169
170 bool boost = true;
171 QCLuceneBooleanQuery tryHarderQuery;
172 if (hits.length() == 0) {
173 if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) {
174 if (!attribList.isEmpty()) {
175 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
176 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"),
177 analyzer);
178 tryHarderQuery.add(query, true, true, false);
179 }
180 hits = indexSearcher.search(tryHarderQuery);
181 boost = (hits.length() == 0);
182 }
183 }
184
185 QSet<QString> pathSet;
186 QCLuceneDocument document;
187 const QStringList namespaceList = engine.registeredDocumentations();
188
189 for (qint32 i = 0; i < hits.length(); i++) {
190 document = hits.document(i);
191 const QString path = document.get(QLatin1String("path"));
192 if (!pathSet.contains(path) && namespaceList.contains(
193 document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) {
194 pathSet.insert(path);
195 hitList.append(qMakePair(path, document.get(QLatin1String("title"))));
196 }
197 document.clear();
198
199 mutex.lock();
200 if (m_cancel) {
201 mutex.unlock();
202 emit searchingFinished(0);
203 return;
204 }
205 mutex.unlock();
206 }
207
208 indexSearcher.close();
209 const int count = hitList.count();
210 if ((count > 0) && boost)
211 boostSearchHits(engine, hitList, queryList);
212 emit searchingFinished(hitList.count());
213
214#if !defined(QT_NO_EXCEPTIONS)
215 } catch(...) {
216 hitList.clear();
217 emit searchingFinished(0);
218 }
219#endif
220 }
221}
222
223bool QHelpSearchIndexReader::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery,
224 QCLuceneStandardAnalyzer &analyzer)
225{
226 const QLatin1String c("content");
227 const QLatin1String t("titleTokenized");
228
229 QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer);
230 QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer);
231 if (query && query2) {
232 booleanQuery.add(query, true, false, false);
233 booleanQuery.add(query2, true, false, false);
234 return true;
235 }
236
237 return false;
238}
239
240bool QHelpSearchIndexReader::buildQuery(QCLuceneBooleanQuery &booleanQuery,
241 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
242{
243 foreach (const QHelpSearchQuery query, queryList) {
244 switch (query.fieldName) {
245 case QHelpSearchQuery::FUZZY: {
246 const QLatin1String fuzzy("~");
247 foreach (const QString &term, query.wordList) {
248 if (term.isEmpty()
249 || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) {
250 return false;
251 }
252 }
253 } break;
254
255 case QHelpSearchQuery::WITHOUT: {
256 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
257 foreach (const QString &term, query.wordList) {
258 if (stopWords.contains(term, Qt::CaseInsensitive))
259 continue;
260
261 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
262 QLatin1String("content"), term.toLower()));
263 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
264 QLatin1String("titleTokenized"), term.toLower()));
265
266 if (query && query2) {
267 booleanQuery.add(query, true, false, true);
268 booleanQuery.add(query2, true, false, true);
269 } else {
270 return false;
271 }
272 }
273 } break;
274
275 case QHelpSearchQuery::PHRASE: {
276 const QString &term = query.wordList.at(0).toLower();
277 if (term.contains(QLatin1Char(' '))) {
278 QStringList termList = term.split(QLatin1String(" "));
279 QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
280 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
281 foreach (const QString &term, termList) {
282 if (!stopWords.contains(term, Qt::CaseInsensitive))
283 q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower()));
284 }
285 booleanQuery.add(q, true, true, false);
286 } else {
287 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
288 QLatin1String("content"), term.toLower()));
289 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
290 QLatin1String("titleTokenized"), term.toLower()));
291
292 if (query && query2) {
293 booleanQuery.add(query, true, true, false);
294 booleanQuery.add(query2, true, false, false);
295 } else {
296 return false;
297 }
298 }
299 } break;
300
301 case QHelpSearchQuery::ALL: {
302 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
303 foreach (const QString &term, query.wordList) {
304 if (stopWords.contains(term, Qt::CaseInsensitive))
305 continue;
306
307 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
308 QLatin1String("content"), term.toLower()));
309
310 if (query) {
311 booleanQuery.add(query, true, true, false);
312 } else {
313 return false;
314 }
315 }
316 } break;
317
318 case QHelpSearchQuery::DEFAULT: {
319 foreach (const QString &term, query.wordList) {
320 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
321 QLatin1String("content"), analyzer);
322
323 if (query)
324 booleanQuery.add(query, true, true, false);
325 }
326 } break;
327
328 case QHelpSearchQuery::ATLEAST: {
329 foreach (const QString &term, query.wordList) {
330 if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer))
331 return false;
332 }
333 }
334 }
335 }
336
337 return true;
338}
339
340bool QHelpSearchIndexReader::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,
341 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
342{
343 bool retVal = false;
344 foreach (const QHelpSearchQuery query, queryList) {
345 switch (query.fieldName) {
346 default: break;
347 case QHelpSearchQuery::DEFAULT: {
348 foreach (const QString &term, query.wordList) {
349 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
350 QLatin1String("content"), analyzer);
351
352 if (query) {
353 retVal = true;
354 booleanQuery.add(query, true, false, false);
355 }
356 }
357 } break;
358 }
359 }
360 return retVal;
361}
362
363void QHelpSearchIndexReader::boostSearchHits(const QHelpEngineCore &engine,
364 QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList)
365{
366 foreach (const QHelpSearchQuery query, queryList) {
367 if (query.fieldName != QHelpSearchQuery::DEFAULT)
368 continue;
369
370 QString joinedQuery = query.wordList.join(QLatin1String(" "));
371
372 QCLuceneStandardAnalyzer analyzer;
373 QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
374 joinedQuery, QLatin1String("content"), analyzer);
375
376 if (parsedQuery) {
377 joinedQuery = parsedQuery->toString();
378 delete parsedQuery;
379 }
380
381 int length = QString(QLatin1String("content:")).length();
382 int index = joinedQuery.indexOf(QLatin1String("content:"));
383
384 QString term;
385 int nextIndex = 0;
386 QStringList searchTerms;
387 while (index != -1) {
388 nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1);
389 term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
390 if (term.startsWith(QLatin1String("\""))
391 && term.endsWith(QLatin1String("\""))) {
392 searchTerms.append(term.remove(QLatin1String("\"")));
393 } else {
394 searchTerms += term.split(QLatin1Char(' '));
395 }
396 index = nextIndex;
397 }
398 searchTerms.removeDuplicates();
399
400 int count = qMin(75, hitList.count());
401 QMap<int, QHelpSearchEngine::SearchHit> hitMap;
402 for (int i = 0; i < count; ++i) {
403 const QHelpSearchEngine::SearchHit &hit = hitList.at(i);
404 QString data = QString::fromUtf8(engine.fileData(hit.first));
405
406 int counter = 0;
407 foreach (const QString &term, searchTerms)
408 counter += data.count(term, Qt::CaseInsensitive);
409 hitMap.insertMulti(counter, hit);
410 }
411
412 QList<QHelpSearchEngine::SearchHit> boostedList;
413 QMap<int, QHelpSearchEngine::SearchHit>::const_iterator it = hitMap.constEnd();
414 do {
415 --it;
416 boostedList.append(it.value());
417 } while (it != hitMap.constBegin());
418 boostedList += hitList.mid(count, hitList.count());
419
420 hitList = boostedList;
421 }
422}
423
424 } // namespace clucene
425 } // namespace fulltextsearch
426} // namespace qt
427
428QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.