source: trunk/tools/assistant/lib/qhelpsearchindexwriter_default.cpp@ 317

Last change on this file since 317 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 11.3 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the Qt Assistant of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qhelpsearchindexwriter_default_p.h"
43#include "qhelp_global.h"
44#include "qhelpenginecore.h"
45
46#include <QtCore/QDir>
47#include <QtCore/QSet>
48#include <QtCore/QUrl>
49#include <QtCore/QFile>
50#include <QtCore/QRegExp>
51#include <QtCore/QVariant>
52#include <QtCore/QFileInfo>
53#include <QtCore/QTextCodec>
54#include <QtCore/QTextStream>
55
56QT_BEGIN_NAMESPACE
57
58namespace qt {
59 namespace fulltextsearch {
60 namespace std {
61
62Writer::Writer(const QString &path)
63 : indexPath(path)
64 , indexFile(QString())
65 , documentFile(QString())
66{
67 // nothing todo
68}
69
70Writer::~Writer()
71{
72 reset();
73}
74
75void Writer::reset()
76{
77 for(QHash<QString, Entry*>::ConstIterator it =
78 index.begin(); it != index.end(); ++it) {
79 delete it.value();
80 }
81
82 index.clear();
83 documentList.clear();
84}
85
86bool Writer::writeIndex() const
87{
88 bool status;
89 QFile idxFile(indexFile);
90 if (!(status = idxFile.open(QFile::WriteOnly)))
91 return status;
92
93 QDataStream indexStream(&idxFile);
94 for(QHash<QString, Entry*>::ConstIterator it =
95 index.begin(); it != index.end(); ++it) {
96 indexStream << it.key();
97 indexStream << it.value()->documents.count();
98 indexStream << it.value()->documents;
99 }
100 idxFile.close();
101
102 QFile docFile(documentFile);
103 if (!(status = docFile.open(QFile::WriteOnly)))
104 return status;
105
106 QDataStream docStream(&docFile);
107 foreach(const QStringList list, documentList) {
108 docStream << list.at(0);
109 docStream << list.at(1);
110 }
111 docFile.close();
112
113 return status;
114}
115
116void Writer::removeIndex() const
117{
118 QFile idxFile(indexFile);
119 if (idxFile.exists())
120 idxFile.remove();
121
122 QFile docFile(documentFile);
123 if (docFile.exists())
124 docFile.remove();
125}
126
127void Writer::setIndexFile(const QString &namespaceName, const QString &attributes)
128{
129 QString extention = namespaceName + QLatin1String("@") + attributes;
130 indexFile = indexPath + QLatin1String("/indexdb40.") + extention;
131 documentFile = indexPath + QLatin1String("/indexdoc40.") + extention;
132}
133
134void Writer::insertInIndex(const QString &string, int docNum)
135{
136 if (string == QLatin1String("amp") || string == QLatin1String("nbsp"))
137 return;
138
139 Entry *entry = 0;
140 if (index.count())
141 entry = index[string];
142
143 if (entry) {
144 if (entry->documents.last().docNumber != docNum)
145 entry->documents.append(Document(docNum, 1));
146 else
147 entry->documents.last().frequency++;
148 } else {
149 index.insert(string, new Entry(docNum));
150 }
151}
152
153void Writer::insertInDocumentList(const QString &title, const QString &url)
154{
155 documentList.append(QStringList(title) << url);
156}
157
158
159QHelpSearchIndexWriter::QHelpSearchIndexWriter()
160 : QThread()
161 , m_cancel(false)
162{
163 // nothing todo
164}
165
166QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
167{
168 mutex.lock();
169 this->m_cancel = true;
170 waitCondition.wakeOne();
171 mutex.unlock();
172
173 wait();
174}
175
176void QHelpSearchIndexWriter::cancelIndexing()
177{
178 mutex.lock();
179 this->m_cancel = true;
180 mutex.unlock();
181}
182
183void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
184 const QString &indexFilesFolder,
185 bool reindex)
186{
187 QMutexLocker lock(&mutex);
188
189 this->m_cancel = false;
190 this->m_reindex = reindex;
191 this->m_collectionFile = collectionFile;
192 this->m_indexFilesFolder = indexFilesFolder;
193
194 start(QThread::NormalPriority);
195}
196
197void QHelpSearchIndexWriter::run()
198{
199 mutex.lock();
200
201 if (m_cancel) {
202 mutex.unlock();
203 return;
204 }
205
206 const bool reindex(this->m_reindex);
207 const QLatin1String key("DefaultSearchNamespaces");
208 const QString collectionFile(this->m_collectionFile);
209 const QString indexPath = m_indexFilesFolder;
210
211 mutex.unlock();
212
213 QHelpEngineCore engine(collectionFile, 0);
214 if (!engine.setupData())
215 return;
216
217 if (reindex)
218 engine.setCustomValue(key, QLatin1String(""));
219
220 const QStringList registeredDocs = engine.registeredDocumentations();
221 const QStringList indexedNamespaces = engine.customValue(key).toString().
222 split(QLatin1String("|"), QString::SkipEmptyParts);
223
224 emit indexingStarted();
225
226 QStringList namespaces;
227 Writer writer(indexPath);
228 foreach(const QString namespaceName, registeredDocs) {
229 mutex.lock();
230 if (m_cancel) {
231 mutex.unlock();
232 return;
233 }
234 mutex.unlock();
235
236 // if indexed, continue
237 namespaces.append(namespaceName);
238 if (indexedNamespaces.contains(namespaceName))
239 continue;
240
241 const QList<QStringList> attributeSets =
242 engine.filterAttributeSets(namespaceName);
243
244 foreach (QStringList attributes, attributeSets) {
245 // cleanup maybe old or unfinished files
246 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
247 writer.removeIndex();
248
249 QSet<QString> documentsSet;
250 const QList<QUrl> docFiles = engine.files(namespaceName, attributes);
251 foreach(QUrl url, docFiles) {
252 if (m_cancel)
253 return;
254
255 // get rid of duplicated files
256 if (url.hasFragment())
257 url.setFragment(QString());
258
259 QString s = url.toString();
260 if (s.endsWith(QLatin1String(".html"))
261 || s.endsWith(QLatin1String(".htm"))
262 || s.endsWith(QLatin1String(".txt")))
263 documentsSet.insert(s);
264 }
265
266 int docNum = 0;
267 const QStringList documentsList(documentsSet.toList());
268 foreach(const QString url, documentsList) {
269 if (m_cancel)
270 return;
271
272 QByteArray data(engine.fileData(url));
273 if (data.isEmpty())
274 continue;
275
276 QTextStream s(data);
277 QString en = QHelpGlobal::charsetFromData(data);
278 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
279
280 QString text = s.readAll();
281 if (text.isNull())
282 continue;
283
284 QString title = QHelpGlobal::documentTitle(text);
285
286 int j = 0;
287 int i = 0;
288 bool valid = true;
289 const QChar *buf = text.unicode();
290 QChar str[64];
291 QChar c = buf[0];
292
293 while ( j < text.length() ) {
294 if (m_cancel)
295 return;
296
297 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
298 valid = false;
299 if ( i > 1 )
300 writer.insertInIndex(QString(str,i), docNum);
301 i = 0;
302 c = buf[++j];
303 continue;
304 }
305 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
306 valid = true;
307 c = buf[++j];
308 continue;
309 }
310 if ( !valid ) {
311 c = buf[++j];
312 continue;
313 }
314 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
315 str[i] = c.toLower();
316 ++i;
317 } else {
318 if ( i > 1 )
319 writer.insertInIndex(QString(str,i), docNum);
320 i = 0;
321 }
322 c = buf[++j];
323 }
324 if ( i > 1 )
325 writer.insertInIndex(QString(str,i), docNum);
326
327 docNum++;
328 writer.insertInDocumentList(title, url);
329 }
330
331 if (writer.writeIndex()) {
332 engine.setCustomValue(key, addNamespace(
333 engine.customValue(key).toString(), namespaceName));
334 }
335
336 writer.reset();
337 }
338 }
339
340 QStringListIterator qsli(indexedNamespaces);
341 while (qsli.hasNext()) {
342 const QString namespaceName = qsli.next();
343 if (namespaces.contains(namespaceName))
344 continue;
345
346 const QList<QStringList> attributeSets =
347 engine.filterAttributeSets(namespaceName);
348
349 foreach (QStringList attributes, attributeSets) {
350 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
351 writer.removeIndex();
352 }
353
354 engine.setCustomValue(key, removeNamespace(
355 engine.customValue(key).toString(), namespaceName));
356 }
357
358 emit indexingFinished();
359}
360
361QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
362 const QString &namespaceName)
363{
364 QString value = namespaces;
365 if (!value.contains(namespaceName))
366 value.append(namespaceName).append(QLatin1String("|"));
367
368 return value;
369}
370
371QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
372 const QString &namespaceName)
373{
374 QString value = namespaces;
375 if (value.contains(namespaceName))
376 value.remove(namespaceName + QLatin1String("|"));
377
378 return value;
379}
380
381 } // namespace std
382 } // namespace fulltextsearch
383} // namespace qt
384
385QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.