source: trunk/tools/assistant/lib/qhelpsearchindexwriter_default.cpp@ 651

Last change on this file since 651 was 651, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 11.3 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the Qt Assistant of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qhelpsearchindexwriter_default_p.h"
43#include "qhelp_global.h"
44#include "qhelpenginecore.h"
45
46#include <QtCore/QDir>
47#include <QtCore/QSet>
48#include <QtCore/QUrl>
49#include <QtCore/QFile>
50#include <QtCore/QRegExp>
51#include <QtCore/QVariant>
52#include <QtCore/QFileInfo>
53#include <QtCore/QTextCodec>
54#include <QtCore/QTextStream>
55
56QT_BEGIN_NAMESPACE
57
58namespace qt {
59 namespace fulltextsearch {
60 namespace std {
61
62Writer::Writer(const QString &path)
63 : indexPath(path)
64 , indexFile(QString())
65 , documentFile(QString())
66{
67 // nothing todo
68}
69
70Writer::~Writer()
71{
72 reset();
73}
74
75void Writer::reset()
76{
77 for(QHash<QString, Entry*>::ConstIterator it =
78 index.begin(); it != index.end(); ++it) {
79 delete it.value();
80 }
81
82 index.clear();
83 documentList.clear();
84}
85
86bool Writer::writeIndex() const
87{
88 bool status;
89 QFile idxFile(indexFile);
90 if (!(status = idxFile.open(QFile::WriteOnly)))
91 return status;
92
93 QDataStream indexStream(&idxFile);
94 for(QHash<QString, Entry*>::ConstIterator it =
95 index.begin(); it != index.end(); ++it) {
96 indexStream << it.key();
97 indexStream << it.value()->documents.count();
98 indexStream << it.value()->documents;
99 }
100 idxFile.close();
101
102 QFile docFile(documentFile);
103 if (!(status = docFile.open(QFile::WriteOnly)))
104 return status;
105
106 QDataStream docStream(&docFile);
107 foreach(const QStringList list, documentList) {
108 docStream << list.at(0);
109 docStream << list.at(1);
110 }
111 docFile.close();
112
113 return status;
114}
115
116void Writer::removeIndex() const
117{
118 QFile idxFile(indexFile);
119 if (idxFile.exists())
120 idxFile.remove();
121
122 QFile docFile(documentFile);
123 if (docFile.exists())
124 docFile.remove();
125}
126
127void Writer::setIndexFile(const QString &namespaceName, const QString &attributes)
128{
129 QString extention = namespaceName + QLatin1String("@") + attributes;
130 indexFile = indexPath + QLatin1String("/indexdb40.") + extention;
131 documentFile = indexPath + QLatin1String("/indexdoc40.") + extention;
132}
133
134void Writer::insertInIndex(const QString &string, int docNum)
135{
136 if (string == QLatin1String("amp") || string == QLatin1String("nbsp"))
137 return;
138
139 Entry *entry = 0;
140 if (index.count())
141 entry = index[string];
142
143 if (entry) {
144 if (entry->documents.last().docNumber != docNum)
145 entry->documents.append(Document(docNum, 1));
146 else
147 entry->documents.last().frequency++;
148 } else {
149 index.insert(string, new Entry(docNum));
150 }
151}
152
153void Writer::insertInDocumentList(const QString &title, const QString &url)
154{
155 documentList.append(QStringList(title) << url);
156}
157
158
159QHelpSearchIndexWriter::QHelpSearchIndexWriter()
160 : QThread()
161 , m_cancel(false)
162{
163 // nothing todo
164}
165
166QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
167{
168 mutex.lock();
169 this->m_cancel = true;
170 waitCondition.wakeOne();
171 mutex.unlock();
172
173 wait();
174}
175
176void QHelpSearchIndexWriter::cancelIndexing()
177{
178 mutex.lock();
179 this->m_cancel = true;
180 mutex.unlock();
181}
182
183void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
184 const QString &indexFilesFolder,
185 bool reindex)
186{
187 wait();
188 QMutexLocker lock(&mutex);
189
190 this->m_cancel = false;
191 this->m_reindex = reindex;
192 this->m_collectionFile = collectionFile;
193 this->m_indexFilesFolder = indexFilesFolder;
194
195 start(QThread::NormalPriority);
196}
197
198void QHelpSearchIndexWriter::run()
199{
200 mutex.lock();
201
202 if (m_cancel) {
203 mutex.unlock();
204 return;
205 }
206
207 const bool reindex(this->m_reindex);
208 const QLatin1String key("DefaultSearchNamespaces");
209 const QString collectionFile(this->m_collectionFile);
210 const QString indexPath = m_indexFilesFolder;
211
212 mutex.unlock();
213
214 QHelpEngineCore engine(collectionFile, 0);
215 if (!engine.setupData())
216 return;
217
218 if (reindex)
219 engine.setCustomValue(key, QLatin1String(""));
220
221 const QStringList registeredDocs = engine.registeredDocumentations();
222 const QStringList indexedNamespaces = engine.customValue(key).toString().
223 split(QLatin1String("|"), QString::SkipEmptyParts);
224
225 emit indexingStarted();
226
227 QStringList namespaces;
228 Writer writer(indexPath);
229 foreach(const QString namespaceName, registeredDocs) {
230 mutex.lock();
231 if (m_cancel) {
232 mutex.unlock();
233 return;
234 }
235 mutex.unlock();
236
237 // if indexed, continue
238 namespaces.append(namespaceName);
239 if (indexedNamespaces.contains(namespaceName))
240 continue;
241
242 const QList<QStringList> attributeSets =
243 engine.filterAttributeSets(namespaceName);
244
245 foreach (QStringList attributes, attributeSets) {
246 // cleanup maybe old or unfinished files
247 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
248 writer.removeIndex();
249
250 QSet<QString> documentsSet;
251 const QList<QUrl> docFiles = engine.files(namespaceName, attributes);
252 foreach(QUrl url, docFiles) {
253 if (m_cancel)
254 return;
255
256 // get rid of duplicated files
257 if (url.hasFragment())
258 url.setFragment(QString());
259
260 QString s = url.toString();
261 if (s.endsWith(QLatin1String(".html"))
262 || s.endsWith(QLatin1String(".htm"))
263 || s.endsWith(QLatin1String(".txt")))
264 documentsSet.insert(s);
265 }
266
267 int docNum = 0;
268 const QStringList documentsList(documentsSet.toList());
269 foreach(const QString url, documentsList) {
270 if (m_cancel)
271 return;
272
273 QByteArray data(engine.fileData(url));
274 if (data.isEmpty())
275 continue;
276
277 QTextStream s(data);
278 QString en = QHelpGlobal::codecFromData(data);
279 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
280
281 QString text = s.readAll();
282 if (text.isNull())
283 continue;
284
285 QString title = QHelpGlobal::documentTitle(text);
286
287 int j = 0;
288 int i = 0;
289 bool valid = true;
290 const QChar *buf = text.unicode();
291 QChar str[64];
292 QChar c = buf[0];
293
294 while ( j < text.length() ) {
295 if (m_cancel)
296 return;
297
298 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
299 valid = false;
300 if ( i > 1 )
301 writer.insertInIndex(QString(str,i), docNum);
302 i = 0;
303 c = buf[++j];
304 continue;
305 }
306 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
307 valid = true;
308 c = buf[++j];
309 continue;
310 }
311 if ( !valid ) {
312 c = buf[++j];
313 continue;
314 }
315 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
316 str[i] = c.toLower();
317 ++i;
318 } else {
319 if ( i > 1 )
320 writer.insertInIndex(QString(str,i), docNum);
321 i = 0;
322 }
323 c = buf[++j];
324 }
325 if ( i > 1 )
326 writer.insertInIndex(QString(str,i), docNum);
327
328 docNum++;
329 writer.insertInDocumentList(title, url);
330 }
331
332 if (writer.writeIndex()) {
333 engine.setCustomValue(key, addNamespace(
334 engine.customValue(key).toString(), namespaceName));
335 }
336
337 writer.reset();
338 }
339 }
340
341 QStringListIterator qsli(indexedNamespaces);
342 while (qsli.hasNext()) {
343 const QString namespaceName = qsli.next();
344 if (namespaces.contains(namespaceName))
345 continue;
346
347 const QList<QStringList> attributeSets =
348 engine.filterAttributeSets(namespaceName);
349
350 foreach (QStringList attributes, attributeSets) {
351 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
352 writer.removeIndex();
353 }
354
355 engine.setCustomValue(key, removeNamespace(
356 engine.customValue(key).toString(), namespaceName));
357 }
358
359 emit indexingFinished();
360}
361
362QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
363 const QString &namespaceName)
364{
365 QString value = namespaces;
366 if (!value.contains(namespaceName))
367 value.append(namespaceName).append(QLatin1String("|"));
368
369 return value;
370}
371
372QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
373 const QString &namespaceName)
374{
375 QString value = namespaces;
376 if (value.contains(namespaceName))
377 value.remove(namespaceName + QLatin1String("|"));
378
379 return value;
380}
381
382 } // namespace std
383 } // namespace fulltextsearch
384} // namespace qt
385
386QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.