source: trunk/tools/assistant/compat/index.cpp@ 315

Last change on this file since 315 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 16.2 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the Qt Assistant of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "index.h"
43
44#include <QFile>
45#include <QDir>
46#include <QStringList>
47#include <QApplication>
48#include <QByteArray>
49#include <QTextStream>
50#include <QtAlgorithms>
51#include <QUrl>
52#include <QTextCodec>
53#include <ctype.h>
54#include <QTextDocument>
55
56QT_BEGIN_NAMESPACE
57
58struct Term {
59 Term() : frequency(-1) {}
60 Term( const QString &t, int f, QVector<Document> l ) : term( t ), frequency( f ), documents( l ) {}
61 QString term;
62 int frequency;
63 QVector<Document>documents;
64 bool operator<( const Term &i2 ) const { return frequency < i2.frequency; }
65};
66
67QDataStream &operator>>( QDataStream &s, Document &l )
68{
69 s >> l.docNumber;
70 s >> l.frequency;
71 return s;
72}
73
74QDataStream &operator<<( QDataStream &s, const Document &l )
75{
76 s << (qint16)l.docNumber;
77 s << (qint16)l.frequency;
78 return s;
79}
80
81Index::Index( const QString &dp, const QString &hp )
82 : QObject( 0 ), docPath( dp )
83{
84 Q_UNUSED(hp);
85
86 alreadyHaveDocList = false;
87 lastWindowClosed = false;
88 connect( qApp, SIGNAL(lastWindowClosed()),
89 this, SLOT(setLastWinClosed()) );
90}
91
92Index::Index( const QStringList &dl, const QString &hp )
93 : QObject( 0 )
94{
95 Q_UNUSED(hp);
96 docList = dl;
97 alreadyHaveDocList = true;
98 lastWindowClosed = false;
99 connect( qApp, SIGNAL(lastWindowClosed()),
100 this, SLOT(setLastWinClosed()) );
101}
102
103void Index::setLastWinClosed()
104{
105 lastWindowClosed = true;
106}
107
108void Index::setDictionaryFile( const QString &f )
109{
110 dictFile = f;
111}
112
113void Index::setDocListFile( const QString &f )
114{
115 docListFile = f;
116}
117
118void Index::setDocList( const QStringList &lst )
119{
120 docList = lst;
121}
122
123int Index::makeIndex()
124{
125 if ( !alreadyHaveDocList )
126 setupDocumentList();
127 if ( docList.isEmpty() )
128 return 1;
129 QStringList::Iterator it = docList.begin();
130 int steps = docList.count() / 100;
131 if ( !steps )
132 steps++;
133 int prog = 0;
134 for ( int i = 0; it != docList.end(); ++it, ++i ) {
135 if ( lastWindowClosed ) {
136 return -1;
137 }
138 QUrl url(*it);
139 parseDocument( url.toLocalFile(), i );
140 if ( i%steps == 0 ) {
141 prog++;
142 emit indexingProgress( prog );
143 }
144 }
145 return 0;
146}
147
148void Index::setupDocumentList()
149{
150 QDir d( docPath );
151 QStringList filters;
152 filters.append(QLatin1String("*.html"));
153 QStringList lst = d.entryList(filters);
154 QStringList::ConstIterator it = lst.constBegin();
155 for ( ; it != lst.constEnd(); ++it )
156 docList.append( QLatin1String("file:") + docPath + QLatin1String("/") + *it );
157}
158
159void Index::insertInDict( const QString &str, int docNum )
160{
161 if ( str == QLatin1String("amp") || str == QLatin1String("nbsp"))
162 return;
163 Entry *e = 0;
164 if ( dict.count() )
165 e = dict[ str ];
166
167 if ( e ) {
168 if ( e->documents.last().docNumber != docNum )
169 e->documents.append( Document(docNum, 1 ) );
170 else
171 e->documents.last().frequency++;
172 } else {
173 dict.insert( str, new Entry( docNum ) );
174 }
175}
176
177QString Index::getCharsetForDocument(QFile *file)
178{
179 QTextStream s(file);
180 QString contents = s.readAll();
181
182 QString encoding;
183 int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive);
184 if (start > 0) {
185 int end = contents.indexOf(QLatin1String(">"), start);
186 QString meta = contents.mid(start+5, end-start);
187 meta = meta.toLower();
188 QRegExp r(QLatin1String("charset=([^\"\\s]+)"));
189 if (r.indexIn(meta) != -1) {
190 encoding = r.cap(1);
191 }
192 }
193
194 file->seek(0);
195 if (encoding.isEmpty())
196 return QLatin1String("utf-8");
197 return encoding;
198}
199
200void Index::parseDocument( const QString &filename, int docNum )
201{
202 QFile file( filename );
203 if ( !file.open(QFile::ReadOnly) ) {
204 qWarning( "can not open file %s", qPrintable(filename) );
205 return;
206 }
207
208 QTextStream s(&file);
209 QString en = getCharsetForDocument(&file);
210 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
211
212 QString text = s.readAll();
213 if (text.isNull())
214 return;
215
216 bool valid = true;
217 const QChar *buf = text.unicode();
218 QChar str[64];
219 QChar c = buf[0];
220 int j = 0;
221 int i = 0;
222 while ( j < text.length() ) {
223 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
224 valid = false;
225 if ( i > 1 )
226 insertInDict( QString(str,i), docNum );
227 i = 0;
228 c = buf[++j];
229 continue;
230 }
231 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
232 valid = true;
233 c = buf[++j];
234 continue;
235 }
236 if ( !valid ) {
237 c = buf[++j];
238 continue;
239 }
240 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
241 str[i] = c.toLower();
242 ++i;
243 } else {
244 if ( i > 1 )
245 insertInDict( QString(str,i), docNum );
246 i = 0;
247 }
248 c = buf[++j];
249 }
250 if ( i > 1 )
251 insertInDict( QString(str,i), docNum );
252 file.close();
253}
254
255void Index::writeDict()
256{
257 QFile f( dictFile );
258 if ( !f.open(QFile::WriteOnly ) )
259 return;
260 QDataStream s( &f );
261 for(QHash<QString, Entry *>::Iterator it = dict.begin(); it != dict.end(); ++it) {
262 s << it.key();
263 s << it.value()->documents.count();
264 s << it.value()->documents;
265 }
266 f.close();
267 writeDocumentList();
268}
269
270void Index::writeDocumentList()
271{
272 QFile f( docListFile );
273 if ( !f.open(QFile::WriteOnly ) )
274 return;
275 QDataStream s( &f );
276 s << docList;
277}
278
279void Index::readDict()
280{
281 QFile f( dictFile );
282 if ( !f.open(QFile::ReadOnly ) )
283 return;
284
285 dict.clear();
286 QDataStream s( &f );
287 QString key;
288 int numOfDocs;
289 QVector<Document> docs;
290 while ( !s.atEnd() ) {
291 s >> key;
292 s >> numOfDocs;
293 docs.resize(numOfDocs);
294 s >> docs;
295 dict.insert( key, new Entry( docs ) );
296 }
297 f.close();
298 readDocumentList();
299}
300
301void Index::readDocumentList()
302{
303 QFile f( docListFile );
304 if ( !f.open(QFile::ReadOnly ) )
305 return;
306 QDataStream s( &f );
307 s >> docList;
308}
309
310QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords )
311{
312 QList<Term> termList;
313 for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) {
314 Entry *e = 0;
315 if ( (*it).contains(QLatin1Char('*')) ) {
316 QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );
317 termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) );
318 } else if ( dict[ *it ] ) {
319 e = dict[ *it ];
320 termList.append( Term( *it, e->documents.count(), e->documents ) );
321 } else {
322 return QStringList();
323 }
324 }
325 if ( !termList.count() )
326 return QStringList();
327 qSort(termList);
328
329 QVector<Document> minDocs = termList.takeFirst().documents;
330 for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {
331 Term *t = &(*it);
332 QVector<Document> docs = t->documents;
333 for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) {
334 bool found = false;
335 for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) {
336 if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) {
337 (*minDoc_it).frequency += (*doc_it).frequency;
338 found = true;
339 break;
340 }
341 }
342 if ( !found )
343 minDoc_it = minDocs.erase( minDoc_it );