1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation ([email protected])
|
---|
6 | **
|
---|
7 | ** This file is part of the tools applications of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include "config.h"
|
---|
43 | #include "tokenizer.h"
|
---|
44 |
|
---|
45 | #include <qdebug.h>
|
---|
46 | #include <qfile.h>
|
---|
47 | #include <qhash.h>
|
---|
48 | #include <qregexp.h>
|
---|
49 | #include <qstring.h>
|
---|
50 | #include <qtextcodec.h>
|
---|
51 |
|
---|
52 | #include <ctype.h>
|
---|
53 | #include <string.h>
|
---|
54 |
|
---|
55 | QT_BEGIN_NAMESPACE
|
---|
56 |
|
---|
57 | #define LANGUAGE_CPP "Cpp"
|
---|
58 |
|
---|
59 | /* qmake ignore Q_OBJECT */
|
---|
60 |
|
---|
/*
  Spellings of the keywords recognized by the tokenizer. getToken()
  maps entry n to the token value Tok_FirstKeyword + n, so the order
  of this table must be kept in sync with tokenizer.h.
*/
static const char *kwords[] = {
    // C++ keywords
    "char",
    "class",
    "const",
    "double",
    "enum",
    "explicit",
    "friend",
    "inline",
    "int",
    "long",
    "namespace",
    "operator",
    "private",
    "protected",
    "public",
    "short",
    "signals",
    "signed",
    "slots",
    "static",
    "struct",
    "template",
    "typedef",
    "typename",
    "union",
    "unsigned",
    "using",
    "virtual",
    "void",
    "volatile",
    "__int64",

    // Qt and qdoc macros
    "Q_OBJECT",
    "Q_OVERRIDE",
    "Q_PROPERTY",
    "Q_PRIVATE_PROPERTY",
    "Q_DECLARE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_FLAGS",
    "Q_SIGNALS",
    "Q_SLOTS",
    "QT_COMPAT",
    "QT_COMPAT_CONSTRUCTOR",
    "QT_DEPRECATED",
    "QT_MOC_COMPAT",
    "QT_MODULE",
    "QT3_SUPPORT",
    "QT3_SUPPORT_CONSTRUCTOR",
    "QT3_MOC_SUPPORT",
    "QDOC_PROPERTY"
};
92 |
|
---|
93 | static const int KwordHashTableSize = 4096;
|
---|
94 | static int kwordHashTable[KwordHashTableSize];
|
---|
95 |
|
---|
96 | static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0;
|
---|
97 |
|
---|
98 | static QRegExp *comment = 0;
|
---|
99 | static QRegExp *versionX = 0;
|
---|
100 | static QRegExp *definedX = 0;
|
---|
101 |
|
---|
102 | static QRegExp *defines = 0;
|
---|
103 | static QRegExp *falsehoods = 0;
|
---|
104 |
|
---|
105 | static QTextCodec *sourceCodec = 0;
|
---|
106 |
|
---|
107 | /*
|
---|
108 | This function is a perfect hash function for the 37 keywords of C99
|
---|
109 | (with a hash table size of 512). It should perform well on our
|
---|
110 | Qt-enhanced C++ subset.
|
---|
111 | */
|
---|
112 | static int hashKword(const char *s, int len)
|
---|
113 | {
|
---|
114 | return (((uchar) s[0]) + (((uchar) s[2]) << 5) +
|
---|
115 | (((uchar) s[len - 1]) << 3)) % KwordHashTableSize;
|
---|
116 | }
|
---|
117 |
|
---|
118 | static void insertKwordIntoHash(const char *s, int number)
|
---|
119 | {
|
---|
120 | int k = hashKword(s, strlen(s));
|
---|
121 | while (kwordHashTable[k]) {
|
---|
122 | if (++k == KwordHashTableSize)
|
---|
123 | k = 0;
|
---|
124 | }
|
---|
125 | kwordHashTable[k] = number;
|
---|
126 | }
|
---|
127 |
|
---|
128 | Tokenizer::Tokenizer(const Location& loc, QFile &in)
|
---|
129 | {
|
---|
130 | init();
|
---|
131 | yyIn = in.readAll();
|
---|
132 | yyPos = 0;
|
---|
133 | start(loc);
|
---|
134 | }
|
---|
135 |
|
---|
136 | Tokenizer::Tokenizer(const Location& loc, const QByteArray &in)
|
---|
137 | : yyIn(in)
|
---|
138 | {
|
---|
139 | init();
|
---|
140 | yyPos = 0;
|
---|
141 | start(loc);
|
---|
142 | }
|
---|
143 |
|
---|
144 | Tokenizer::~Tokenizer()
|
---|
145 | {
|
---|
146 | delete[] yyLexBuf1;
|
---|
147 | delete[] yyLexBuf2;
|
---|
148 | }
|
---|
149 |
|
---|
150 | int Tokenizer::getToken()
|
---|
151 | {
|
---|
152 | char *t = yyPrevLex;
|
---|
153 | yyPrevLex = yyLex;
|
---|
154 | yyLex = t;
|
---|
155 |
|
---|
156 | while (yyCh != EOF) {
|
---|
157 | yyTokLoc = yyCurLoc;
|
---|
158 | yyLexLen = 0;
|
---|
159 |
|
---|
160 | if (isspace(yyCh)) {
|
---|
161 | do {
|
---|
162 | yyCh = getChar();
|
---|
163 | } while (isspace(yyCh));
|
---|
164 | }
|
---|
165 | else if (isalpha(yyCh) || yyCh == '_') {
|
---|
166 | do {
|
---|
167 | yyCh = getChar();
|
---|
168 | } while (isalnum(yyCh) || yyCh == '_');
|
---|
169 |
|
---|
170 | int k = hashKword(yyLex, yyLexLen);
|
---|
171 | for (;;) {
|
---|
172 | int i = kwordHashTable[k];
|
---|
173 | if (i == 0) {
|
---|
174 | return Tok_Ident;
|
---|
175 | }
|
---|
176 | else if (i == -1) {
|
---|
177 | if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
|
---|
178 | if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
|
---|
179 | int parenDepth = 0;
|
---|
180 | while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
|
---|
181 | if (yyCh == '(')
|
---|
182 | ++parenDepth;
|
---|
183 | else if (yyCh == ')')
|
---|
184 | --parenDepth;
|
---|
185 | yyCh = getChar();
|
---|
186 | }
|
---|
187 | if (yyCh == ')')
|
---|
188 | yyCh = getChar();
|
---|
189 | }
|
---|
190 | break;
|
---|
191 | }
|
---|
192 | }
|
---|
193 | else if (strcmp(yyLex, kwords[i - 1]) == 0) {
|
---|
194 | int ret = (int) Tok_FirstKeyword + i - 1;
|
---|
195 | if (ret != Tok_explicit && ret != Tok_inline && ret != Tok_typename)
|
---|
196 | return ret;
|
---|
197 | break;
|
---|
198 | }
|
---|
199 |
|
---|
200 | if (++k == KwordHashTableSize)
|
---|
201 | k = 0;
|
---|
202 | }
|
---|
203 | }
|
---|
204 | else if (isdigit(yyCh)) {
|
---|
205 | do {
|
---|
206 | yyCh = getChar();
|
---|
207 | } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
|
---|
208 | yyCh == '-');
|
---|
209 | return Tok_Number;
|
---|
210 | }
|
---|
211 | else {
|
---|
212 | switch (yyCh) {
|
---|
213 | case '!':
|
---|
214 | case '%':
|
---|
215 | yyCh = getChar();
|
---|
216 | if (yyCh == '=')
|
---|
217 | yyCh = getChar();
|
---|
218 | return Tok_SomeOperator;
|
---|
219 | case '"':
|
---|
220 | yyCh = getChar();
|
---|
221 |
|
---|
222 | while (yyCh != EOF && yyCh != '"') {
|
---|
223 | if (yyCh == '\\')
|
---|
224 | yyCh = getChar();
|
---|
225 | yyCh = getChar();
|
---|
226 | }
|
---|
227 | yyCh = getChar();
|
---|
228 |
|
---|
229 | if (yyCh == EOF)
|
---|
230 | yyTokLoc.warning(tr("Unterminated C++ string literal"),
|
---|
231 | tr("Maybe you forgot '/*!' at the beginning of the file?"));
|
---|
232 | else
|
---|
233 | return Tok_String;
|
---|
234 | break;
|
---|
235 | case '#':
|
---|
236 | return getTokenAfterPreprocessor();
|
---|
237 | case '&':
|
---|
238 | yyCh = getChar();
|
---|
239 | if (yyCh == '&' || yyCh == '=') {
|
---|
240 | yyCh = getChar();
|
---|
241 | return Tok_SomeOperator;
|
---|
242 | }
|
---|
243 | else {
|
---|
244 | return Tok_Ampersand;
|
---|
245 | }
|
---|
246 | case '\'':
|
---|
247 | yyCh = getChar();
|
---|
248 | if (yyCh == '\\')
|
---|
249 | yyCh = getChar();
|
---|
250 | do {
|
---|
251 | yyCh = getChar();
|
---|
252 | } while (yyCh != EOF && yyCh != '\'');
|
---|
253 |
|
---|
254 | if (yyCh == EOF) {
|
---|
255 | yyTokLoc.warning(tr("Unterminated C++ character"
|
---|
256 | " literal"));
|
---|
257 | }
|
---|
258 | else {
|
---|
259 | yyCh = getChar();
|
---|
260 | return Tok_Number;
|
---|
261 | }
|
---|
262 | break;
|
---|
263 | case '(':
|
---|
264 | yyCh = getChar();
|
---|
265 | if (yyNumPreprocessorSkipping == 0)
|
---|
266 | yyParenDepth++;
|
---|
267 | if (isspace(yyCh)) {
|
---|
268 | do {
|
---|
269 | yyCh = getChar();
|
---|
270 | } while (isspace(yyCh));
|
---|
271 | yyLexLen = 1;
|
---|
272 | yyLex[1] = '\0';
|
---|
273 | }
|
---|
274 | if (yyCh == '*') {
|
---|
275 | yyCh = getChar();
|
---|
276 | return Tok_LeftParenAster;
|
---|
277 | }
|
---|
278 | return Tok_LeftParen;
|
---|
279 | case ')':
|
---|
280 | yyCh = getChar();
|
---|
281 | if (yyNumPreprocessorSkipping == 0)
|
---|
282 | yyParenDepth--;
|
---|
283 | return Tok_RightParen;
|
---|
284 | case '*':
|
---|
285 | yyCh = getChar();
|
---|
286 | if (yyCh == '=') {
|
---|
287 | yyCh = getChar();
|
---|
288 | return Tok_SomeOperator;
|
---|
289 | } else {
|
---|
290 | return Tok_Aster;
|
---|
291 | }
|
---|
292 | case '^':
|
---|
293 | yyCh = getChar();
|
---|
294 | if (yyCh == '=') {
|
---|
295 | yyCh = getChar();
|
---|
296 | return Tok_SomeOperator;
|
---|
297 | } else {
|
---|
298 | return Tok_Caret;
|
---|
299 | }
|
---|
300 | case '+':
|
---|
301 | yyCh = getChar();
|
---|
302 | if (yyCh == '+' || yyCh == '=')
|
---|
303 | yyCh = getChar();
|
---|
304 | return Tok_SomeOperator;
|
---|
305 | case ',':
|
---|
306 | yyCh = getChar();
|
---|
307 | return Tok_Comma;
|
---|
308 | case '-':
|
---|
309 | yyCh = getChar();
|
---|
310 | if (yyCh == '-' || yyCh == '=') {
|
---|
311 | yyCh = getChar();
|
---|
312 | } else if (yyCh == '>') {
|
---|
313 | yyCh = getChar();
|
---|
314 | if (yyCh == '*')
|
---|
315 | yyCh = getChar();
|
---|
316 | }
|
---|
317 | return Tok_SomeOperator;
|
---|
318 | case '.':
|
---|
319 | yyCh = getChar();
|
---|
320 | if (yyCh == '*') {
|
---|
321 | yyCh = getChar();
|
---|
322 | } else if (yyCh == '.') {
|
---|
323 | do {
|
---|
324 | yyCh = getChar();
|
---|
325 | } while (yyCh == '.');
|
---|
326 | return Tok_Ellipsis;
|
---|
327 | } else if (isdigit(yyCh)) {
|
---|
328 | do {
|
---|
329 | yyCh = getChar();
|
---|
330 | } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
|
---|
331 | yyCh == '-');
|
---|
332 | return Tok_Number;
|
---|
333 | }
|
---|
334 | return Tok_SomeOperator;
|
---|
335 | case '/':
|
---|
336 | yyCh = getChar();
|
---|
337 | if (yyCh == '/') {
|
---|
338 | do {
|
---|
339 | yyCh = getChar();
|
---|
340 | } while (yyCh != EOF && yyCh != '\n');
|
---|
341 | } else if (yyCh == '*') {
|
---|
342 | bool metDoc = false; // empty doc is no doc
|
---|
343 | bool metSlashAsterBang = false;
|
---|
344 | bool metAster = false;
|
---|
345 | bool metAsterSlash = false;
|
---|
346 |
|
---|
347 | yyCh = getChar();
|
---|
348 | if (yyCh == '!')
|
---|
349 | metSlashAsterBang = true;
|
---|
350 |
|
---|
351 | while (!metAsterSlash) {
|
---|
352 | if (yyCh == EOF) {
|
---|
353 | yyTokLoc.warning(tr("Unterminated C++ comment"));
|
---|
354 | break;
|
---|
355 | } else {
|
---|
356 | if (yyCh == '*') {
|
---|
357 | metAster = true;
|
---|
358 | } else if (metAster && yyCh == '/') {
|
---|
359 | metAsterSlash = true;
|
---|
360 | } else {
|
---|
361 | metAster = false;
|
---|
362 | if (isgraph(yyCh))
|
---|
363 | metDoc = true;
|
---|
364 | }
|
---|
365 | }
|
---|
366 | yyCh = getChar();
|
---|
367 | }
|
---|
368 | if (metSlashAsterBang && metDoc)
|
---|
369 | return Tok_Doc;
|
---|
370 | else if (yyParenDepth > 0)
|
---|
371 | return Tok_Comment;
|
---|
372 | } else {
|
---|
373 | if (yyCh == '=')
|
---|
374 | yyCh = getChar();
|
---|
375 | return Tok_SomeOperator;
|
---|
376 | }
|
---|
377 | break;
|
---|
378 | case ':':
|
---|
379 | yyCh = getChar();
|
---|
380 | if (yyCh == ':') {
|
---|
381 | yyCh = getChar();
|
---|
382 | return Tok_Gulbrandsen;
|
---|
383 | } else {
|
---|
384 | return Tok_Colon;
|
---|
385 | }
|
---|
386 | case ';':
|
---|
387 | yyCh = getChar();
|
---|
388 | return Tok_Semicolon;
|
---|
389 | case '<':
|
---|
390 | yyCh = getChar();
|
---|
391 | if (yyCh == '<') {
|
---|
392 | yyCh = getChar();
|
---|
393 | if (yyCh == '=')
|
---|
394 | yyCh = getChar();
|
---|
395 | return Tok_SomeOperator;
|
---|
396 | } else if (yyCh == '=') {
|
---|
397 | yyCh = getChar();
|
---|
398 | return Tok_SomeOperator;
|
---|
399 | } else {
|
---|
400 | return Tok_LeftAngle;
|
---|
401 | }
|
---|
402 | case '=':
|
---|
403 | yyCh = getChar();
|
---|
404 | if (yyCh == '=') {
|
---|
405 | yyCh = getChar();
|
---|
406 | return Tok_SomeOperator;
|
---|
407 | } else {
|
---|
408 | return Tok_Equal;
|
---|
409 | }
|
---|
410 | case '>':
|
---|
411 | yyCh = getChar();
|
---|
412 | if (yyCh == '>') {
|
---|
413 | yyCh = getChar();
|
---|
414 | if (yyCh == '=')
|
---|
415 | yyCh = getChar();
|
---|
416 | return Tok_SomeOperator;
|
---|
417 | } else if (yyCh == '=') {
|
---|
418 | yyCh = getChar();
|
---|
419 | return Tok_SomeOperator;
|
---|
420 | } else {
|
---|
421 | return Tok_RightAngle;
|
---|
422 | }
|
---|
423 | case '?':
|
---|
424 | yyCh = getChar();
|
---|
425 | return Tok_SomeOperator;
|
---|
426 | case '[':
|
---|
427 | yyCh = getChar();
|
---|
428 | if (yyNumPreprocessorSkipping == 0)
|
---|
429 | yyBracketDepth++;
|
---|
430 | return Tok_LeftBracket;
|
---|
431 | case '\\':
|
---|
432 | yyCh = getChar();
|
---|
433 | yyCh = getChar(); // skip one character
|
---|
434 | break;
|
---|
435 | case ']':
|
---|
436 | yyCh = getChar();
|
---|
437 | if (yyNumPreprocessorSkipping == 0)
|
---|
438 | yyBracketDepth--;
|
---|
439 | return Tok_RightBracket;
|
---|
440 | case '{':
|
---|
441 | yyCh = getChar();
|
---|
442 | if (yyNumPreprocessorSkipping == 0)
|
---|
443 | yyBraceDepth++;
|
---|
444 | return Tok_LeftBrace;
|
---|
445 | case '}':
|
---|
446 | yyCh = getChar();
|
---|
447 | if (yyNumPreprocessorSkipping == 0)
|
---|
448 | yyBraceDepth--;
|
---|
449 | return Tok_RightBrace;
|
---|
450 | case '|':
|
---|
451 | yyCh = getChar();
|
---|
452 | if (yyCh == '|' || yyCh == '=')
|
---|
453 | yyCh = getChar();
|
---|
454 | return Tok_SomeOperator;
|
---|
455 | case '~':
|
---|
456 | yyCh = getChar();
|
---|
457 | return Tok_Tilde;
|
---|
458 | case '@':
|
---|
459 | yyCh = getChar();
|
---|
460 | return Tok_At;
|
---|
461 | default:
|
---|
462 | // ### We should really prevent qdoc from looking at snippet files rather than
|
---|
463 | // ### suppress warnings when reading them.
|
---|
464 | if (yyNumPreprocessorSkipping == 0 && !yyTokLoc.fileName().endsWith(".qdoc")) {
|
---|
465 | yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source")
|
---|
466 | .arg((uchar)yyCh, 1, 16));
|
---|
467 | }
|
---|
468 | yyCh = getChar();
|
---|
469 | }
|
---|
470 | }
|
---|
471 | }
|
---|
472 |
|
---|
473 | if (yyPreprocessorSkipping.count() > 1) {
|
---|
474 | yyTokLoc.warning(tr("Expected #endif before end of file"));
|
---|
475 | // clear it out or we get an infinite loop!
|
---|
476 | while (!yyPreprocessorSkipping.isEmpty()) {
|
---|
477 | popSkipping();
|
---|
478 | }
|
---|
479 | }
|
---|
480 |
|
---|
481 | strcpy(yyLex, "end-of-input");
|
---|
482 | yyLexLen = strlen(yyLex);
|
---|
483 | return Tok_Eoi;
|
---|
484 | }
|
---|
485 |
|
---|
486 | void Tokenizer::initialize(const Config &config)
|
---|
487 | {
|
---|
488 | QString versionSym = config.getString(CONFIG_VERSIONSYM);
|
---|
489 |
|
---|
490 | QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING);
|
---|
491 | if (sourceEncoding.isEmpty())
|
---|
492 | sourceEncoding = QLatin1String("ISO-8859-1");
|
---|
493 | sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit());
|
---|
494 |
|
---|
495 | comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
|
---|
496 | comment->setMinimal(true);
|
---|
497 | versionX = new QRegExp("$cannot possibly match^");
|
---|
498 | if (!versionSym.isEmpty())
|
---|
499 | versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
|
---|
500 | + ")[ \t]+\"([^\"]*)\"[ \t]*");
|
---|
501 | definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)");
|
---|
502 |
|
---|
503 | QStringList d = config.getStringList(CONFIG_DEFINES);
|
---|
504 | d += "qdoc";
|
---|
505 | defines = new QRegExp(d.join("|"));
|
---|
506 | falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join("|"));
|
---|
507 |
|
---|
508 | memset(kwordHashTable, 0, sizeof(kwordHashTable));
|
---|
509 | for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
|
---|
510 | insertKwordIntoHash(kwords[i], i + 1);
|
---|
511 |
|
---|
512 | ignoredTokensAndDirectives = new QHash<QByteArray, bool>;
|
---|
513 |
|
---|
514 | QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
|
---|
515 | foreach (const QString &t, tokens) {
|
---|
516 | const QByteArray tb = t.toAscii();
|
---|
517 | ignoredTokensAndDirectives->insert(tb, false);
|
---|
518 | insertKwordIntoHash(tb.data(), -1);
|
---|
519 | }
|
---|
520 |
|
---|
521 | QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot
|
---|
522 | + CONFIG_IGNOREDIRECTIVES);
|
---|
523 | foreach (const QString &d, directives) {
|
---|
524 | const QByteArray db = d.toAscii();
|
---|
525 | ignoredTokensAndDirectives->insert(db, true);
|
---|
526 | insertKwordIntoHash(db.data(), -1);
|
---|
527 | }
|
---|
528 | }
|
---|
529 |
|
---|
530 | void Tokenizer::terminate()
|
---|
531 | {
|
---|
532 | delete comment;
|
---|
533 | comment = 0;
|
---|
534 | delete versionX;
|
---|
535 | versionX = 0;
|
---|
536 | delete definedX;
|
---|
537 | definedX = 0;
|
---|
538 | delete defines;
|
---|
539 | defines = 0;
|
---|
540 | delete falsehoods;
|
---|
541 | falsehoods = 0;
|
---|
542 | delete ignoredTokensAndDirectives;
|
---|
543 | ignoredTokensAndDirectives = 0;
|
---|
544 | }
|
---|
545 |
|
---|
546 | void Tokenizer::init()
|
---|
547 | {
|
---|
548 | yyLexBuf1 = new char[(int) yyLexBufSize];
|
---|
549 | yyLexBuf2 = new char[(int) yyLexBufSize];
|
---|
550 | yyPrevLex = yyLexBuf1;
|
---|
551 | yyPrevLex[0] = '\0';
|
---|
552 | yyLex = yyLexBuf2;
|
---|
553 | yyLex[0] = '\0';
|
---|
554 | yyLexLen = 0;
|
---|
555 | yyPreprocessorSkipping.push(false);
|
---|
556 | yyNumPreprocessorSkipping = 0;
|
---|
557 | yyBraceDepth = 0;
|
---|
558 | yyParenDepth = 0;
|
---|
559 | yyBracketDepth = 0;
|
---|
560 | yyCh = '\0';
|
---|
561 | parsingMacro = false;
|
---|
562 | }
|
---|
563 |
|
---|
564 | void Tokenizer::start(const Location& loc)
|
---|
565 | {
|
---|
566 | yyTokLoc = loc;
|
---|
567 | yyCurLoc = loc;
|
---|
568 | yyCurLoc.start();
|
---|
569 | strcpy(yyPrevLex, "beginning-of-input");
|
---|
570 | strcpy(yyLex, "beginning-of-input");
|
---|
571 | yyLexLen = strlen(yyLex);
|
---|
572 | yyBraceDepth = 0;
|
---|
573 | yyParenDepth = 0;
|
---|
574 | yyBracketDepth = 0;
|
---|
575 | yyCh = '\0';
|
---|
576 | yyCh = getChar();
|
---|
577 | }
|
---|
578 |
|
---|
579 | /*
|
---|
580 | Returns the next token, if # was met. This function interprets the
|
---|
581 | preprocessor directive, skips over any #ifdef'd out tokens, and returns the
|
---|
582 | token after all of that.
|
---|
583 | */
|
---|
584 | int Tokenizer::getTokenAfterPreprocessor()
|
---|
585 | {
|
---|
586 | yyCh = getChar();
|
---|
587 | while (isspace(yyCh) && yyCh != '\n')
|
---|
588 | yyCh = getChar();
|
---|
589 |
|
---|
590 | /*
|
---|
591 | #directive condition
|
---|
592 | */
|
---|
593 | QString directive;
|
---|
594 | QString condition;
|
---|
595 |
|
---|
596 | while (isalpha(yyCh)) {
|
---|
597 | directive += QChar(yyCh);
|
---|
598 | yyCh = getChar();
|
---|
599 | }
|
---|
600 | if (!directive.isEmpty()) {
|
---|
601 | while (yyCh != EOF && yyCh != '\n') {
|
---|
602 | if (yyCh == '\\')
|
---|
603 | yyCh = getChar();
|
---|
604 | condition += yyCh;
|
---|
605 | yyCh = getChar();
|
---|
606 | }
|
---|
607 | condition.replace(*comment, "");
|
---|
608 | condition = condition.simplified();
|
---|
609 |
|
---|
610 | /*
|
---|
611 | The #if, #ifdef, #ifndef, #elif, #else, and #endif
|
---|
612 | directives have an effect on the skipping stack. For
|
---|
613 | instance, if the code processed so far is
|
---|
614 |
|
---|
615 | #if 1
|
---|
616 | #if 0
|
---|
617 | #if 1
|
---|
618 | // ...
|
---|
619 | #else
|
---|
620 |
|
---|
621 | the skipping stack contains, from bottom to top, false true
|
---|
622 | true (assuming 0 is false and 1 is true). If at least one
|
---|
623 | entry of the stack is true, the tokens are skipped.
|
---|
624 |
|
---|
625 | This mechanism is simple yet hard to understand.
|
---|
626 | */
|
---|
627 | if (directive[0] == QChar('i')) {
|
---|
628 | if (directive == QString("if"))
|
---|
629 | pushSkipping(!isTrue(condition));
|
---|
630 | else if (directive == QString("ifdef"))
|
---|
631 | pushSkipping(!defines->exactMatch(condition));
|
---|
632 | else if (directive == QString("ifndef"))
|
---|
633 | pushSkipping(defines->exactMatch(condition));
|
---|
634 | } else if (directive[0] == QChar('e')) {
|
---|
635 | if (directive == QString("elif")) {
|
---|
636 | bool old = popSkipping();
|
---|
637 | if (old)
|
---|
638 | pushSkipping(!isTrue(condition));
|
---|
639 | else
|
---|
640 | pushSkipping(true);
|
---|
641 | } else if (directive == QString("else")) {
|
---|
642 | pushSkipping(!popSkipping());
|
---|
643 | } else if (directive == QString("endif")) {
|
---|
644 | popSkipping();
|
---|
645 | }
|
---|
646 | } else if (directive == QString("define")) {
|
---|
647 | if (versionX->exactMatch(condition))
|
---|
648 | yyVersion = versionX->cap(1);
|
---|
649 | }
|
---|
650 | }
|
---|
651 |
|
---|
652 | int tok;
|
---|
653 | do {
|
---|
654 | /*
|
---|
655 | We set yyLex now, and after getToken() this will be
|
---|
656 | yyPrevLex. This way, we skip over the preprocessor
|
---|
657 | directive.
|
---|
658 | */
|
---|
659 | qstrcpy(yyLex, yyPrevLex);
|
---|
660 |
|
---|
661 | /*
|
---|
662 | If getToken() meets another #, it will call
|
---|
663 | getTokenAfterPreprocessor() once again, which could in turn
|
---|
664 | call getToken() again, etc. Unless there are 10,000 or so
|
---|
665 | preprocessor directives in a row, this shouldn't overflow
|
---|
666 | the stack.
|
---|
667 | */
|
---|
668 | tok = getToken();
|
---|
669 | } while (yyNumPreprocessorSkipping > 0);
|
---|
670 | return tok;
|
---|
671 | }
|
---|
672 |
|
---|
673 | /*
|
---|
674 | Pushes a new skipping value onto the stack. This corresponds to entering a
|
---|
675 | new #if block.
|
---|
676 | */
|
---|
677 | void Tokenizer::pushSkipping(bool skip)
|
---|
678 | {
|
---|
679 | yyPreprocessorSkipping.push(skip);
|
---|
680 | if (skip)
|
---|
681 | yyNumPreprocessorSkipping++;
|
---|
682 | }
|
---|
683 |
|
---|
684 | /*
|
---|
685 | Pops a skipping value from the stack. This corresponds to reaching a #endif.
|
---|
686 | */
|
---|
687 | bool Tokenizer::popSkipping()
|
---|
688 | {
|
---|
689 | if (yyPreprocessorSkipping.isEmpty()) {
|
---|
690 | yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
|
---|
691 | return true;
|
---|
692 | }
|
---|
693 |
|
---|
694 | bool skip = yyPreprocessorSkipping.pop();
|
---|
695 | if (skip)
|
---|
696 | yyNumPreprocessorSkipping--;
|
---|
697 | return skip;
|
---|
698 | }
|
---|
699 |
|
---|
700 | /*
|
---|
701 | Returns true if the condition evaluates as true, otherwise false. The
|
---|
702 | condition is represented by a string. Unsophisticated parsing techniques are
|
---|
703 | used. The preprocessing method could be named StriNg-Oriented PreProcessing,
|
---|
704 | as SNOBOL stands for StriNg-Oriented symBOlic Language.
|
---|
705 | */
|
---|
706 | bool Tokenizer::isTrue(const QString &condition)
|
---|
707 | {
|
---|
708 | int firstOr = -1;
|
---|
709 | int firstAnd = -1;
|
---|
710 | int parenDepth = 0;
|
---|
711 |
|
---|
712 | /*
|
---|
713 | Find the first logical operator at top level, but be careful
|
---|
714 | about precedence. Examples:
|
---|
715 |
|
---|
716 | X || Y // the or
|
---|
717 | X || Y || Z // the leftmost or
|
---|
718 | X || Y && Z // the or
|
---|
719 | X && Y || Z // the or
|
---|
720 | (X || Y) && Z // the and
|
---|
721 | */
|
---|
722 | for (int i = 0; i < (int) condition.length() - 1; i++) {
|
---|
723 | QChar ch = condition[i];
|
---|
724 | if (ch == QChar('(')) {
|
---|
725 | parenDepth++;
|
---|
726 | } else if (ch == QChar(')')) {
|
---|
727 | parenDepth--;
|
---|
728 | } else if (parenDepth == 0) {
|
---|
729 | if (condition[i + 1] == ch) {
|
---|
730 | if (ch == QChar('|')) {
|
---|
731 | firstOr = i;
|
---|
732 | break;
|
---|
733 | } else if (ch == QChar('&')) {
|
---|
734 | if (firstAnd == -1)
|
---|
735 | firstAnd = i;
|
---|
736 | }
|
---|
737 | }
|
---|
738 | }
|
---|
739 | }
|
---|
740 | if (firstOr != -1)
|
---|
741 | return isTrue(condition.left(firstOr)) ||
|
---|
742 | isTrue(condition.mid(firstOr + 2));
|
---|
743 | if (firstAnd != -1)
|
---|
744 | return isTrue(condition.left(firstAnd)) &&
|
---|
745 | isTrue(condition.mid(firstAnd + 2));
|
---|
746 |
|
---|
747 | QString t = condition.simplified();
|
---|
748 | if (t.isEmpty())
|
---|
749 | return true;
|
---|
750 |
|
---|
751 | if (t[0] == QChar('!'))
|
---|
752 | return !isTrue(t.mid(1));
|
---|
753 | if (t[0] == QChar('(') && t.right(1)[0] == QChar(')'))
|
---|
754 | return isTrue(t.mid(1, t.length() - 2));
|
---|
755 |
|
---|
756 | if (definedX->exactMatch(t))
|
---|
757 | return defines->exactMatch(definedX->cap(1));
|
---|
758 | else
|
---|
759 | return !falsehoods->exactMatch(t);
|
---|
760 | }
|
---|
761 |
|
---|
762 | QString Tokenizer::lexeme() const
|
---|
763 | {
|
---|
764 | return sourceCodec->toUnicode(yyLex);
|
---|
765 | }
|
---|
766 |
|
---|
767 | QString Tokenizer::previousLexeme() const
|
---|
768 | {
|
---|
769 | return sourceCodec->toUnicode(yyPrevLex);
|
---|
770 | }
|
---|
771 |
|
---|
772 | QT_END_NAMESPACE
|
---|