source: trunk/src/tools/moc/preprocessor.cpp@ 701

Last change on this file since 701 was 651, checked in by Dmitry A. Kuminov, 16 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 30.0 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the tools applications of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "preprocessor.h"
43#include "utils.h"
44#include <QStringList>
45#include <QFile>
46#include <QDir>
47#include <QFileInfo>
48
49QT_BEGIN_NAMESPACE
50
51#include "ppkeywords.cpp"
52#include "keywords.cpp"
53
54// transform \r\n into \n
55// \r into \n (os9 style)
56// backslash-newlines into newlines
57static QByteArray cleaned(const QByteArray &input)
58{
59 QByteArray result;
60 result.reserve(input.size());
61 const char *data = input;
62 char *output = result.data();
63
64 int newlines = 0;
65 while (*data) {
66 while (*data && is_space(*data))
67 ++data;
68 bool takeLine = (*data == '#');
69 if (*data == '%' && *(data+1) == ':') {
70 takeLine = true;
71 ++data;
72 }
73 if (takeLine) {
74 *output = '#';
75 ++output;
76 do ++data; while (*data && is_space(*data));
77 }
78 while (*data) {
79 // handle \\\n, \\\r\n and \\\r
80 if (*data == '\\') {
81 if (*(data + 1) == '\r') {
82 ++data;
83 }
84 if (*data && (*(data + 1) == '\n' || (*data) == '\r')) {
85 ++newlines;
86 data += 1;
87 if (*data != '\r')
88 data += 1;
89 continue;
90 }
91 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
92 ++data;
93 }
94
95 char ch = *data;
96 if (ch == '\r') // os9: replace \r with \n
97 ch = '\n';
98 *output = ch;
99 ++output;
100
101 if (*data == '\n') {
102 // output additional newlines to keep the correct line-numbering
103 // for the lines following the backslash-newline sequence(s)
104 while (newlines) {
105 *output = '\n';
106 ++output;
107 --newlines;
108 }
109 ++data;
110 break;
111 }
112 ++data;
113 }
114 }
115 result.resize(output - result.constData());
116 return result;
117}
118
119bool Preprocessor::preprocessOnly = false;
120void Preprocessor::skipUntilEndif()
121{
122 while(index < symbols.size() - 1 && symbols.at(index).token != PP_ENDIF){
123 switch (symbols.at(index).token) {
124 case PP_IF:
125 case PP_IFDEF:
126 case PP_IFNDEF:
127 ++index;
128 skipUntilEndif();
129 break;
130 default:
131 ;
132 }
133 ++index;
134 }
135}
136
137bool Preprocessor::skipBranch()
138{
139 while (index < symbols.size() - 1
140 && (symbols.at(index).token != PP_ENDIF
141 && symbols.at(index).token != PP_ELIF
142 && symbols.at(index).token != PP_ELSE)
143 ){
144 switch (symbols.at(index).token) {
145 case PP_IF:
146 case PP_IFDEF:
147 case PP_IFNDEF:
148 ++index;
149 skipUntilEndif();
150 break;
151 default:
152 ;
153 }
154 ++index;
155 }
156 return (index < symbols.size() - 1);
157}
158
159
// Lexer modes used by tokenize(). TokenizeCpp scans with the `keywords`
// tables; every other mode takes the preprocessor branch, which scans with
// the `pp_keywords` tables.
160enum TokenizeMode { TokenizeCpp, TokenizePreprocessor, PreparePreprocessorStatement, TokenizePreprocessorStatement, TokenizeInclude };
// Splits `input` into Symbols. Line numbers start at `lineNum` and are
// advanced on every newline seen (including those inside C comments); `mode`
// selects the initial lexer mode. A default-constructed Symbol is appended as
// end-of-file marker.
// The scan stops at the first NUL byte of `input`.
161static Symbols tokenize(const QByteArray &input, int lineNum = 1, TokenizeMode mode = TokenizeCpp)
162{
163 Symbols symbols;
164 const char *begin = input;
165 const char *data = begin;
166 while (*data) {
167 if (mode == TokenizeCpp) {
168 int column = 0;
169
170 const char *lexem = data;
171 int state = 0;
172 Token token = NOTOKEN;
 // walk the keyword state machine for as long as a transition exists
173 for (;;) {
 // bytes with the high bit set are skipped: where char is signed
 // they would be a negative index into keyword_trans[][] below
174 if (static_cast<signed char>(*data) < 0) {
175 ++data;
176 continue;
177 }
178 int nextindex = keywords[state].next;
179 int next = 0;
180 if (*data == keywords[state].defchar)
181 next = keywords[state].defnext;
182 else if (!state || nextindex)
183 next = keyword_trans[nextindex][(int)*data];
184 if (!next)
185 break;
186 state = next;
187 token = keywords[state].token;
188 ++data;
189 }
190
191 // suboptimal, is_ident_char should use a table
192 if (keywords[state].ident && is_ident_char(*data))
193 token = keywords[state].ident;
194
195 if (token == NOTOKEN) {
196 // an error really
197 ++data;
198 continue;
199 }
200
201 ++column;
202
 // tokens above SPECIAL_TREATMENT_MARK need hand-written scanning
203 if (token > SPECIAL_TREATMENT_MARK) {
204 switch (token) {
205 case QUOTE:
206 data = skipQuote(data);
207 token = STRING_LITERAL;
208 // concatenate multi-line strings for easier
209 // STRING_LITERAL handling in moc
210 if (!Preprocessor::preprocessOnly
211 && !symbols.isEmpty()
212 && symbols.last().token == STRING_LITERAL) {
213
214 QByteArray newString = symbols.last().unquotedLexem();
215 newString += input.mid(lexem - begin + 1, data - lexem - 2);
216 newString.prepend('\"');
217 newString.append('\"');
218 symbols.last() = Symbol(symbols.last().lineNum,
219 STRING_LITERAL,
220 newString);
221 continue;
222 }
223 break;
224 case SINGLEQUOTE:
 // advance to the closing quote; a quote preceded by a single
 // backslash is treated as escaped
225 while (*data && (*data != '\''
226 || (*(data-1)=='\\'
227 && *(data-2)!='\\')))
228 ++data;
229 if (*data)
230 ++data;
231 token = CHARACTER_LITERAL;
232 break;
233 case LANGLE_SCOPE:
234 // split <:: into two tokens, < and ::
235 token = LANGLE;
236 data -= 2;
237 break;
238 case DIGIT:
239 while (is_digit_char(*data))
240 ++data;
241 if (!*data || *data != '.') {
242 token = INTEGER_LITERAL;
 // "0x"/"0X" prefix: consume the hexadecimal digits as well
243 if (data - lexem == 1 &&
244 (*data == 'x' || *data == 'X')
245 && *lexem == '0') {
246 ++data;
247 while (is_hex_char(*data))
248 ++data;
249 }
250 break;
251 }
252 token = FLOATING_LITERAL;
253 ++data;
254 // fall through
255 case FLOATING_LITERAL:
256 while (is_digit_char(*data))
257 ++data;
258 if (*data == '+' || *data == '-')
259 ++data;
260 if (*data == 'e' || *data == 'E') {
261 ++data;
262 while (is_digit_char(*data))
263 ++data;
264 }
265 if (*data == 'f' || *data == 'F'
266 || *data == 'l' || *data == 'L')
267 ++data;
268 break;
269 case HASH:
 // NOTE(review): `column` is re-initialised to 0 for every token
 // and incremented exactly once above, so this test looks like it
 // is always true at this point -- confirm the intent.
270 if (column == 1) {
271 mode = PreparePreprocessorStatement;
272 while (*data && (*data == ' ' || *data == '\t'))
273 ++data;
274 if (is_ident_char(*data))
275 mode = TokenizePreprocessorStatement;
276 continue;
277 }
278 break;
279 case NEWLINE:
280 ++lineNum;
281 continue;
282 case BACKSLASH:
283 {
 // a backslash followed only by blanks and a newline is dropped
 // together with that newline; otherwise it is kept as a token
284 const char *rewind = data;
285 while (*data && (*data == ' ' || *data == '\t'))
286 ++data;
287 if (*data && *data == '\n') {
288 ++data;
289 continue;
290 }
291 data = rewind;
292 } break;
293 case CHARACTER:
294 while (is_ident_char(*data))
295 ++data;
296 token = IDENTIFIER;
297 break;
298 case C_COMMENT:
 // step over the first two characters after the opener before
 // looking back at data[-1] / data[-2] for the closing "*/"
299 if (*data) {
300 if (*data == '\n')
301 ++lineNum;
302 ++data;
303 if (*data) {
304 if (*data == '\n')
305 ++lineNum;
306 ++data;
307 }
308 }
309 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
310 if (*data == '\n')
311 ++lineNum;
312 ++data;
313 }
314 token = WHITESPACE; // one comment, one whitespace
315 // fall through;
316 case WHITESPACE:
317 if (column == 1)
318 column = 0;
319 while (*data && (*data == ' ' || *data == '\t'))
320 ++data;
321 if (Preprocessor::preprocessOnly) // tokenize whitespace
322 break;
323 continue;
324 case CPP_COMMENT:
325 while (*data && *data != '\n')
326 ++data;
327 continue; // ignore safely, the newline is a separator
328 default:
329 continue; //ignore
330 }
331 }
 // With USE_LEXEM_STORE, tokens other than identifiers and literals are
 // stored by line and token only (no input/offset/length arguments).
332#ifdef USE_LEXEM_STORE
333 if (!Preprocessor::preprocessOnly
334 && token != IDENTIFIER
335 && token != STRING_LITERAL
336 && token != FLOATING_LITERAL
337 && token != INTEGER_LITERAL)
338 symbols += Symbol(lineNum, token);
339 else
340#endif
341 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
342
343 } else { // Preprocessor
344
345 const char *lexem = data;
346 int state = 0;
347 Token token = NOTOKEN;
 // the '#' itself was consumed in C++ mode, so start the state machine
 // in the state reached after reading '#'
348 if (mode == TokenizePreprocessorStatement) {
349 state = pp_keyword_trans[0][(int)'#'];
350 mode = TokenizePreprocessor;
351 }
352 for (;;) {
 // same high-bit guard as in the C++ branch
353 if (static_cast<signed char>(*data) < 0) {
354 ++data;
355 continue;
356 }
357
358 int nextindex = pp_keywords[state].next;
359 int next = 0;
360 if (*data == pp_keywords[state].defchar)
361 next = pp_keywords[state].defnext;
362 else if (!state || nextindex)
363 next = pp_keyword_trans[nextindex][(int)*data];
364 if (!next)
365 break;
366 state = next;
367 token = pp_keywords[state].token;
368 ++data;
369 }
370 // suboptimal, is_ident_char should use a table
371 if (pp_keywords[state].ident && is_ident_char(*data))
372 token = pp_keywords[state].ident;
373
374 switch (token) {
375 case NOTOKEN:
376 ++data;
377 break;
 // #ifdef / #ifndef are emitted as the token sequences
 // PP_IF PP_DEFINED and PP_IF PP_NOT PP_DEFINED respectively
378 case PP_IFDEF:
379 symbols += Symbol(lineNum, PP_IF);
380 symbols += Symbol(lineNum, PP_DEFINED);
381 continue;
382 case PP_IFNDEF:
383 symbols += Symbol(lineNum, PP_IF);
384 symbols += Symbol(lineNum, PP_NOT);
385 symbols += Symbol(lineNum, PP_DEFINED);
386 continue;
387 case PP_INCLUDE:
388 mode = TokenizeInclude;
389 break;
390 case PP_QUOTE:
391 data = skipQuote(data);
392 token = PP_STRING_LITERAL;
393 break;
394 case PP_SINGLEQUOTE:
395 while (*data && (*data != '\''
396 || (*(data-1)=='\\'
397 && *(data-2)!='\\')))
398 ++data;
399 if (*data)
400 ++data;
401 token = PP_CHARACTER_LITERAL;
402 break;
403 case PP_DIGIT:
404 while (is_digit_char(*data))
405 ++data;
406 if (!*data || *data != '.') {
407 token = PP_INTEGER_LITERAL;
408 if (data - lexem == 1 &&
409 (*data == 'x' || *data == 'X')
410 && *lexem == '0') {
411 ++data;
412 while (is_hex_char(*data))
413 ++data;
414 }
415 break;
416 }
417 token = PP_FLOATING_LITERAL;
418 ++data;
419 // fall through
420 case PP_FLOATING_LITERAL:
421 while (is_digit_char(*data))
422 ++data;
423 if (*data == '+' || *data == '-')
424 ++data;
425 if (*data == 'e' || *data == 'E') {
426 ++data;
427 while (is_digit_char(*data))
428 ++data;
429 }
430 if (*data == 'f' || *data == 'F'
431 || *data == 'l' || *data == 'L')
432 ++data;
433 break;
434 case PP_CHARACTER:
435 if (mode == PreparePreprocessorStatement) {
436 // rewind entire token to begin
437 data = lexem;
438 mode = TokenizePreprocessorStatement;
439 continue;
440 }
441 while (is_ident_char(*data))
442 ++data;
443 token = PP_IDENTIFIER;
444 break;
445 case PP_C_COMMENT:
446 if (*data) {
447 if (*data == '\n')
448 ++lineNum;
449 ++data;
450 if (*data) {
451 if (*data == '\n')
452 ++lineNum;
453 ++data;
454 }
455 }
456 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
457 if (*data == '\n')
458 ++lineNum;
459 ++data;
460 }
461 token = PP_WHITESPACE; // one comment, one whitespace
462 // fall through;
463 case PP_WHITESPACE:
464 while (*data && (*data == ' ' || *data == '\t'))
465 ++data;
466 continue; // the preprocessor needs no whitespace
467 case PP_CPP_COMMENT:
468 while (*data && *data != '\n')
469 ++data;
470 continue; // ignore safely, the newline is a separator
 // a newline ends the directive: go back to C++ tokenizing
471 case PP_NEWLINE:
472 ++lineNum;
473 mode = TokenizeCpp;
474 break;
475 case PP_BACKSLASH:
476 {
477 const char *rewind = data;
478 while (*data && (*data == ' ' || *data == '\t'))
479 ++data;
480 if (*data && *data == '\n') {
481 ++data;
482 continue;
483 }
484 data = rewind;
485 } break;
 // after PP_INCLUDE, "<...>" is scanned as a single string literal that
 // ends after the first '>' or at the end of the line
486 case PP_LANGLE:
487 if (mode != TokenizeInclude)
488 break;
489 token = PP_STRING_LITERAL;
490 while (*data && *data != '\n' && *(data-1) != '>')
491 ++data;
492 break;
493 default:
494 break;
495 }
 // still looking for the directive name: nothing is emitted yet
496 if (mode == PreparePreprocessorStatement)
497 continue;
498#ifdef USE_LEXEM_STORE
499 if (token != PP_IDENTIFIER
500 && token != PP_STRING_LITERAL
501 && token != PP_FLOATING_LITERAL
502 && token != PP_INTEGER_LITERAL)
503 symbols += Symbol(lineNum, token);
504 else
505#endif
506 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
507 }
508 }
509 symbols += Symbol(); // eof symbol
510 return symbols;
511}
512
513void Preprocessor::substituteMacro(const MacroName &macro, Symbols &substituted, MacroSafeSet safeset)
514{
515 Symbols saveSymbols = symbols;
516 int saveIndex = index;
517
518 symbols = macros.value(macro).symbols;
519 index = 0;
520
521 safeset += macro;
522 substituteUntilNewline(substituted, safeset);
523
524 symbols = saveSymbols;
525 index = saveIndex;
526}
527
528
529
530void Preprocessor::substituteUntilNewline(Symbols &substituted, MacroSafeSet safeset)
531{
532 while (hasNext()) {
533 Token token = next();
534 if (token == PP_IDENTIFIER) {
535 MacroName macro = symbol();
536 if (macros.contains(macro) && !safeset.contains(macro)) {
537 substituteMacro(macro, substituted, safeset);
538 continue;
539 }
540 } else if (token == PP_DEFINED) {
541 test(PP_LPAREN);
542 next(PP_IDENTIFIER);
543 Symbol definedOrNotDefined = symbol();
544 definedOrNotDefined.token = macros.contains(definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
545 substituted += definedOrNotDefined;
546 test(PP_RPAREN);
547 continue;
548 } else if (token == PP_NEWLINE) {
549 substituted += symbol();
550 break;
551 }
552 substituted += symbol();
553 }
554}
555
556
557class PP_Expression : public Parser
558{
559public:
560 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
561
562 int conditional_expression();
563 int logical_OR_expression();
564 int logical_AND_expression();
565 int inclusive_OR_expression();
566 int exclusive_OR_expression();
567 int AND_expression();
568 int equality_expression();
569 int relational_expression();
570 int shift_expression();
571 int additive_expression();
572 int multiplicative_expression();
573 int unary_expression();
574 bool unary_expression_lookup();
575 int primary_expression();
576 bool primary_expression_lookup();
577};
578
579int PP_Expression::conditional_expression()
580{
581 int value = logical_OR_expression();
582 if (test(PP_QUESTION)) {
583 int alt1 = conditional_expression();
584 int alt2 = test(PP_COLON) ? conditional_expression() : 0;
585 return value ? alt1 : alt2;
586 }
587 return value;
588}
589
590int PP_Expression::logical_OR_expression()
591{
592 int value = logical_AND_expression();
593 if (test(PP_OROR))
594 return logical_OR_expression() || value;
595 return value;
596}
597
598int PP_Expression::logical_AND_expression()
599{
600 int value = inclusive_OR_expression();
601 if (test(PP_ANDAND))
602 return logical_AND_expression() && value;
603 return value;
604}
605
606int PP_Expression::inclusive_OR_expression()
607{
608 int value = exclusive_OR_expression();
609 if (test(PP_OR))
610 return value | inclusive_OR_expression();
611 return value;
612}
613
614int PP_Expression::exclusive_OR_expression()
615{
616 int value = AND_expression();
617 if (test(PP_HAT))
618 return value ^ exclusive_OR_expression();
619 return value;
620}
621
622int PP_Expression::AND_expression()
623{
624 int value = equality_expression();
625 if (test(PP_AND))
626 return value & AND_expression();
627 return value;
628}
629
630int PP_Expression::equality_expression()
631{
632 int value = relational_expression();
633 switch (next()) {
634 case PP_EQEQ:
635 return value == equality_expression();
636 case PP_NE:
637 return value != equality_expression();
638 default:
639 prev();
640 return value;
641 }
642}
643
644int PP_Expression::relational_expression()
645{
646 int value = shift_expression();
647 switch (next()) {
648 case PP_LANGLE:
649 return value < relational_expression();
650 case PP_RANGLE:
651 return value > relational_expression();
652 case PP_LE:
653 return value <= relational_expression();
654 case PP_GE:
655 return value >= relational_expression();
656 default:
657 prev();
658 return value;
659 }