1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2001-2004 Roberto Raggi
|
---|
4 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
5 | ** All rights reserved.
|
---|
6 | ** Contact: Nokia Corporation ([email protected])
|
---|
7 | **
|
---|
8 | ** This file is part of the qt3to4 porting application of the Qt Toolkit.
|
---|
9 | **
|
---|
10 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
11 | ** Commercial Usage
|
---|
12 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
13 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
14 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
15 | ** a written agreement between you and Nokia.
|
---|
16 | **
|
---|
17 | ** GNU Lesser General Public License Usage
|
---|
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
19 | ** General Public License version 2.1 as published by the Free Software
|
---|
20 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
21 | ** packaging of this file. Please review the following information to
|
---|
22 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
24 | **
|
---|
25 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
26 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
28 | **
|
---|
29 | ** GNU General Public License Usage
|
---|
30 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
31 | ** General Public License version 3.0 as published by the Free Software
|
---|
32 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
33 | ** packaging of this file. Please review the following information to
|
---|
34 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
35 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
36 | **
|
---|
37 | ** If you have questions regarding the use of this file, please contact
|
---|
38 | ** Nokia at [email protected].
|
---|
39 | ** $QT_END_LICENSE$
|
---|
40 | **
|
---|
41 | ****************************************************************************/
|
---|
42 |
|
---|
43 | #include "rpplexer.h"
|
---|
44 | #include <QChar>
|
---|
45 | #include <ctype.h>
|
---|
46 |
|
---|
47 | QT_BEGIN_NAMESPACE
|
---|
48 |
|
---|
49 | using namespace TokenEngine;
|
---|
50 |
|
---|
51 | namespace Rpp {
|
---|
52 | RppLexer::RppLexer()
|
---|
53 | : m_buffer(0), m_ptr(0), m_len(0)
|
---|
54 | {
|
---|
55 | setupScanTable();
|
---|
56 | }
|
---|
57 |
|
---|
58 | void RppLexer::setupScanTable()
|
---|
59 | {
|
---|
60 | memset(s_attr_table, 0, 256);
|
---|
61 |
|
---|
62 | for (int i=0; i<128; ++i) {
|
---|
63 | switch (i) {
|
---|
64 | case ':':
|
---|
65 | case '*':
|
---|
66 | case '%':
|
---|
67 | case '^':
|
---|
68 | case '=':
|
---|
69 | case '!':
|
---|
70 | case '&':
|
---|
71 | case '|':
|
---|
72 | case '+':
|
---|
73 | case '<':
|
---|
74 | case '>':
|
---|
75 | case '-':
|
---|
76 | case '.':
|
---|
77 | s_scan_table[i] = &RppLexer::scanOperator;
|
---|
78 | break;
|
---|
79 |
|
---|
80 | case '\r':
|
---|
81 | case '\n':
|
---|
82 | s_scan_table[i] = &RppLexer::scanNewline;
|
---|
83 | break;
|
---|
84 |
|
---|
85 | case '\'':
|
---|
86 | s_scan_table[i] = &RppLexer::scanCharLiteral;
|
---|
87 | break;
|
---|
88 |
|
---|
89 | case '"':
|
---|
90 | s_scan_table[i] = &RppLexer::scanStringLiteral;
|
---|
91 | break;
|
---|
92 | case '#':
|
---|
93 | s_scan_table[i] = &RppLexer::scanPreprocessor;
|
---|
94 | break;
|
---|
95 |
|
---|
96 | case '/':
|
---|
97 | s_scan_table[i] = &RppLexer::scanComment;
|
---|
98 | break;
|
---|
99 |
|
---|
100 | default:
|
---|
101 | if (isspace(i)) {
|
---|
102 | s_scan_table[i] = &RppLexer::scanWhiteSpaces;
|
---|
103 | s_attr_table[i] |= A_Whitespace;
|
---|
104 | } else if (isalpha(i) || i == '_') {
|
---|
105 | s_scan_table[i] = &RppLexer::scanKeyword;
|
---|
106 | s_attr_table[i] |= A_Alpha;
|
---|
107 | } else if (isdigit(i)) {
|
---|
108 | s_scan_table[i] = &RppLexer::scanNumberLiteral;
|
---|
109 | s_attr_table[i] |= A_Digit;
|
---|
110 | } else
|
---|
111 | s_scan_table[i] = &RppLexer::scanChar;
|
---|
112 | }
|
---|
113 | }
|
---|
114 |
|
---|
115 | s_scan_table[128] = &RppLexer::scanUnicodeChar;
|
---|
116 | }
|
---|
117 |
|
---|
118 | QVector<Type> RppLexer::lex(const TokenContainer &tokenContainer)
|
---|
119 | {
|
---|
120 | QVector<Type> tokenTypes;
|
---|
121 | const int numTokens = tokenContainer.count();
|
---|
122 | tokenTypes.reserve(numTokens);
|
---|
123 | QByteArray text = tokenContainer.fullText();
|
---|
124 | m_buffer = text.constData();
|
---|
125 | for(int t=0; t<numTokens; ++t) {
|
---|
126 | TokenEngine::Token token = tokenContainer.token(t);
|
---|
127 | tokenTypes.append(indentify(token.start, token.length));
|
---|
128 | }
|
---|
129 | return tokenTypes;
|
---|
130 | }
|
---|
131 |
|
---|
132 | Type RppLexer::indentify(int pos, int length)
|
---|
133 | {
|
---|
134 | Q_ASSERT(length > 0);
|
---|
135 | m_ptr = pos;
|
---|
136 | m_len = length;
|
---|
137 | int kind = 0;
|
---|
138 | const unsigned char ch = m_buffer[pos];
|
---|
139 | (this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
|
---|
140 | return (Type)kind;
|
---|
141 | }
|
---|
142 |
|
---|
143 | void RppLexer::scanChar(int *kind)
|
---|
144 | {
|
---|
145 | *kind = m_buffer[m_ptr];
|
---|
146 | }
|
---|
147 |
|
---|
148 | void RppLexer::scanWhiteSpaces(int *kind)
|
---|
149 | {
|
---|
150 | *kind = Token_whitespaces;
|
---|
151 |
|
---|
152 | while (unsigned char ch = m_buffer[m_ptr]) {
|
---|
153 | if (s_attr_table[ch] & A_Whitespace)
|
---|
154 | ++m_ptr;
|
---|
155 | else
|
---|
156 | break;
|
---|
157 | }
|
---|
158 | }
|
---|
159 |
|
---|
160 | void RppLexer::scanNewline(int *kind)
|
---|
161 | {
|
---|
162 | *kind = '\n';
|
---|
163 | }
|
---|
164 |
|
---|
165 | void RppLexer::scanUnicodeChar(int *kind)
|
---|
166 | {
|
---|
167 | *kind = m_buffer[m_ptr];
|
---|
168 | }
|
---|
169 |
|
---|
170 | void RppLexer::scanCharLiteral(int *kind)
|
---|
171 | {
|
---|
172 | *kind = Token_char_literal;
|
---|
173 | }
|
---|
174 |
|
---|
175 | void RppLexer::scanStringLiteral(int *kind)
|
---|
176 | {
|
---|
177 | *kind = Token_string_literal;
|
---|
178 | }
|
---|
179 |
|
---|
180 | void RppLexer::scanIdentifier(int *kind)
|
---|
181 | {
|
---|
182 | *kind = Token_identifier;
|
---|
183 | }
|
---|
184 |
|
---|
185 | void RppLexer::scanNumberLiteral(int *kind)
|
---|
186 | {
|
---|
187 | *kind = Token_number_literal;
|
---|
188 | }
|
---|
189 |
|
---|
190 | void RppLexer::scanPreprocessor(int *kind)
|
---|
191 | {
|
---|
192 | *kind = Token_preproc;
|
---|
193 | }
|
---|
194 |
|
---|
195 | void RppLexer::scanComment(int *kind)
|
---|
196 | {
|
---|
197 | switch(m_buffer[m_ptr + 1]) {
|
---|
198 | case '/':
|
---|
199 | *kind = Token_line_comment;
|
---|
200 | break;
|
---|
201 | case '*':
|
---|
202 | *kind = Token_multiline_comment;
|
---|
203 | break;
|
---|
204 | default:
|
---|
205 | scanOperator(kind);
|
---|
206 | }
|
---|
207 | }
|
---|
208 |
|
---|
209 | void RppLexer::scanOperator(int *kind)
|
---|
210 | {
|
---|
211 | switch (m_buffer[m_ptr]) {
|
---|
212 | case ':':
|
---|
213 | if (m_buffer[m_ptr+1] == ':') {
|
---|
214 | *kind = Token_scope;
|
---|
215 | return;
|
---|
216 | }
|
---|
217 | break;
|
---|
218 |
|
---|
219 | case '*':
|
---|
220 | case '/':
|
---|
221 | case '%':
|
---|
222 | case '^':
|
---|
223 | if (m_buffer[m_ptr+1] == '=') {
|
---|
224 | *kind = Token_assign;
|
---|
225 | return;
|
---|
226 | }
|
---|
227 | break;
|
---|
228 |
|
---|
229 | case '=':
|
---|
230 | if (m_buffer[m_ptr+1] == '=') {
|
---|
231 | *kind = Token_eq;
|
---|
232 | return;
|
---|
233 | }
|
---|
234 | break;
|
---|
235 | case '!':
|
---|
236 | if (m_buffer[m_ptr+1] == '=') {
|
---|
237 | *kind = Token_not_eq;
|
---|
238 | return;
|
---|
239 | }
|
---|
240 | break;
|
---|
241 |
|
---|
242 | case '&':
|
---|
243 | if (m_buffer[m_ptr+1] == '&') {
|
---|
244 | *kind = Token_and;
|
---|
245 | return;
|
---|
246 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
247 | *kind = Token_assign;
|
---|
248 | return;
|
---|
249 | }
|
---|
250 | break;
|
---|
251 |
|
---|
252 | case '|':
|
---|
253 | if (m_buffer[m_ptr+1] == '|' ) {
|
---|
254 | *kind = Token_or;
|
---|
255 | return;
|
---|
256 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
257 | *kind = Token_assign;
|
---|
258 | return;
|
---|
259 | }
|
---|
260 | break;
|
---|
261 |
|
---|
262 | case '+':
|
---|
263 | if (m_buffer[m_ptr+1] == '+' ) {
|
---|
264 | *kind = Token_incr;
|
---|
265 | return;
|
---|
266 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
267 | *kind = Token_assign;
|
---|
268 | return;
|
---|
269 | }
|
---|
270 | break;
|
---|
271 |
|
---|
272 | case '<':
|
---|
273 | if (m_buffer[m_ptr+1] == '<') {
|
---|
274 | if (m_buffer[m_ptr+2] == '=') {
|
---|
275 | *kind = Token_assign;
|
---|
276 | return;
|
---|
277 | }
|
---|
278 | *kind = Token_left_shift;
|
---|
279 | return;
|
---|
280 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
281 | *kind = Token_leq;
|
---|
282 | return;
|
---|
283 | }
|
---|
284 | break;
|
---|
285 |
|
---|
286 | case '>':
|
---|
287 | if (m_buffer[m_ptr+1] == '>') {
|
---|
288 | if (m_buffer[m_ptr+2] == '=') {
|
---|
289 | *kind = Token_assign;
|
---|
290 | return;
|
---|
291 | }
|
---|
292 | *kind = Token_right_shift;
|
---|
293 | return;
|
---|
294 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
295 | *kind = Token_geq;
|
---|
296 | return;
|
---|
297 | }
|
---|
298 | break;
|
---|
299 |
|
---|
300 | case '-':
|
---|
301 | if (m_buffer[m_ptr+1] == '>') {
|
---|
302 | if (m_buffer[m_ptr+2] == '*') {
|
---|
303 | *kind = Token_ptrmem;
|
---|
304 | return;
|
---|
305 | }
|
---|
306 | *kind = Token_arrow;
|
---|
307 | return;
|
---|
308 | } else if (m_buffer[m_ptr+1] == '-') {
|
---|
309 | *kind = Token_decr;
|
---|
310 | return;
|
---|
311 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
312 | *kind = Token_assign;
|
---|
313 | return;
|
---|
314 | }
|
---|
315 | break;
|
---|
316 |
|
---|
317 | case '.':
|
---|
318 | if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
|
---|
319 | *kind = Token_ellipsis;
|
---|
320 | return;
|
---|
321 | } else if (m_buffer[m_ptr+1] == '*') {
|
---|
322 | *kind = Token_ptrmem;
|
---|
323 | return;
|
---|
324 | }
|
---|
325 | break;
|
---|
326 |
|
---|
327 | }
|
---|
328 |
|
---|
329 | *kind = m_buffer[m_ptr++];
|
---|
330 | }
|
---|
331 |
|
---|
332 | bool RppLexer::match(const char *buf, int len)
|
---|
333 | {
|
---|
334 | if (m_len != len)
|
---|
335 | return false;
|
---|
336 | for (int i = 0; i < len; ++i) {
|
---|
337 | if(m_buffer[m_ptr + i] != buf[i])
|
---|
338 | return false;
|
---|
339 | }
|
---|
340 | return true;
|
---|
341 | }
|
---|
342 |
|
---|
343 | void RppLexer::scanKeyword(int *kind)
|
---|
344 | {
|
---|
345 | if(match("if", 2))
|
---|
346 | *kind = Token_directive_if;
|
---|
347 | else if(match("elif", 4))
|
---|
348 | *kind = Token_directive_elif;
|
---|
349 | else if(match("else", 4))
|
---|
350 | *kind = Token_directive_else;
|
---|
351 | else if(match("line", 4))
|
---|
352 | *kind = Token_directive_line;
|
---|
353 | else if(match("else", 4))
|
---|
354 | *kind = Token_directive_else;
|
---|
355 | else if(match("line", 4))
|
---|
356 | *kind = Token_directive_line;
|
---|
357 | else if(match("endif", 5))
|
---|
358 | *kind = Token_directive_endif;
|
---|
359 | else if(match("ifdef", 5))
|
---|
360 | *kind = Token_directive_ifdef;
|
---|
361 | else if(match("error", 5))
|
---|
362 | *kind = Token_directive_error;
|
---|
363 | else if(match("undef", 5))
|
---|
364 | *kind = Token_directive_undef;
|
---|
365 | else if(match("pragma", 6))
|
---|
366 | *kind = Token_directive_pragma;
|
---|
367 | else if(match("ifndef", 6))
|
---|
368 | *kind = Token_directive_ifndef;
|
---|
369 | else if(match("define", 6))
|
---|
370 | *kind = Token_directive_define;
|
---|
371 | else if(match("include", 7))
|
---|
372 | *kind = Token_directive_include;
|
---|
373 | else if(match("defined", 7))
|
---|
374 | *kind = Token_defined;
|
---|
375 | else
|
---|
376 | *kind = Token_identifier;
|
---|
377 | }
|
---|
378 |
|
---|
379 | } //namespace Rpp
|
---|
380 |
|
---|
381 | QT_END_NAMESPACE
|
---|