[2] | 1 | /****************************************************************************
|
---|
| 2 | **
|
---|
[561] | 3 | ** Copyright (C) 2001-2004 Roberto Raggi
|
---|
[846] | 4 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
[561] | 5 | ** All rights reserved.
|
---|
| 6 | ** Contact: Nokia Corporation (qt-info@nokia.com)
|
---|
[2] | 7 | **
|
---|
| 8 | ** This file is part of the qt3to4 porting application of the Qt Toolkit.
|
---|
| 9 | **
|
---|
| 10 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
| 11 | ** Commercial Usage
|
---|
| 12 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
| 13 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
| 14 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
| 15 | ** a written agreement between you and Nokia.
|
---|
| 16 | **
|
---|
| 17 | ** GNU Lesser General Public License Usage
|
---|
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
| 19 | ** General Public License version 2.1 as published by the Free Software
|
---|
| 20 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
| 21 | ** packaging of this file. Please review the following information to
|
---|
| 22 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
| 23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
| 24 | **
|
---|
[561] | 25 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
| 26 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
| 27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
[2] | 28 | **
|
---|
| 29 | ** GNU General Public License Usage
|
---|
| 30 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
| 31 | ** General Public License version 3.0 as published by the Free Software
|
---|
| 32 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
| 33 | ** packaging of this file. Please review the following information to
|
---|
| 34 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
| 35 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
| 36 | **
|
---|
[561] | 37 | ** If you have questions regarding the use of this file, please contact
|
---|
| 38 | ** Nokia at qt-info@nokia.com.
|
---|
[2] | 39 | ** $QT_END_LICENSE$
|
---|
| 40 | **
|
---|
| 41 | ****************************************************************************/
|
---|
| 42 |
|
---|
| 43 | #include "rpplexer.h"
|
---|
| 44 | #include <QChar>
|
---|
| 45 | #include <ctype.h>
|
---|
| 46 |
|
---|
| 47 | QT_BEGIN_NAMESPACE
|
---|
| 48 |
|
---|
| 49 | using namespace TokenEngine;
|
---|
| 50 |
|
---|
| 51 | namespace Rpp {
|
---|
| 52 | RppLexer::RppLexer()
|
---|
| 53 | : m_buffer(0), m_ptr(0), m_len(0)
|
---|
| 54 | {
|
---|
| 55 | setupScanTable();
|
---|
| 56 | }
|
---|
| 57 |
|
---|
| 58 | void RppLexer::setupScanTable()
|
---|
| 59 | {
|
---|
| 60 | memset(s_attr_table, 0, 256);
|
---|
| 61 |
|
---|
| 62 | for (int i=0; i<128; ++i) {
|
---|
| 63 | switch (i) {
|
---|
| 64 | case ':':
|
---|
| 65 | case '*':
|
---|
| 66 | case '%':
|
---|
| 67 | case '^':
|
---|
| 68 | case '=':
|
---|
| 69 | case '!':
|
---|
| 70 | case '&':
|
---|
| 71 | case '|':
|
---|
| 72 | case '+':
|
---|
| 73 | case '<':
|
---|
| 74 | case '>':
|
---|
| 75 | case '-':
|
---|
| 76 | case '.':
|
---|
| 77 | s_scan_table[i] = &RppLexer::scanOperator;
|
---|
| 78 | break;
|
---|
| 79 |
|
---|
| 80 | case '\r':
|
---|
| 81 | case '\n':
|
---|
| 82 | s_scan_table[i] = &RppLexer::scanNewline;
|
---|
| 83 | break;
|
---|
| 84 |
|
---|
| 85 | case '\'':
|
---|
| 86 | s_scan_table[i] = &RppLexer::scanCharLiteral;
|
---|
| 87 | break;
|
---|
| 88 |
|
---|
| 89 | case '"':
|
---|
| 90 | s_scan_table[i] = &RppLexer::scanStringLiteral;
|
---|
| 91 | break;
|
---|
| 92 | case '#':
|
---|
| 93 | s_scan_table[i] = &RppLexer::scanPreprocessor;
|
---|
| 94 | break;
|
---|
| 95 |
|
---|
| 96 | case '/':
|
---|
| 97 | s_scan_table[i] = &RppLexer::scanComment;
|
---|
| 98 | break;
|
---|
| 99 |
|
---|
| 100 | default:
|
---|
| 101 | if (isspace(i)) {
|
---|
| 102 | s_scan_table[i] = &RppLexer::scanWhiteSpaces;
|
---|
| 103 | s_attr_table[i] |= A_Whitespace;
|
---|
| 104 | } else if (isalpha(i) || i == '_') {
|
---|
| 105 | s_scan_table[i] = &RppLexer::scanKeyword;
|
---|
| 106 | s_attr_table[i] |= A_Alpha;
|
---|
| 107 | } else if (isdigit(i)) {
|
---|
| 108 | s_scan_table[i] = &RppLexer::scanNumberLiteral;
|
---|
| 109 | s_attr_table[i] |= A_Digit;
|
---|
| 110 | } else
|
---|
| 111 | s_scan_table[i] = &RppLexer::scanChar;
|
---|
| 112 | }
|
---|
| 113 | }
|
---|
| 114 |
|
---|
| 115 | s_scan_table[128] = &RppLexer::scanUnicodeChar;
|
---|
| 116 | }
|
---|
| 117 |
|
---|
| 118 | QVector<Type> RppLexer::lex(const TokenContainer &tokenContainer)
|
---|
| 119 | {
|
---|
| 120 | QVector<Type> tokenTypes;
|
---|
| 121 | const int numTokens = tokenContainer.count();
|
---|
| 122 | tokenTypes.reserve(numTokens);
|
---|
| 123 | QByteArray text = tokenContainer.fullText();
|
---|
| 124 | m_buffer = text.constData();
|
---|
| 125 | for(int t=0; t<numTokens; ++t) {
|
---|
| 126 | TokenEngine::Token token = tokenContainer.token(t);
|
---|
| 127 | tokenTypes.append(indentify(token.start, token.length));
|
---|
| 128 | }
|
---|
| 129 | return tokenTypes;
|
---|
| 130 | }
|
---|
| 131 |
|
---|
| 132 | Type RppLexer::indentify(int pos, int length)
|
---|
| 133 | {
|
---|
| 134 | Q_ASSERT(length > 0);
|
---|
| 135 | m_ptr = pos;
|
---|
| 136 | m_len = length;
|
---|
| 137 | int kind = 0;
|
---|
| 138 | const unsigned char ch = m_buffer[pos];
|
---|
| 139 | (this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
|
---|
| 140 | return (Type)kind;
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | void RppLexer::scanChar(int *kind)
|
---|
| 144 | {
|
---|
| 145 | *kind = m_buffer[m_ptr];
|
---|
| 146 | }
|
---|
| 147 |
|
---|
| 148 | void RppLexer::scanWhiteSpaces(int *kind)
|
---|
| 149 | {
|
---|
| 150 | *kind = Token_whitespaces;
|
---|
| 151 |
|
---|
| 152 | while (unsigned char ch = m_buffer[m_ptr]) {
|
---|
| 153 | if (s_attr_table[ch] & A_Whitespace)
|
---|
| 154 | ++m_ptr;
|
---|
| 155 | else
|
---|
| 156 | break;
|
---|
| 157 | }
|
---|
| 158 | }
|
---|
| 159 |
|
---|
| 160 | void RppLexer::scanNewline(int *kind)
|
---|
| 161 | {
|
---|
| 162 | *kind = '\n';
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | void RppLexer::scanUnicodeChar(int *kind)
|
---|
| 166 | {
|
---|
| 167 | *kind = m_buffer[m_ptr];
|
---|
| 168 | }
|
---|
| 169 |
|
---|
| 170 | void RppLexer::scanCharLiteral(int *kind)
|
---|
| 171 | {
|
---|
| 172 | *kind = Token_char_literal;
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 | void RppLexer::scanStringLiteral(int *kind)
|
---|
| 176 | {
|
---|
| 177 | *kind = Token_string_literal;
|
---|
| 178 | }
|
---|
| 179 |
|
---|
| 180 | void RppLexer::scanIdentifier(int *kind)
|
---|
| 181 | {
|
---|
| 182 | *kind = Token_identifier;
|
---|
| 183 | }
|
---|
| 184 |
|
---|
| 185 | void RppLexer::scanNumberLiteral(int *kind)
|
---|
| 186 | {
|
---|
| 187 | *kind = Token_number_literal;
|
---|
| 188 | }
|
---|
| 189 |
|
---|
| 190 | void RppLexer::scanPreprocessor(int *kind)
|
---|
| 191 | {
|
---|
| 192 | *kind = Token_preproc;
|
---|
| 193 | }
|
---|
| 194 |
|
---|
| 195 | void RppLexer::scanComment(int *kind)
|
---|
| 196 | {
|
---|
| 197 | switch(m_buffer[m_ptr + 1]) {
|
---|
| 198 | case '/':
|
---|
| 199 | *kind = Token_line_comment;
|
---|
| 200 | break;
|
---|
| 201 | case '*':
|
---|
| 202 | *kind = Token_multiline_comment;
|
---|
| 203 | break;
|
---|
| 204 | default:
|
---|
| 205 | scanOperator(kind);
|
---|
| 206 | }
|
---|
| 207 | }
|
---|
| 208 |
|
---|
| 209 | void RppLexer::scanOperator(int *kind)
|
---|
| 210 | {
|
---|
| 211 | switch (m_buffer[m_ptr]) {
|
---|
| 212 | case ':':
|
---|
| 213 | if (m_buffer[m_ptr+1] == ':') {
|
---|
| 214 | *kind = Token_scope;
|
---|
| 215 | return;
|
---|
| 216 | }
|
---|
| 217 | break;
|
---|
| 218 |
|
---|
| 219 | case '*':
|
---|
| 220 | case '/':
|
---|
| 221 | case '%':
|
---|
| 222 | case '^':
|
---|
| 223 | if (m_buffer[m_ptr+1] == '=') {
|
---|
| 224 | *kind = Token_assign;
|
---|
| 225 | return;
|
---|
| 226 | }
|
---|
| 227 | break;
|
---|
| 228 |
|
---|
| 229 | case '=':
|
---|
| 230 | if (m_buffer[m_ptr+1] == '=') {
|
---|
| 231 | *kind = Token_eq;
|
---|
| 232 | return;
|
---|
| 233 | }
|
---|
| 234 | break;
|
---|
| 235 | case '!':
|
---|
| 236 | if (m_buffer[m_ptr+1] == '=') {
|
---|
| 237 | *kind = Token_not_eq;
|
---|
| 238 | return;
|
---|
| 239 | }
|
---|
| 240 | break;
|
---|
| 241 |
|
---|
| 242 | case '&':
|
---|
| 243 | if (m_buffer[m_ptr+1] == '&') {
|
---|
| 244 | *kind = Token_and;
|
---|
| 245 | return;
|
---|
| 246 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 247 | *kind = Token_assign;
|
---|
| 248 | return;
|
---|
| 249 | }
|
---|
| 250 | break;
|
---|
| 251 |
|
---|
| 252 | case '|':
|
---|
| 253 | if (m_buffer[m_ptr+1] == '|' ) {
|
---|
| 254 | *kind = Token_or;
|
---|
| 255 | return;
|
---|
| 256 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 257 | *kind = Token_assign;
|
---|
| 258 | return;
|
---|
| 259 | }
|
---|
| 260 | break;
|
---|
| 261 |
|
---|
| 262 | case '+':
|
---|
| 263 | if (m_buffer[m_ptr+1] == '+' ) {
|
---|
| 264 | *kind = Token_incr;
|
---|
| 265 | return;
|
---|
| 266 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 267 | *kind = Token_assign;
|
---|
| 268 | return;
|
---|
| 269 | }
|
---|
| 270 | break;
|
---|
| 271 |
|
---|
| 272 | case '<':
|
---|
| 273 | if (m_buffer[m_ptr+1] == '<') {
|
---|
| 274 | if (m_buffer[m_ptr+2] == '=') {
|
---|
| 275 | *kind = Token_assign;
|
---|
| 276 | return;
|
---|
| 277 | }
|
---|
| 278 | *kind = Token_left_shift;
|
---|
| 279 | return;
|
---|
| 280 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 281 | *kind = Token_leq;
|
---|
| 282 | return;
|
---|
| 283 | }
|
---|
| 284 | break;
|
---|
| 285 |
|
---|
| 286 | case '>':
|
---|
| 287 | if (m_buffer[m_ptr+1] == '>') {
|
---|
| 288 | if (m_buffer[m_ptr+2] == '=') {
|
---|
| 289 | *kind = Token_assign;
|
---|
| 290 | return;
|
---|
| 291 | }
|
---|
| 292 | *kind = Token_right_shift;
|
---|
| 293 | return;
|
---|
| 294 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 295 | *kind = Token_geq;
|
---|
| 296 | return;
|
---|
| 297 | }
|
---|
| 298 | break;
|
---|
| 299 |
|
---|
| 300 | case '-':
|
---|
| 301 | if (m_buffer[m_ptr+1] == '>') {
|
---|
| 302 | if (m_buffer[m_ptr+2] == '*') {
|
---|
| 303 | *kind = Token_ptrmem;
|
---|
| 304 | return;
|
---|
| 305 | }
|
---|
| 306 | *kind = Token_arrow;
|
---|
| 307 | return;
|
---|
| 308 | } else if (m_buffer[m_ptr+1] == '-') {
|
---|
| 309 | *kind = Token_decr;
|
---|
| 310 | return;
|
---|
| 311 | } else if (m_buffer[m_ptr+1] == '=') {
|
---|
| 312 | *kind = Token_assign;
|
---|
| 313 | return;
|
---|
| 314 | }
|
---|
| 315 | break;
|
---|
| 316 |
|
---|
| 317 | case '.':
|
---|
| 318 | if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
|
---|
| 319 | *kind = Token_ellipsis;
|
---|
| 320 | return;
|
---|
| 321 | } else if (m_buffer[m_ptr+1] == '*') {
|
---|
| 322 | *kind = Token_ptrmem;
|
---|
| 323 | return;
|
---|
| 324 | }
|
---|
| 325 | break;
|
---|
| 326 |
|
---|
| 327 | }
|
---|
| 328 |
|
---|
| 329 | *kind = m_buffer[m_ptr++];
|
---|
| 330 | }
|
---|
| 331 |
|
---|
| 332 | bool RppLexer::match(const char *buf, int len)
|
---|
| 333 | {
|
---|
| 334 | if (m_len != len)
|
---|
| 335 | return false;
|
---|
| 336 | for (int i = 0; i < len; ++i) {
|
---|
| 337 | if(m_buffer[m_ptr + i] != buf[i])
|
---|
| 338 | return false;
|
---|
| 339 | }
|
---|
| 340 | return true;
|
---|
| 341 | }
|
---|
| 342 |
|
---|
| 343 | void RppLexer::scanKeyword(int *kind)
|
---|
| 344 | {
|
---|
| 345 | if(match("if", 2))
|
---|
| 346 | *kind = Token_directive_if;
|
---|
| 347 | else if(match("elif", 4))
|
---|
| 348 | *kind = Token_directive_elif;
|
---|
| 349 | else if(match("else", 4))
|
---|
| 350 | *kind = Token_directive_else;
|
---|
| 351 | else if(match("line", 4))
|
---|
| 352 | *kind = Token_directive_line;
|
---|
| 353 | else if(match("else", 4))
|
---|
| 354 | *kind = Token_directive_else;
|
---|
| 355 | else if(match("line", 4))
|
---|
| 356 | *kind = Token_directive_line;
|
---|
| 357 | else if(match("endif", 5))
|
---|
| 358 | *kind = Token_directive_endif;
|
---|
| 359 | else if(match("ifdef", 5))
|
---|
| 360 | *kind = Token_directive_ifdef;
|
---|
| 361 | else if(match("error", 5))
|
---|
| 362 | *kind = Token_directive_error;
|
---|
| 363 | else if(match("undef", 5))
|
---|
| 364 | *kind = Token_directive_undef;
|
---|
| 365 | else if(match("pragma", 6))
|
---|
| 366 | *kind = Token_directive_pragma;
|
---|
| 367 | else if(match("ifndef", 6))
|
---|
| 368 | *kind = Token_directive_ifndef;
|
---|
| 369 | else if(match("define", 6))
|
---|
| 370 | *kind = Token_directive_define;
|
---|
| 371 | else if(match("include", 7))
|
---|
| 372 | *kind = Token_directive_include;
|
---|
| 373 | else if(match("defined", 7))
|
---|
| 374 | *kind = Token_defined;
|
---|
| 375 | else
|
---|
| 376 | *kind = Token_identifier;
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 | } //namespace Rpp
|
---|
| 380 |
|
---|
| 381 | QT_END_NAMESPACE
|
---|