| 1 | /****************************************************************************
|
|---|
| 2 | **
|
|---|
| 3 | ** Copyright (C) 2001-2004 Roberto Raggi
|
|---|
| 4 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
|---|
| 5 | ** All rights reserved.
|
|---|
| 6 | ** Contact: Nokia Corporation ([email protected])
|
|---|
| 7 | **
|
|---|
| 8 | ** This file is part of the qt3to4 porting application of the Qt Toolkit.
|
|---|
| 9 | **
|
|---|
| 10 | ** $QT_BEGIN_LICENSE:LGPL$
|
|---|
| 11 | ** Commercial Usage
|
|---|
| 12 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
|---|
| 13 | ** accordance with the Qt Commercial License Agreement provided with the
|
|---|
| 14 | ** Software or, alternatively, in accordance with the terms contained in
|
|---|
| 15 | ** a written agreement between you and Nokia.
|
|---|
| 16 | **
|
|---|
| 17 | ** GNU Lesser General Public License Usage
|
|---|
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
|---|
| 19 | ** General Public License version 2.1 as published by the Free Software
|
|---|
| 20 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
|---|
| 21 | ** packaging of this file. Please review the following information to
|
|---|
| 22 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
|---|
| 23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
|---|
| 24 | **
|
|---|
| 25 | ** In addition, as a special exception, Nokia gives you certain additional
|
|---|
| 26 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
|---|
| 27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
|---|
| 28 | **
|
|---|
| 29 | ** GNU General Public License Usage
|
|---|
| 30 | ** Alternatively, this file may be used under the terms of the GNU
|
|---|
| 31 | ** General Public License version 3.0 as published by the Free Software
|
|---|
| 32 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
|---|
| 33 | ** packaging of this file. Please review the following information to
|
|---|
| 34 | ** ensure the GNU General Public License version 3.0 requirements will be
|
|---|
| 35 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
|---|
| 36 | **
|
|---|
| 37 | ** If you have questions regarding the use of this file, please contact
|
|---|
| 38 | ** Nokia at [email protected].
|
|---|
| 39 | ** $QT_END_LICENSE$
|
|---|
| 40 | **
|
|---|
| 41 | ****************************************************************************/
|
|---|
| 42 |
|
|---|
| 43 | #include "rpplexer.h"
|
|---|
| 44 | #include <QChar>
|
|---|
| 45 | #include <ctype.h>
|
|---|
| 46 |
|
|---|
| 47 | QT_BEGIN_NAMESPACE
|
|---|
| 48 |
|
|---|
| 49 | using namespace TokenEngine;
|
|---|
| 50 |
|
|---|
| 51 | namespace Rpp {
|
|---|
| 52 | RppLexer::RppLexer()
|
|---|
| 53 | : m_buffer(0), m_ptr(0), m_len(0)
|
|---|
| 54 | {
|
|---|
| 55 | setupScanTable();
|
|---|
| 56 | }
|
|---|
| 57 |
|
|---|
| 58 | void RppLexer::setupScanTable()
|
|---|
| 59 | {
|
|---|
| 60 | memset(s_attr_table, 0, 256);
|
|---|
| 61 |
|
|---|
| 62 | for (int i=0; i<128; ++i) {
|
|---|
| 63 | switch (i) {
|
|---|
| 64 | case ':':
|
|---|
| 65 | case '*':
|
|---|
| 66 | case '%':
|
|---|
| 67 | case '^':
|
|---|
| 68 | case '=':
|
|---|
| 69 | case '!':
|
|---|
| 70 | case '&':
|
|---|
| 71 | case '|':
|
|---|
| 72 | case '+':
|
|---|
| 73 | case '<':
|
|---|
| 74 | case '>':
|
|---|
| 75 | case '-':
|
|---|
| 76 | case '.':
|
|---|
| 77 | s_scan_table[i] = &RppLexer::scanOperator;
|
|---|
| 78 | break;
|
|---|
| 79 |
|
|---|
| 80 | case '\r':
|
|---|
| 81 | case '\n':
|
|---|
| 82 | s_scan_table[i] = &RppLexer::scanNewline;
|
|---|
| 83 | break;
|
|---|
| 84 |
|
|---|
| 85 | case '\'':
|
|---|
| 86 | s_scan_table[i] = &RppLexer::scanCharLiteral;
|
|---|
| 87 | break;
|
|---|
| 88 |
|
|---|
| 89 | case '"':
|
|---|
| 90 | s_scan_table[i] = &RppLexer::scanStringLiteral;
|
|---|
| 91 | break;
|
|---|
| 92 | case '#':
|
|---|
| 93 | s_scan_table[i] = &RppLexer::scanPreprocessor;
|
|---|
| 94 | break;
|
|---|
| 95 |
|
|---|
| 96 | case '/':
|
|---|
| 97 | s_scan_table[i] = &RppLexer::scanComment;
|
|---|
| 98 | break;
|
|---|
| 99 |
|
|---|
| 100 | default:
|
|---|
| 101 | if (isspace(i)) {
|
|---|
| 102 | s_scan_table[i] = &RppLexer::scanWhiteSpaces;
|
|---|
| 103 | s_attr_table[i] |= A_Whitespace;
|
|---|
| 104 | } else if (isalpha(i) || i == '_') {
|
|---|
| 105 | s_scan_table[i] = &RppLexer::scanKeyword;
|
|---|
| 106 | s_attr_table[i] |= A_Alpha;
|
|---|
| 107 | } else if (isdigit(i)) {
|
|---|
| 108 | s_scan_table[i] = &RppLexer::scanNumberLiteral;
|
|---|
| 109 | s_attr_table[i] |= A_Digit;
|
|---|
| 110 | } else
|
|---|
| 111 | s_scan_table[i] = &RppLexer::scanChar;
|
|---|
| 112 | }
|
|---|
| 113 | }
|
|---|
| 114 |
|
|---|
| 115 | s_scan_table[128] = &RppLexer::scanUnicodeChar;
|
|---|
| 116 | }
|
|---|
| 117 |
|
|---|
| 118 | QVector<Type> RppLexer::lex(const TokenContainer &tokenContainer)
|
|---|
| 119 | {
|
|---|
| 120 | QVector<Type> tokenTypes;
|
|---|
| 121 | const int numTokens = tokenContainer.count();
|
|---|
| 122 | tokenTypes.reserve(numTokens);
|
|---|
| 123 | QByteArray text = tokenContainer.fullText();
|
|---|
| 124 | m_buffer = text.constData();
|
|---|
| 125 | for(int t=0; t<numTokens; ++t) {
|
|---|
| 126 | TokenEngine::Token token = tokenContainer.token(t);
|
|---|
| 127 | tokenTypes.append(indentify(token.start, token.length));
|
|---|
| 128 | }
|
|---|
| 129 | return tokenTypes;
|
|---|
| 130 | }
|
|---|
| 131 |
|
|---|
| 132 | Type RppLexer::indentify(int pos, int length)
|
|---|
| 133 | {
|
|---|
| 134 | Q_ASSERT(length > 0);
|
|---|
| 135 | m_ptr = pos;
|
|---|
| 136 | m_len = length;
|
|---|
| 137 | int kind = 0;
|
|---|
| 138 | const unsigned char ch = m_buffer[pos];
|
|---|
| 139 | (this->*s_scan_table[ch < 128 ? ch : 128])(&kind);
|
|---|
| 140 | return (Type)kind;
|
|---|
| 141 | }
|
|---|
| 142 |
|
|---|
| 143 | void RppLexer::scanChar(int *kind)
|
|---|
| 144 | {
|
|---|
| 145 | *kind = m_buffer[m_ptr];
|
|---|
| 146 | }
|
|---|
| 147 |
|
|---|
| 148 | void RppLexer::scanWhiteSpaces(int *kind)
|
|---|
| 149 | {
|
|---|
| 150 | *kind = Token_whitespaces;
|
|---|
| 151 |
|
|---|
| 152 | while (unsigned char ch = m_buffer[m_ptr]) {
|
|---|
| 153 | if (s_attr_table[ch] & A_Whitespace)
|
|---|
| 154 | ++m_ptr;
|
|---|
| 155 | else
|
|---|
| 156 | break;
|
|---|
| 157 | }
|
|---|
| 158 | }
|
|---|
| 159 |
|
|---|
| 160 | void RppLexer::scanNewline(int *kind)
|
|---|
| 161 | {
|
|---|
| 162 | *kind = '\n';
|
|---|
| 163 | }
|
|---|
| 164 |
|
|---|
| 165 | void RppLexer::scanUnicodeChar(int *kind)
|
|---|
| 166 | {
|
|---|
| 167 | *kind = m_buffer[m_ptr];
|
|---|
| 168 | }
|
|---|
| 169 |
|
|---|
| 170 | void RppLexer::scanCharLiteral(int *kind)
|
|---|
| 171 | {
|
|---|
| 172 | *kind = Token_char_literal;
|
|---|
| 173 | }
|
|---|
| 174 |
|
|---|
| 175 | void RppLexer::scanStringLiteral(int *kind)
|
|---|
| 176 | {
|
|---|
| 177 | *kind = Token_string_literal;
|
|---|
| 178 | }
|
|---|
| 179 |
|
|---|
| 180 | void RppLexer::scanIdentifier(int *kind)
|
|---|
| 181 | {
|
|---|
| 182 | *kind = Token_identifier;
|
|---|
| 183 | }
|
|---|
| 184 |
|
|---|
| 185 | void RppLexer::scanNumberLiteral(int *kind)
|
|---|
| 186 | {
|
|---|
| 187 | *kind = Token_number_literal;
|
|---|
| 188 | }
|
|---|
| 189 |
|
|---|
| 190 | void RppLexer::scanPreprocessor(int *kind)
|
|---|
| 191 | {
|
|---|
| 192 | *kind = Token_preproc;
|
|---|
| 193 | }
|
|---|
| 194 |
|
|---|
| 195 | void RppLexer::scanComment(int *kind)
|
|---|
| 196 | {
|
|---|
| 197 | switch(m_buffer[m_ptr + 1]) {
|
|---|
| 198 | case '/':
|
|---|
| 199 | *kind = Token_line_comment;
|
|---|
| 200 | break;
|
|---|
| 201 | case '*':
|
|---|
| 202 | *kind = Token_multiline_comment;
|
|---|
| 203 | break;
|
|---|
| 204 | default:
|
|---|
| 205 | scanOperator(kind);
|
|---|
| 206 | }
|
|---|
| 207 | }
|
|---|
| 208 |
|
|---|
| 209 | void RppLexer::scanOperator(int *kind)
|
|---|
| 210 | {
|
|---|
| 211 | switch (m_buffer[m_ptr]) {
|
|---|
| 212 | case ':':
|
|---|
| 213 | if (m_buffer[m_ptr+1] == ':') {
|
|---|
| 214 | *kind = Token_scope;
|
|---|
| 215 | return;
|
|---|
| 216 | }
|
|---|
| 217 | break;
|
|---|
| 218 |
|
|---|
| 219 | case '*':
|
|---|
| 220 | case '/':
|
|---|
| 221 | case '%':
|
|---|
| 222 | case '^':
|
|---|
| 223 | if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 224 | *kind = Token_assign;
|
|---|
| 225 | return;
|
|---|
| 226 | }
|
|---|
| 227 | break;
|
|---|
| 228 |
|
|---|
| 229 | case '=':
|
|---|
| 230 | if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 231 | *kind = Token_eq;
|
|---|
| 232 | return;
|
|---|
| 233 | }
|
|---|
| 234 | break;
|
|---|
| 235 | case '!':
|
|---|
| 236 | if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 237 | *kind = Token_not_eq;
|
|---|
| 238 | return;
|
|---|
| 239 | }
|
|---|
| 240 | break;
|
|---|
| 241 |
|
|---|
| 242 | case '&':
|
|---|
| 243 | if (m_buffer[m_ptr+1] == '&') {
|
|---|
| 244 | *kind = Token_and;
|
|---|
| 245 | return;
|
|---|
| 246 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 247 | *kind = Token_assign;
|
|---|
| 248 | return;
|
|---|
| 249 | }
|
|---|
| 250 | break;
|
|---|
| 251 |
|
|---|
| 252 | case '|':
|
|---|
| 253 | if (m_buffer[m_ptr+1] == '|' ) {
|
|---|
| 254 | *kind = Token_or;
|
|---|
| 255 | return;
|
|---|
| 256 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 257 | *kind = Token_assign;
|
|---|
| 258 | return;
|
|---|
| 259 | }
|
|---|
| 260 | break;
|
|---|
| 261 |
|
|---|
| 262 | case '+':
|
|---|
| 263 | if (m_buffer[m_ptr+1] == '+' ) {
|
|---|
| 264 | *kind = Token_incr;
|
|---|
| 265 | return;
|
|---|
| 266 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 267 | *kind = Token_assign;
|
|---|
| 268 | return;
|
|---|
| 269 | }
|
|---|
| 270 | break;
|
|---|
| 271 |
|
|---|
| 272 | case '<':
|
|---|
| 273 | if (m_buffer[m_ptr+1] == '<') {
|
|---|
| 274 | if (m_buffer[m_ptr+2] == '=') {
|
|---|
| 275 | *kind = Token_assign;
|
|---|
| 276 | return;
|
|---|
| 277 | }
|
|---|
| 278 | *kind = Token_left_shift;
|
|---|
| 279 | return;
|
|---|
| 280 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 281 | *kind = Token_leq;
|
|---|
| 282 | return;
|
|---|
| 283 | }
|
|---|
| 284 | break;
|
|---|
| 285 |
|
|---|
| 286 | case '>':
|
|---|
| 287 | if (m_buffer[m_ptr+1] == '>') {
|
|---|
| 288 | if (m_buffer[m_ptr+2] == '=') {
|
|---|
| 289 | *kind = Token_assign;
|
|---|
| 290 | return;
|
|---|
| 291 | }
|
|---|
| 292 | *kind = Token_right_shift;
|
|---|
| 293 | return;
|
|---|
| 294 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 295 | *kind = Token_geq;
|
|---|
| 296 | return;
|
|---|
| 297 | }
|
|---|
| 298 | break;
|
|---|
| 299 |
|
|---|
| 300 | case '-':
|
|---|
| 301 | if (m_buffer[m_ptr+1] == '>') {
|
|---|
| 302 | if (m_buffer[m_ptr+2] == '*') {
|
|---|
| 303 | *kind = Token_ptrmem;
|
|---|
| 304 | return;
|
|---|
| 305 | }
|
|---|
| 306 | *kind = Token_arrow;
|
|---|
| 307 | return;
|
|---|
| 308 | } else if (m_buffer[m_ptr+1] == '-') {
|
|---|
| 309 | *kind = Token_decr;
|
|---|
| 310 | return;
|
|---|
| 311 | } else if (m_buffer[m_ptr+1] == '=') {
|
|---|
| 312 | *kind = Token_assign;
|
|---|
| 313 | return;
|
|---|
| 314 | }
|
|---|
| 315 | break;
|
|---|
| 316 |
|
|---|
| 317 | case '.':
|
|---|
| 318 | if (m_buffer[m_ptr+1] == '.' && m_buffer[m_ptr+2] == '.') {
|
|---|
| 319 | *kind = Token_ellipsis;
|
|---|
| 320 | return;
|
|---|
| 321 | } else if (m_buffer[m_ptr+1] == '*') {
|
|---|
| 322 | *kind = Token_ptrmem;
|
|---|
| 323 | return;
|
|---|
| 324 | }
|
|---|
| 325 | break;
|
|---|
| 326 |
|
|---|
| 327 | }
|
|---|
| 328 |
|
|---|
| 329 | *kind = m_buffer[m_ptr++];
|
|---|
| 330 | }
|
|---|
| 331 |
|
|---|
| 332 | bool RppLexer::match(const char *buf, int len)
|
|---|
| 333 | {
|
|---|
| 334 | if (m_len != len)
|
|---|
| 335 | return false;
|
|---|
| 336 | for (int i = 0; i < len; ++i) {
|
|---|
| 337 | if(m_buffer[m_ptr + i] != buf[i])
|
|---|
| 338 | return false;
|
|---|
| 339 | }
|
|---|
| 340 | return true;
|
|---|
| 341 | }
|
|---|
| 342 |
|
|---|
| 343 | void RppLexer::scanKeyword(int *kind)
|
|---|
| 344 | {
|
|---|
| 345 | if(match("if", 2))
|
|---|
| 346 | *kind = Token_directive_if;
|
|---|
| 347 | else if(match("elif", 4))
|
|---|
| 348 | *kind = Token_directive_elif;
|
|---|
| 349 | else if(match("else", 4))
|
|---|
| 350 | *kind = Token_directive_else;
|
|---|
| 351 | else if(match("line", 4))
|
|---|
| 352 | *kind = Token_directive_line;
|
|---|
| 353 | else if(match("else", 4))
|
|---|
| 354 | *kind = Token_directive_else;
|
|---|
| 355 | else if(match("line", 4))
|
|---|
| 356 | *kind = Token_directive_line;
|
|---|
| 357 | else if(match("endif", 5))
|
|---|
| 358 | *kind = Token_directive_endif;
|
|---|
| 359 | else if(match("ifdef", 5))
|
|---|
| 360 | *kind = Token_directive_ifdef;
|
|---|
| 361 | else if(match("error", 5))
|
|---|
| 362 | *kind = Token_directive_error;
|
|---|
| 363 | else if(match("undef", 5))
|
|---|
| 364 | *kind = Token_directive_undef;
|
|---|
| 365 | else if(match("pragma", 6))
|
|---|
| 366 | *kind = Token_directive_pragma;
|
|---|
| 367 | else if(match("ifndef", 6))
|
|---|
| 368 | *kind = Token_directive_ifndef;
|
|---|
| 369 | else if(match("define", 6))
|
|---|
| 370 | *kind = Token_directive_define;
|
|---|
| 371 | else if(match("include", 7))
|
|---|
| 372 | *kind = Token_directive_include;
|
|---|
| 373 | else if(match("defined", 7))
|
|---|
| 374 | *kind = Token_defined;
|
|---|
| 375 | else
|
|---|
| 376 | *kind = Token_identifier;
|
|---|
| 377 | }
|
|---|
| 378 |
|
|---|
| 379 | } //namespace Rpp
|
|---|
| 380 |
|
|---|
| 381 | QT_END_NAMESPACE
|
|---|