1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation ([email protected])
|
---|
6 | **
|
---|
7 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include <QByteArray>
|
---|
43 |
|
---|
44 | #include "qquerytransformparser_p.h"
|
---|
45 |
|
---|
46 | #include "qxquerytokenizer_p.h"
|
---|
47 |
|
---|
48 | #include "qtokenlookup.cpp"
|
---|
49 |
|
---|
50 | QT_BEGIN_NAMESPACE
|
---|
51 |
|
---|
52 | namespace QPatternist
|
---|
53 | {
|
---|
54 |
|
---|
/*
 * Skips whitespace and comments by calling consumeWhitespace(). When that
 * call reports anything other than SUCCESS, the enclosing function returns
 * immediately with a Token carrying the reported token type.
 *
 * Since the macro expands to a return statement, it can only be used inside
 * functions whose return type can be constructed from a Token.
 */
#define handleWhitespace()                                      \
{                                                               \
    const TokenType whitespaceResult = consumeWhitespace();     \
    if(whitespaceResult != SUCCESS)                             \
        return Token(whitespaceResult);                         \
}
|
---|
61 |
|
---|
62 | XQueryTokenizer::XQueryTokenizer(const QString &query,
|
---|
63 | const QUrl &location,
|
---|
64 | const State startingState) : Tokenizer(location)
|
---|
65 | , m_data(query)
|
---|
66 | , m_length(query.length())
|
---|
67 | , m_state(startingState)
|
---|
68 | , m_pos(0)
|
---|
69 | , m_line(1)
|
---|
70 | , m_columnOffset(0)
|
---|
71 | , m_scanOnly(false)
|
---|
72 | {
|
---|
73 | Q_ASSERT(location.isValid() || location.isEmpty());
|
---|
74 | }
|
---|
75 |
|
---|
76 | const QChar XQueryTokenizer::current() const
|
---|
77 | {
|
---|
78 | if(m_pos < m_length)
|
---|
79 | return m_data.at(m_pos);
|
---|
80 | else
|
---|
81 | return QChar();
|
---|
82 | }
|
---|
83 |
|
---|
84 | char XQueryTokenizer::peekCurrent() const
|
---|
85 | {
|
---|
86 | return current().toAscii();
|
---|
87 | }
|
---|
88 |
|
---|
89 | int XQueryTokenizer::peekForColonColon() const
|
---|
90 | {
|
---|
91 | /* Note, we don't modify m_pos in this function, so we need to do offset
|
---|
92 | * calculations. */
|
---|
93 | int pos = m_pos;
|
---|
94 |
|
---|
95 | while(pos < m_length)
|
---|
96 | {
|
---|
97 | switch(m_data.at(pos).toAscii())
|
---|
98 | {
|
---|
99 | /* Fallthrough these four. */
|
---|
100 | case ' ':
|
---|
101 | case '\t':
|
---|
102 | case '\n':
|
---|
103 | case '\r':
|
---|
104 | break;
|
---|
105 | case ':':
|
---|
106 | {
|
---|
107 | if(peekAhead((pos - m_pos) + 1) == ':')
|
---|
108 | return pos - m_pos;
|
---|
109 | /* Fallthrough. */
|
---|
110 | }
|
---|
111 | default:
|
---|
112 | return -1;
|
---|
113 | }
|
---|
114 | ++pos;
|
---|
115 | }
|
---|
116 |
|
---|
117 | return -1;
|
---|
118 | }
|
---|
119 |
|
---|
120 | Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code,
|
---|
121 | const State s,
|
---|
122 | const int advance)
|
---|
123 | {
|
---|
124 | Q_ASSERT(advance >= 0);
|
---|
125 | m_pos += advance;
|
---|
126 | setState(s);
|
---|
127 | return Token(code);
|
---|
128 | }
|
---|
129 |
|
---|
130 | Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code,
|
---|
131 | const QString &value,
|
---|
132 | const State s)
|
---|
133 | {
|
---|
134 | setState(s);
|
---|
135 | return Token(code, value);
|
---|
136 | }
|
---|
137 |
|
---|
138 | Tokenizer::Token XQueryTokenizer::tokenAndAdvance(const TokenType code,
|
---|
139 | const int advance)
|
---|
140 | {
|
---|
141 | Q_ASSERT(advance >= 0);
|
---|
142 | m_pos += advance;
|
---|
143 | return Token(code);
|
---|
144 | }
|
---|
145 |
|
---|
146 | QString XQueryTokenizer::normalizeEOL(const QString &input,
|
---|
147 | const CharacterSkips &characterSkips)
|
---|
148 | {
|
---|
149 | const int len = input.count();
|
---|
150 | QString result;
|
---|
151 |
|
---|
152 | /* The likely hood is rather high it'll be the same content. */
|
---|
153 | result.reserve(len);
|
---|
154 |
|
---|
155 | for(int i = 0; i < len; ++i)
|
---|
156 | {
|
---|
157 | const QChar &at = input.at(i);
|
---|
158 |
|
---|
159 | if(characterSkips.contains(i))
|
---|
160 | {
|
---|
161 | result.append(at);
|
---|
162 | continue;
|
---|
163 | }
|
---|
164 | switch(input.at(i).unicode())
|
---|
165 | {
|
---|
166 | case '\r':
|
---|
167 | {
|
---|
168 | if(i + 1 < len && input.at(i + 1) == QLatin1Char('\n'))
|
---|
169 | ++i;
|
---|
170 |
|
---|
171 | /* Else, fallthrough. */
|
---|
172 | }
|
---|
173 | case '\n':
|
---|
174 | {
|
---|
175 | result.append(QLatin1Char('\n'));
|
---|
176 | continue;
|
---|
177 | }
|
---|
178 | default:
|
---|
179 | {
|
---|
180 | result.append(at);
|
---|
181 | }
|
---|
182 | }
|
---|
183 | }
|
---|
184 |
|
---|
185 | return result;
|
---|
186 | }
|
---|
187 |
|
---|
188 | Tokenizer::TokenType XQueryTokenizer::consumeComment()
|
---|
189 | {
|
---|
190 | /* Below, we return ERROR instead of END_OF_FILE such that the parser
|
---|
191 | * sees an invalid comment. */
|
---|
192 | while(m_pos < m_length)
|
---|
193 | {
|
---|
194 | switch(peekCurrent())
|
---|
195 | {
|
---|
196 | case ':':
|
---|
197 | {
|
---|
198 | ++m_pos; /* Consume ':' */
|
---|
199 | if(atEnd())
|
---|
200 | return ERROR;
|
---|
201 |
|
---|
202 | if(peekCurrent() == ')')
|
---|
203 | {
|
---|
204 | ++m_pos; /* Consume ')' */
|
---|
205 | return SUCCESS; /* The comment closed nicely. */
|
---|
206 | }
|
---|
207 | continue; /* We don't want to increment m_pos twice. */
|
---|
208 | }
|
---|
209 | case '(':
|
---|
210 | { /* It looks like the start of a comment. */
|
---|
211 | ++m_pos;
|
---|
212 |
|
---|
213 | if(atEnd())
|
---|
214 | return END_OF_FILE;
|
---|
215 | else if(peekCurrent() == ':')
|
---|
216 | {
|
---|
217 | /* And it is a nested comment -- parse it. */
|
---|
218 | const TokenType retval = consumeComment();
|
---|
219 | if(retval == SUCCESS)
|
---|
220 | continue; /* Continue with our "own" comment. */
|
---|
221 | else
|
---|
222 | return retval; /* Return the error in the nested comment. */
|
---|
223 | }
|
---|
224 | break;
|
---|
225 | }
|
---|
226 | case '\n':
|
---|
227 | /* Fallthrough. */
|
---|
228 | case '\r':
|
---|
229 | {
|
---|
230 | /* We want to count \r\n as a single line break. */
|
---|
231 | if(peekAhead() == '\n')
|
---|
232 | ++m_pos;
|
---|
233 |
|
---|
234 | m_columnOffset = m_pos;
|
---|
235 | ++m_line;
|
---|
236 |
|
---|
237 | break;
|
---|
238 | }
|
---|
239 | }
|
---|
240 | ++m_pos;
|
---|
241 | }
|
---|
242 |
|
---|
243 | return ERROR; /* Error: we reached the end while inside a comment. */
|
---|
244 | }
|
---|
245 |
|
---|
246 | bool XQueryTokenizer::consumeRawWhitespace()
|
---|
247 | {
|
---|
248 | while(m_pos < m_length)
|
---|
249 | {
|
---|
250 | switch(peekCurrent())
|
---|
251 | {
|
---|
252 | case ' ':
|
---|
253 | case '\t':
|
---|
254 | break;
|
---|
255 | case '\n':
|
---|
256 | case '\r':
|
---|
257 | {
|
---|
258 | if(peekAhead() == '\n')
|
---|
259 | ++m_pos;
|
---|
260 |
|
---|
261 | m_columnOffset = m_pos;
|
---|
262 | ++m_line;
|
---|
263 |
|
---|
264 | break;
|
---|
265 | }
|
---|
266 | default:
|
---|
267 | return false;
|
---|
268 | }
|
---|
269 | ++m_pos;
|
---|
270 | }
|
---|
271 | return true;
|
---|
272 | }
|
---|
273 |
|
---|
274 | Tokenizer::TokenType XQueryTokenizer::consumeWhitespace()
|
---|
275 | {
|
---|
276 | while(m_pos < m_length)
|
---|
277 | {
|
---|
278 | switch(peekCurrent())
|
---|
279 | {
|
---|
280 | case ' ':
|
---|
281 | case '\t':
|
---|
282 | break;
|
---|
283 | case '\n':
|
---|
284 | case '\r':
|
---|
285 | {
|
---|
286 | /* We want to count \r\n as a single line break. */
|
---|
287 | if(peekAhead() == '\n')
|
---|
288 | ++m_pos;
|
---|
289 |
|
---|
290 | m_columnOffset = m_pos;
|
---|
291 | ++m_line;
|
---|
292 |
|
---|
293 | break;
|
---|
294 | }
|
---|
295 | case '(':
|
---|
296 | {
|
---|
297 | if(peekAhead() == ':')
|
---|
298 | {
|
---|
299 | m_pos += 2; /* Consume "(:" */
|
---|
300 |
|
---|
301 | const TokenType comment = consumeComment();
|
---|
302 | if(comment == SUCCESS)
|
---|
303 | continue;
|
---|
304 | else
|
---|
305 | return comment;
|
---|
306 | }
|
---|
307 | }
|
---|
308 | default:
|
---|
309 | return SUCCESS;
|
---|
310 | }
|
---|
311 | ++m_pos;
|
---|
312 | }
|
---|
313 |
|
---|
314 | return END_OF_FILE;
|
---|
315 | }
|
---|
316 |
|
---|
317 | char XQueryTokenizer::peekAhead(const int length) const
|
---|
318 | {
|
---|
319 | if(m_pos + length < m_length)
|
---|
320 | return m_data.at(m_pos + length).toAscii();
|
---|
321 | else
|
---|
322 | return 0;
|
---|
323 | }
|
---|
324 |
|
---|
325 | Tokenizer::Token XQueryTokenizer::error()
|
---|
326 | {
|
---|
327 | return Token(ERROR);
|
---|
328 | }
|
---|
|
---|