source: trunk/src/xmlpatterns/parser/qxquerytokenizer.cpp@ 846

Last change on this file since 846 was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 68.5 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the QtXmlPatterns module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include <QByteArray>
43
44#include "qquerytransformparser_p.h"
45
46#include "qxquerytokenizer_p.h"
47
48#include "qtokenlookup.cpp"
49
50QT_BEGIN_NAMESPACE
51
52namespace QPatternist
53{
54
/* Convenience macro for token-producing member functions: consumes any
 * whitespace (via consumeWhitespace()) and, when that fails -- i.e. the
 * result is not SUCCESS, such as END_OF_FILE or an ill-formed comment --
 * returns the corresponding Token from the *enclosing* function. May only
 * be used inside functions whose return type is Token. */
#define handleWhitespace()                      \
{                                               \
    const TokenType t = consumeWhitespace();    \
    if(t != SUCCESS)                            \
        return Token(t);                        \
}
61
/* Constructs a tokenizer over @p query. @p location is forwarded to the
 * Tokenizer base class (presumably for diagnostics -- confirm against the
 * base class). @p startingState lets the caller begin lexing in a state
 * other than the default. Position starts at 0 and lines are counted
 * from 1; m_columnOffset tracks the position of the last line break so
 * columns can be derived from m_pos. */
XQueryTokenizer::XQueryTokenizer(const QString &query,
                                 const QUrl &location,
                                 const State startingState) : Tokenizer(location)
                                                            , m_data(query)
                                                            , m_length(query.length())
                                                            , m_state(startingState)
                                                            , m_pos(0)
                                                            , m_line(1)
                                                            , m_columnOffset(0)
                                                            , m_scanOnly(false)
{
    /* An empty location is tolerated; anything non-empty must be valid. */
    Q_ASSERT(location.isValid() || location.isEmpty());
}
75
76const QChar XQueryTokenizer::current() const
77{
78 if(m_pos < m_length)
79 return m_data.at(m_pos);
80 else
81 return QChar();
82}
83
84char XQueryTokenizer::peekCurrent() const
85{
86 return current().toAscii();
87}
88
/* Looks ahead -- without consuming any input -- for the axis separator
 * "::" preceded only by whitespace. Returns the offset (relative to the
 * current position) of the first ':' when found, otherwise -1. */
int XQueryTokenizer::peekForColonColon() const
{
    /* Note, we don't modify m_pos in this function, so we need to do offset
     * calculations. */
    int pos = m_pos;

    while(pos < m_length)
    {
        switch(m_data.at(pos).toAscii())
        {
            /* Fallthrough these four. Whitespace is allowed before "::",
             * so just keep scanning. */
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                break;
            case ':':
            {
                /* A ':' only counts if the very next character is also
                 * ':'; a lone ':' means this is not an axis separator. */
                if(peekAhead((pos - m_pos) + 1) == ':')
                    return pos - m_pos;
                /* Fallthrough. A lone ':' is handled like any other
                 * non-whitespace character: give up. */
            }
            default:
                return -1;
        }
        ++pos;
    }

    return -1;
}
119
120Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code,
121 const State s,
122 const int advance)
123{
124 Q_ASSERT(advance >= 0);
125 m_pos += advance;
126 setState(s);
127 return Token(code);
128}
129
130Tokenizer::Token XQueryTokenizer::tokenAndChangeState(const TokenType code,
131 const QString &value,
132 const State s)
133{
134 setState(s);
135 return Token(code, value);
136}
137
138Tokenizer::Token XQueryTokenizer::tokenAndAdvance(const TokenType code,
139 const int advance)
140{
141 Q_ASSERT(advance >= 0);
142 m_pos += advance;
143 return Token(code);
144}
145
146QString XQueryTokenizer::normalizeEOL(const QString &input,
147 const CharacterSkips &characterSkips)
148{
149 const int len = input.count();
150 QString result;
151
152 /* The likely hood is rather high it'll be the same content. */
153 result.reserve(len);
154
155 for(int i = 0; i < len; ++i)
156 {
157 const QChar &at = input.at(i);
158
159 if(characterSkips.contains(i))
160 {
161 result.append(at);
162 continue;
163 }
164 switch(input.at(i).unicode())
165 {
166 case '\r':
167 {
168 if(i + 1 < len && input.at(i + 1) == QLatin1Char('\n'))
169 ++i;
170
171 /* Else, fallthrough. */
172 }
173 case '\n':
174 {
175 result.append(QLatin1Char('\n'));
176 continue;
177 }
178 default:
179 {
180 result.append(at);
181 }
182 }
183 }
184
185 return result;
186}
187
188Tokenizer::TokenType XQueryTokenizer::consumeComment()
189{
190 /* Below, we return ERROR instead of END_OF_FILE such that the parser
191 * sees an invalid comment. */
192 while(m_pos < m_length)
193 {
194 switch(peekCurrent())
195 {
196 case ':':
197 {
198 ++m_pos; /* Consume ':' */
199 if(atEnd())
200 return ERROR;
201
202 if(peekCurrent() == ')')
203 {
204 ++m_pos; /* Consume ')' */
205 return SUCCESS; /* The comment closed nicely. */
206 }
207 continue; /* We don't want to increment m_pos twice. */
208 }
209 case '(':
210 { /* It looks like the start of a comment. */
211 ++m_pos;
212
213 if(atEnd())
214 return END_OF_FILE;
215 else if(peekCurrent() == ':')
216 {
217 /* And it is a nested comment -- parse it. */
218 const TokenType retval = consumeComment();
219 if(retval == SUCCESS)
220 continue; /* Continue with our "own" comment. */
221 else
222 return retval; /* Return the error in the nested comment. */
223 }
224 break;
225 }
226 case '\n':
227 /* Fallthrough. */
228 case '\r':
229 {
230 /* We want to count \r\n as a single line break. */
231 if(peekAhead() == '\n')
232 ++m_pos;
233
234 m_columnOffset = m_pos;
235 ++m_line;
236
237 break;
238 }
239 }
240 ++m_pos;
241 }
242
243 return ERROR; /* Error: we reached the end while inside a comment. */
244}
245
246bool XQueryTokenizer::consumeRawWhitespace()
247{
248 while(m_pos < m_length)
249 {
250 switch(peekCurrent())
251 {
252 case ' ':
253 case '\t':
254 break;
255 case '\n':
256 case '\r':
257 {
258 if(peekAhead() == '\n')
259 ++m_pos;
260
261 m_columnOffset = m_pos;
262 ++m_line;
263
264 break;
265 }
266 default:
267 return false;
268 }
269 ++m_pos;
270 }
271 return true;
272}
273
274Tokenizer::TokenType XQueryTokenizer::consumeWhitespace()
275{
276 while(m_pos < m_length)
277 {
278 switch(peekCurrent())
279 {
280 case ' ':
281 case '\t':
282 break;
283 case '\n':
284 case '\r':
285 {
286 /* We want to count \r\n as a single line break. */
287 if(peekAhead() == '\n')
288 ++m_pos;
289
290 m_columnOffset = m_pos;
291 ++m_line;
292
293 break;
294 }
295 case '(':
296 {
297 if(peekAhead() == ':')
298 {
299 m_pos += 2; /* Consume "(:" */
300
301 const TokenType comment = consumeComment();
302 if(comment == SUCCESS)
303 continue;
304 else
305 return comment;
306 }
307 }
308 default:
309 return SUCCESS;
310 }
311 ++m_pos;
312 }
313
314 return END_OF_FILE;
315}
316
317char XQueryTokenizer::peekAhead(const int length) const
318{
319 if(m_pos + length < m_length)
320 return m_data.at(m_pos + length).toAscii();
321 else
322 return 0;
323}
324
325Tokenizer::Token XQueryTokenizer::error()
326{
327 return Token(ERROR);
328}