source: trunk/src/xmlpatterns/parser/qxquerytokenizer_p.h@ 439

Last change on this file since 439 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 11.0 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information ([email protected])
5**
6** This file is part of the QtXmlPatterns module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42//
43// W A R N I N G
44// -------------
45//
46// This file is not part of the Qt API. It exists purely as an
47// implementation detail. This header file may change from version to
48// version without notice, or even be removed.
49//
50// We mean it.
51#ifndef Patternist_XQueryTokenizer_H
52#define Patternist_XQueryTokenizer_H
53
54#include <QHash>
55#include <QSet>
56#include <QStack>
57#include <QString>
58#include <QUrl>
59
60#include "qtokenizer_p.h"
61
62QT_BEGIN_HEADER
63
64QT_BEGIN_NAMESPACE
65
66namespace QPatternist
67{
68 struct TokenMap;
69
70 /**
71 * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
72 * and delivers tokens to the Bison generated parser.
73 *
74 * @author Frans Englich <[email protected]>
75 */
76 class XQueryTokenizer : public Tokenizer
77 {
78 public:
79 /**
80 * Tokenizer states. Organized alphabetically.
81 */
82 enum State
83 {
84 AfterAxisSeparator,
85 AposAttributeContent,
86 Axis,
87 Default,
88 ElementContent,
89 EndTag,
90 ItemType,
91 KindTest,
92 KindTestForPI,
93 NamespaceDecl,
94 NamespaceKeyword,
95 OccurrenceIndicator,
96 Operator,
97 Pragma,
98 PragmaContent,
99 ProcessingInstructionContent,
100 ProcessingInstructionName,
101 QuotAttributeContent,
102 StartTag,
103 VarName,
104 XMLComment,
105 XMLSpaceDecl,
106 XQueryVersion
107 };
108
109 XQueryTokenizer(const QString &query,
110 const QUrl &location,
111 const State startingState = Default);
112
113 virtual Token nextToken(YYLTYPE *const sourceLocator);
114 virtual int commenceScanOnly();
115 virtual void resumeTokenizationFrom(const int position);
116
117 /**
118 * Does nothing.
119 */
120 virtual void setParserContext(const ParserContext::Ptr &parseInfo);
121
122 private:
123
124 /**
125 * Returns the character corresponding to the builtin reference @p
126 * reference. For instance, passing @c gt will give you '>' in return.
127 *
128 * If @p reference is an invalid character reference, a null QChar is
129 * returned.
130 *
131 * @see QChar::isNull()
132 */
133 QChar charForReference(const QString &reference);
134
135 inline Token tokenAndChangeState(const TokenType code,
136 const State state,
137 const int advance = 1);
138 inline Token tokenAndChangeState(const TokenType code,
139 const QString &value,
140 const State state);
141 inline Token tokenAndAdvance(const TokenType code,
142 const int advance = 1);
143 QString tokenizeCharacterReference();
144
145 inline Token tokenizeStringLiteral();
146 inline Token tokenizeNumberLiteral();
147
148 /**
149 * @returns the character @p length characters from the current
150 * position.
151 */
152 inline char peekAhead(const int length = 1) const;
153
154 /**
155 * @returns whether the stream, starting from @p offset from the
156 * current position, matches @p chs. The length of @p chs is @p len.
157 */
158 inline bool aheadEquals(const char *const chs,
159 const int len,
160 const int offset = 1) const;
161
162 inline Token tokenizeNCName();
163 static inline bool isOperatorKeyword(const TokenType);
164
165 static inline bool isDigit(const char ch);
166 static inline Token error();
167 inline TokenType consumeWhitespace();
168
169 /**
170 * @short Returns the character at the current position, converted to
171 * @c ASCII.
172 *
173 * Equivalent to calling:
174 *
175 * @code
176 * current().toAscii();
177 * @endcode
178 */
179 inline char peekCurrent() const;
180
181 /**
182 * Disregarding encoding conversion, equivalent to calling:
183 *
184 * @code
185 * peekAhead(0);
186 * @endcode
187 */
188 inline const QChar current() const;
189
190 /**
191 * NOTE(review): this comment used to mention a hadWhitespace argument, but the function takes no arguments -- confirm it is stale.
192 *
193 * @returns the length of whitespace scanned before reaching "::", or
194 * -1 if something else was found.
195 */
196 int peekForColonColon() const;
197
198 static inline bool isNCNameStart(const QChar ch);
199 static inline bool isNCNameBody(const QChar ch);
200 static inline const TokenMap *lookupKeyword(const QString &keyword);
201 inline void popState();
202 inline void pushState(const State state);
203 inline State state() const;
204 inline void setState(const State s);
205 static bool isTypeToken(const TokenType t);
206
207 inline Token tokenizeNCNameOrQName();
208 /**
209 * Advances m_pos until @p content is encountered.
210 *
211 * Returned is the length stretching from m_pos when starting, until
212 * @p content is encountered. @p content is not included in the length.
213 */
214 int scanUntil(const char *const content);
215
216 /**
217 * Same as calling:
218 * @code
219 * pushState(state());
220 * @endcode
221 */
222 inline void pushState();
223
224 /**
225 * Consumes only whitespace, in the traditional sense. The function exits
226 * if non-whitespace is encountered, such as the start of a comment.
227 *
228 * @returns @c true if the end was reached, otherwise @c false
229 */
230 inline bool consumeRawWhitespace();
231
232 /**
233 * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
234 * parsing nested comments.
235 *
236 * It is assumed that the start token for the comment, "(:", has
237 * already been parsed.
238 *
239 * Typically, don't call this function, but consumeWhitespace().
240 *
241 * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
242 * 2.0, 2.6 Comments</a>
243 * @returns
244 * - SUCCESS if everything went ok
245 * - ERROR if there was an error in parsing one or more comments
246 * - END_OF_FILE if the end was reached
247 */
248 Tokenizer::TokenType consumeComment();
249
250 /**
251 * Determines whether @p code is a keyword
252 * that is followed by a second keyword. For instance <tt>declare
253 * function</tt>.
254 */
255 static inline bool isPhraseKeyword(const TokenType code);
256
257 /**
258 * A set of indexes into a QString, the one being passed to
259 * normalizeEOL() whose characters shouldn't be normalized. */
260 typedef QSet<int> CharacterSkips;
261
262 /**
263 * Returns @p input, normalized according to
264 * <a href="http://www.w3.org/TR/xquery/#id-eol-handling">XQuery 1.0:
265 * An XML Query Language, A.2.3 End-of-Line Handling</a>
266 */
267 static QString normalizeEOL(const QString &input,
268 const CharacterSkips &characterSkips);
269
270 inline bool atEnd() const
271 {
272 return m_pos == m_length;
273 }
274
275 Token nextToken();
276 /**
277 * Instead of recognizing and tokenizing embedded expressions in
278 * direct attribute constructors, this function is essentially a mini
279 * recursive-descent parser that has the necessary logic to recognize
280 * embedded expressions and their potentially interfering string literals, in
281 * order to scan to the very end of the attribute value, and return the
282 * whole as a string.
283 *
284 * There are of course syntax errors this function will not detect, but
285 * that is ok since the attributes will be parsed once more.
286 *
287 * An inelegant solution, but one which gets the job done.
288 *
289 * @see commenceScanOnly(), resumeTokenizationFrom()
290 */
291 Token attributeAsRaw(const QChar separator,
292 int &stack,
293 const int startPos,
294 const bool inLiteral,
295 QString &result);
296
297 const QString m_data;
298 const int m_length;
299 State m_state;
300 QStack<State> m_stateStack;
301 int m_pos;
302
303 /**
304 * The current line number.
305 *
306 * The line number and column number both start at 1.
307 */
308 int m_line;
309
310 /**
311 * The offset for where the current column starts. As originally
312 * written: m_length - m_columnOffset is the current column.
313 * NOTE(review): m_length is const; presumably m_pos is meant -- confirm.
314 *
315 * The line number and column number both start at 1.
316 */
317 int m_columnOffset;
318
319 const NamePool::Ptr m_namePool;
320 QStack<Token> m_tokenStack;
321 QHash<QString, QChar> m_charRefs;
322 bool m_scanOnly;
323
324 Q_DISABLE_COPY(XQueryTokenizer)
325 };
326}
327
328QT_END_NAMESPACE
329
330QT_END_HEADER
331
332#endif
Note: See TracBrowser for help on using the repository browser.