source: trunk/src/xmlpatterns/parser/qxquerytokenizer_p.h@ 126

Last change on this file since 126 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 11.0 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the QtXmlPatterns module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42//
43// W A R N I N G
44// -------------
45//
46// This file is not part of the Qt API. It exists purely as an
47// implementation detail. This header file may change from version to
48// version without notice, or even be removed.
49//
50// We mean it.
51#ifndef Patternist_XQueryTokenizer_H
52#define Patternist_XQueryTokenizer_H
53
54#include <QHash>
55#include <QSet>
56#include <QStack>
57#include <QString>
58#include <QUrl>
59
60#include "qtokenizer_p.h"
61
62QT_BEGIN_HEADER
63
64QT_BEGIN_NAMESPACE
65
66namespace QPatternist
67{
68 struct TokenMap;
69
70 /**
71 * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
72 * and delivers tokens to the Bison generated parser.
73 *
74 * @author Frans Englich <[email protected]>
75 */
76 class XQueryTokenizer : public Tokenizer
77 {
78 public:
79 /**
80 * Tokenizer states. Organized alphabetically.
 *
 * The enumerators carry no explicit values, so the numeric value of each
 * state follows from its position in this list: inserting a new state
 * in alphabetical order changes the value of every state after it.
 *
 * @c Default is the state the constructor starts in when no
 * @c startingState argument is supplied.
81 */
82 enum State
83 {
84 AfterAxisSeparator,
85 AposAttributeContent,
86 Axis,
87 Default,
88 ElementContent,
89 EndTag,
90 ItemType,
91 KindTest,
92 KindTestForPI,
93 NamespaceDecl,
94 NamespaceKeyword,
95 OccurrenceIndicator,
96 Operator,
97 Pragma,
98 PragmaContent,
99 ProcessingInstructionContent,
100 ProcessingInstructionName,
101 QuotAttributeContent,
102 StartTag,
103 VarName,
104 XMLComment,
105 XMLSpaceDecl,
106 XQueryVersion
107 };
108
109 XQueryTokenizer(const QString &query,
110 const QUrl &location,
111 const State startingState = Default);
112
113 virtual Token nextToken(YYLTYPE *const sourceLocator);
114 virtual int commenceScanOnly();
115 virtual void resumeTokenizationFrom(const int position);
116
117 /**
118 * Does nothing.
119 */
120 virtual void setParserContext(const ParserContext::Ptr &parseInfo);
121
122 private:
123
124 /**
125 * Returns the character corresponding to the builtin reference @p
126 * reference. For instance, passing @c gt will give you '>' in return.
127 *
128 * If @p reference is an invalid character reference, a null QChar is
129 * returned.
130 *
131 * @see QChar::isNull()
132 */
133 QChar charForReference(const QString &reference);
134
135 inline Token tokenAndChangeState(const TokenType code,
136 const State state,
137 const int advance = 1);
138 inline Token tokenAndChangeState(const TokenType code,
139 const QString &value,
140 const State state);
141 inline Token tokenAndAdvance(const TokenType code,
142 const int advance = 1);
143 QString tokenizeCharacterReference();
144
145 inline Token tokenizeStringLiteral();
146 inline Token tokenizeNumberLiteral();
147
148 /**
149 * @returns the character located @p length characters ahead of the
150 * current position.
151 */
152 inline char peekAhead(const int length = 1) const;
153
154 /**
155 * @returns whether the stream, starting from @p offset from the
156 * current position, matches @p chs. The length of @p chs is @p len.
157 */
158 inline bool aheadEquals(const char *const chs,
159 const int len,
160 const int offset = 1) const;
161
162 inline Token tokenizeNCName();
163 static inline bool isOperatorKeyword(const TokenType);
164
165 static inline bool isDigit(const char ch);
166 static inline Token error();
167 inline TokenType consumeWhitespace();
168
169 /**
170 * @short Returns the character at the current position, converted to
171 * @c ASCII.
172 *
173 * Equivalent to calling:
174 *
175 * @code
176 * current().toAscii();
177 * @endcode
178 */
179 inline char peekCurrent() const;
180
181 /**
182 * Disregarding encoding conversion, equivalent to calling:
183 *
184 * @code
185 * peekAhead(0);
186 * @endcode
187 */
188 inline const QChar current() const;
189
190 /**
191 * Takes no arguments; the outcome is reported solely through the return value.
192 *
193 * @returns the length of whitespace scanned before reaching "::", or
194 * -1 if something else was found.
195 */
196 int peekForColonColon() const;
197
198 static inline bool isNCNameStart(const QChar ch);
199 static inline bool isNCNameBody(const QChar ch);
200 static inline const TokenMap *lookupKeyword(const QString &keyword);
201 inline void popState();
202 inline void pushState(const State state);
203 inline State state() const;
204 inline void setState(const State s);
205 static bool isTypeToken(const TokenType t);
206
207 inline Token tokenizeNCNameOrQName();
208 /**
209 * Advances m_pos until @p content is encountered.
210 *
211 * Returned is the length stretching from m_pos when starting, until
212 * @p content is encountered. @p content is not included in the length.
213 */
214 int scanUntil(const char *const content);
215
216 /**
217 * Same as calling:
218 * @code
219 * pushState(state());
220 * @endcode
221 */
222 inline void pushState();
223
224 /**
225 * Consumes only whitespace, in the traditional sense. The function exits
226 * if non-whitespace is encountered, such as the start of a comment.
227 *
228 * @returns @c true if the end was reached, otherwise @c false
229 */
230 inline bool consumeRawWhitespace();
231
232 /**
233 * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
234 * parsing nested comments.
235 *
236 * It is assumed that the start token for the comment, "(:", has
237 * already been parsed.
238 *
239 * Typically, call consumeWhitespace() rather than this function directly.
240 *
241 * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
242 * 2.0, 2.6 Comments</a>
243 * @returns
244 * - SUCCESS if everything went ok
245 * - ERROR if there was an error in parsing one or more comments