[2] | 1 | /****************************************************************************
|
---|
| 2 | **
|
---|
[651] | 3 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
---|
[561] | 4 | ** All rights reserved.
|
---|
| 5 | ** Contact: Nokia Corporation ([email protected])
|
---|
[2] | 6 | **
|
---|
| 7 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
|
---|
| 8 | **
|
---|
| 9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
| 10 | ** Commercial Usage
|
---|
| 11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
| 12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
| 13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
| 14 | ** a written agreement between you and Nokia.
|
---|
| 15 | **
|
---|
| 16 | ** GNU Lesser General Public License Usage
|
---|
| 17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
| 18 | ** General Public License version 2.1 as published by the Free Software
|
---|
| 19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
| 20 | ** packaging of this file. Please review the following information to
|
---|
| 21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
| 22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
| 23 | **
|
---|
[561] | 24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
| 25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
| 26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
[2] | 27 | **
|
---|
| 28 | ** GNU General Public License Usage
|
---|
| 29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
| 30 | ** General Public License version 3.0 as published by the Free Software
|
---|
| 31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
| 32 | ** packaging of this file. Please review the following information to
|
---|
| 33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
| 34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
| 35 | **
|
---|
[561] | 36 | ** If you have questions regarding the use of this file, please contact
|
---|
| 37 | ** Nokia at [email protected].
|
---|
[2] | 38 | ** $QT_END_LICENSE$
|
---|
| 39 | **
|
---|
| 40 | ****************************************************************************/
|
---|
| 41 |
|
---|
| 42 | //
|
---|
| 43 | // W A R N I N G
|
---|
| 44 | // -------------
|
---|
| 45 | //
|
---|
| 46 | // This file is not part of the Qt API. It exists purely as an
|
---|
| 47 | // implementation detail. This header file may change from version to
|
---|
| 48 | // version without notice, or even be removed.
|
---|
| 49 | //
|
---|
| 50 | // We mean it.
|
---|
| 51 | #ifndef Patternist_XQueryTokenizer_H
|
---|
| 52 | #define Patternist_XQueryTokenizer_H
|
---|
| 53 |
|
---|
| 54 | #include <QHash>
|
---|
| 55 | #include <QSet>
|
---|
| 56 | #include <QStack>
|
---|
| 57 | #include <QString>
|
---|
| 58 | #include <QUrl>
|
---|
| 59 |
|
---|
| 60 | #include "qtokenizer_p.h"
|
---|
| 61 |
|
---|
| 62 | QT_BEGIN_HEADER
|
---|
| 63 |
|
---|
| 64 | QT_BEGIN_NAMESPACE
|
---|
| 65 |
|
---|
| 66 | namespace QPatternist
|
---|
| 67 | {
|
---|
| 68 | struct TokenMap;
|
---|
| 69 |
|
---|
| 70 | /**
|
---|
| 71 | * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
|
---|
| 72 | * and delivers tokens to the Bison generated parser.
|
---|
| 73 | *
|
---|
[561] | 74 | * @author Frans Englich <[email protected]>
|
---|
[2] | 75 | */
|
---|
| 76 | class XQueryTokenizer : public Tokenizer
|
---|
| 77 | {
|
---|
| 78 | public:
|
---|
| 79 | /**
|
---|
| 80 | * Tokenizer states: the values used by state(), setState() and pushState(), and as the starting state accepted by the constructor. Organized alphabetically.
|
---|
| 81 | */
|
---|
| 82 | enum State
|
---|
| 83 | {
|
---|
| 84 | AfterAxisSeparator,
|
---|
| 85 | AposAttributeContent,
|
---|
| 86 | Axis,
|
---|
| 87 | Default, // The constructor uses this as the default starting state.
|
---|
| 88 | ElementContent,
|
---|
| 89 | EndTag,
|
---|
| 90 | ItemType,
|
---|
| 91 | KindTest,
|
---|
| 92 | KindTestForPI,
|
---|
| 93 | NamespaceDecl,
|
---|
| 94 | NamespaceKeyword,
|
---|
| 95 | OccurrenceIndicator,
|
---|
| 96 | Operator,
|
---|
| 97 | Pragma,
|
---|
| 98 | PragmaContent,
|
---|
| 99 | ProcessingInstructionContent,
|
---|
| 100 | ProcessingInstructionName,
|
---|
| 101 | QuotAttributeContent,
|
---|
| 102 | StartTag,
|
---|
| 103 | VarName,
|
---|
| 104 | XMLComment,
|
---|
| 105 | XMLSpaceDecl,
|
---|
| 106 | XQueryVersion
|
---|
| 107 | };
|
---|
| 108 |
|
---|
| 109 | XQueryTokenizer(const QString &query,
|
---|
| 110 | const QUrl &location,
|
---|
| 111 | const State startingState = Default);
|
---|
| 112 |
|
---|
| 113 | virtual Token nextToken(YYLTYPE *const sourceLocator);
|
---|
| 114 | virtual int commenceScanOnly();
|
---|
| 115 | virtual void resumeTokenizationFrom(const int position);
|
---|
| 116 |
|
---|
| 117 | /**
|
---|
| 118 | * Does nothing.
|
---|
| 119 | */
|
---|
| 120 | virtual void setParserContext(const ParserContext::Ptr &parseInfo);
|
---|
| 121 |
|
---|
| 122 | private:
|
---|
| 123 |
|
---|
| 124 | /**
|
---|
| 125 | * Returns the character corresponding to the builtin reference @p
|
---|
| 126 | * reference. For instance, passing @c gt will give you '>' in return.
|
---|
| 127 | *
|
---|
| 128 | * If @p reference is an invalid character reference, a null QChar is
|
---|
| 129 | * returned.
|
---|
| 130 | *
|
---|
| 131 | * @see QChar::isNull()
|
---|
| 132 | */
|
---|
| 133 | QChar charForReference(const QString &reference);
|
---|
| 134 |
|
---|
| 135 | inline Token tokenAndChangeState(const TokenType code,
|
---|
| 136 | const State state,
|
---|
| 137 | const int advance = 1);
|
---|
| 138 | inline Token tokenAndChangeState(const TokenType code,
|
---|
| 139 | const QString &value,
|
---|
| 140 | const State state);
|
---|
| 141 | inline Token tokenAndAdvance(const TokenType code,
|
---|
| 142 | const int advance = 1);
|
---|
| 143 | QString tokenizeCharacterReference();
|
---|
| 144 |
|
---|
| 145 | inline Token tokenizeStringLiteral();
|
---|
| 146 | inline Token tokenizeNumberLiteral();
|
---|
| 147 |
|
---|
| 148 | /**
|
---|
| 149 | * @returns the character @p length characters from the current
|
---|
| 150 | * position.
|
---|
| 151 | */
|
---|
| 152 | inline char peekAhead(const int length = 1) const;
|
---|
| 153 |
|
---|
| 154 | /**
|
---|
| 155 | * @returns whether the stream, starting from @p offset from the
|
---|
| 156 | * current position, matches @p chs. The length of @p chs is @p len.
|
---|
| 157 | */
|
---|
| 158 | inline bool aheadEquals(const char *const chs,
|
---|
| 159 | const int len,
|
---|
| 160 | const int offset = 1) const;
|
---|
| 161 |
|
---|
| 162 | inline Token tokenizeNCName();
|
---|
| 163 | static inline bool isOperatorKeyword(const TokenType);
|
---|
| 164 |
|
---|
| 165 | static inline bool isDigit(const char ch);
|
---|
| 166 | static inline Token error();
|
---|
| 167 | inline TokenType consumeWhitespace();
|
---|
| 168 |
|
---|
| 169 | /**
|
---|
| 170 | * @short Returns the character at the current position, converted to
|
---|
| 171 | * @c ASCII.
|
---|
| 172 | *
|
---|
| 173 | * Equivalent to calling:
|
---|
| 174 | *
|
---|
| 175 | * @code
|
---|
| 176 | * current().toAscii();
|
---|
| 177 | * @endcode
|
---|
| 178 | */
|
---|
| 179 | inline char peekCurrent() const;
|
---|
| 180 |
|
---|
| 181 | /**
|
---|
| 182 | * Disregarding encoding conversion, equivalent to calling:
|
---|
| 183 | *
|
---|
| 184 | * @code
|
---|
| 185 | * peekAhead(0);
|
---|
| 186 | * @endcode
|
---|
| 187 | */
|
---|
| 188 | inline const QChar current() const;
|
---|
| 189 |
|
---|
| 190 | /**
|
---|
| 191 | * Looks ahead for "::", scanning past any whitespace that precedes it. The outcome is reported through the return value.
|
---|
| 192 | *
|
---|
| 193 | * @returns the length of whitespace scanned before reaching "::", or
|
---|
| 194 | * -1 if something else was found.
|
---|
| 195 | */
|
---|
| 196 | int peekForColonColon() const;
|
---|
| 197 |
|
---|
| 198 | static inline bool isNCNameStart(const QChar ch);
|
---|
| 199 | static inline bool isNCNameBody(const QChar ch);
|
---|
| 200 | static inline const TokenMap *lookupKeyword(const QString &keyword);
|
---|
| 201 | inline void popState();
|
---|
| 202 | inline void pushState(const State state);
|
---|
| 203 | inline State state() const;
|
---|
| 204 | inline void setState(const State s);
|
---|
| 205 | static bool isTypeToken(const TokenType t);
|
---|
| 206 |
|
---|
| 207 | inline Token tokenizeNCNameOrQName();
|
---|
| 208 | /**
|
---|
| 209 | * Advances m_pos until @p content is encountered.
|
---|
| 210 | *
|
---|
| 211 | * Returned is the length stretching from m_pos when starting, until
|
---|
| 212 | * @p content is encountered. @p content is not included in the length.
|
---|
| 213 | */
|
---|
| 214 | int scanUntil(const char *const content);
|
---|
| 215 |
|
---|
| 216 | /**
|
---|
| 217 | * Same as calling:
|
---|
| 218 | * @code
|
---|
| 219 | * pushState(state());
|
---|
| 220 | * @endcode
|
---|
| 221 | */
|
---|
| 222 | inline void pushState();
|
---|
| 223 |
|
---|
| 224 | /**
|
---|
| 225 | * Consumes only whitespace, in the traditional sense. The function exits
|
---|
| 226 | * if non-whitespace is encountered, such as the start of a comment.
|
---|
| 227 | *
|
---|
| 228 | * @returns @c true if the end was reached, otherwise @c false
|
---|
| 229 | */
|
---|
| 230 | inline bool consumeRawWhitespace();
|
---|
| 231 |
|
---|
| 232 | /**
|
---|
| 233 | * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
|
---|
| 234 | * parsing nested comments.
|
---|
| 235 | *
|
---|
| 236 | * It is assumed that the start token for the comment, "(:", has
|
---|
| 237 | * already been parsed.
|
---|
| 238 | *
|
---|
| 239 | * Typically, don't call this function directly; call consumeWhitespace() instead.
|
---|
| 240 | *
|
---|
| 241 | * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
|
---|
| 242 | * 2.0, 2.6 Comments</a>
|
---|
| 243 | * @returns
|
---|
| 244 | * - SUCCESS if everything went ok
|
---|
| 245 | * - ERROR if there was an error in parsing one or more comments
|
---|
| 246 | * - END_OF_FILE if the end was reached
|
---|
| 247 | */
|
---|
| 248 | Tokenizer::TokenType consumeComment();
|
---|
| 249 |
|
---|
| 250 | /**
|
---|
| 251 | * Determines whether @p code is a keyword
|
---|
| 252 | * that is followed by a second keyword. For instance <tt>declare
|
---|
| 253 | * function</tt>.
|
---|
| 254 | */
|
---|
| 255 | static inline bool isPhraseKeyword(const TokenType code);
|
---|
| 256 |
|
---|
| 257 | /**
|
---|
| 258 | * A set of indexes into the QString passed to normalizeEOL(), identifying
|
---|
| 259 | * the characters that shouldn't be normalized. */
|
---|
| 260 | typedef QSet<int> CharacterSkips;
|
---|
| 261 |
|
---|
| |
---|