[2] | 1 | /****************************************************************************
|
---|
| 2 | **
|
---|
[846] | 3 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
[561] | 4 | ** All rights reserved.
|
---|
| 5 | ** Contact: Nokia Corporation ([email protected])
|
---|
[2] | 6 | **
|
---|
| 7 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
|
---|
| 8 | **
|
---|
| 9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
| 10 | ** Commercial Usage
|
---|
| 11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
| 12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
| 13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
| 14 | ** a written agreement between you and Nokia.
|
---|
| 15 | **
|
---|
| 16 | ** GNU Lesser General Public License Usage
|
---|
| 17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
| 18 | ** General Public License version 2.1 as published by the Free Software
|
---|
| 19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
| 20 | ** packaging of this file. Please review the following information to
|
---|
| 21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
| 22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
| 23 | **
|
---|
[561] | 24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
| 25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
| 26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
[2] | 27 | **
|
---|
| 28 | ** GNU General Public License Usage
|
---|
| 29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
| 30 | ** General Public License version 3.0 as published by the Free Software
|
---|
| 31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
| 32 | ** packaging of this file. Please review the following information to
|
---|
| 33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
| 34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
| 35 | **
|
---|
[561] | 36 | ** If you have questions regarding the use of this file, please contact
|
---|
| 37 | ** Nokia at [email protected].
|
---|
[2] | 38 | ** $QT_END_LICENSE$
|
---|
| 39 | **
|
---|
| 40 | ****************************************************************************/
|
---|
| 41 |
|
---|
| 42 | //
|
---|
| 43 | // W A R N I N G
|
---|
| 44 | // -------------
|
---|
| 45 | //
|
---|
| 46 | // This file is not part of the Qt API. It exists purely as an
|
---|
| 47 | // implementation detail. This header file may change from version to
|
---|
| 48 | // version without notice, or even be removed.
|
---|
| 49 | //
|
---|
| 50 | // We mean it.
|
---|
| 51 |
|
---|
| 52 | #ifndef Patternist_DerivedString_H
|
---|
| 53 | #define Patternist_DerivedString_H
|
---|
| 54 |
|
---|
| 55 | #include <QRegExp>
|
---|
| 56 |
|
---|
| 57 | #include "private/qxmlutils_p.h"
|
---|
| 58 | #include "qbuiltintypes_p.h"
|
---|
| 59 | #include "qpatternistlocale_p.h"
|
---|
| 60 | #include "qvalidationerror_p.h"
|
---|
| 61 |
|
---|
| 62 | QT_BEGIN_HEADER
|
---|
| 63 |
|
---|
| 64 | QT_BEGIN_NAMESPACE
|
---|
| 65 |
|
---|
| 66 | namespace QPatternist
|
---|
| 67 | {
|
---|
| 68 | /**
|
---|
| 69 | * @short Represents instances of derived @c xs:string types, such as @c
|
---|
| 70 | * xs:normalizedString.
|
---|
| 71 | *
|
---|
| 72 | * Whitespace is a significant part for creating values from the lexical
|
---|
| 73 | * space. Of course the specification is tricky here. Here are some pointers:
|
---|
| 74 | *
|
---|
| 75 | * - From <a href="http://www.w3.org/TR/xmlschema-2/#rf-whiteSpace">XML Schema Part 2: Datatypes
|
---|
| 76 | * Second Edition, 4.3.6 whiteSpace</a>:
|
---|
| 77 | * "For all atomic datatypes other than string (and types
|
---|
| 78 | * derived by restriction from it) the value of whiteSpace is
|
---|
| 79 | * collapse and cannot be changed by a schema author; for string the
|
---|
| 80 | * value of whiteSpace is preserve; for any type derived by
|
---|
| 81 | * restriction from string the value of whiteSpace can be any of the
|
---|
| 82 | * three legal values."
|
---|
| 83 | * - From <a href="http://www.w3.org/TR/xmlschema-1/#d0e1654">XML Schema Part 1: Structures
|
---|
| 84 | * Second Edition, 3.1.4 White Space Normalization during Validation</a>:
|
---|
| 85 | * "[Definition:] The normalized value of an element or attribute
|
---|
| 86 | * information item is an initial value whose white space, if any,
|
---|
| 87 | * has been normalized according to the value of the whiteSpace facet of
|
---|
| 88 | * the simple type definition used in its validation."
|
---|
| 89 | *
|
---|
[561] | 90 | * @author Frans Englich <[email protected]>
|
---|
[2] | 91 | * @ingroup Patternist_xdm
|
---|
| 92 | * @todo Documentation is missing
|
---|
| 93 | */
|
---|
| 94 | template<TypeOfDerivedString DerivedType>
|
---|
| 95 | class DerivedString : public AtomicValue
|
---|
| 96 | {
|
---|
| 97 | private:
|
---|
| 98 | static inline ItemType::Ptr itemType()
|
---|
| 99 | {
|
---|
| 100 | switch(DerivedType)
|
---|
| 101 | {
|
---|
| 102 | case TypeNormalizedString: return BuiltinTypes::xsNormalizedString;
|
---|
| 103 | case TypeToken: return BuiltinTypes::xsToken;
|
---|
| 104 | case TypeLanguage: return BuiltinTypes::xsLanguage;
|
---|
| 105 | case TypeNMTOKEN: return BuiltinTypes::xsNMTOKEN;
|
---|
| 106 | case TypeName: return BuiltinTypes::xsName;
|
---|
| 107 | case TypeNCName: return BuiltinTypes::xsNCName;
|
---|
| 108 | case TypeID: return BuiltinTypes::xsID;
|
---|
| 109 | case TypeIDREF: return BuiltinTypes::xsIDREF;
|
---|
| 110 | case TypeENTITY: return BuiltinTypes::xsENTITY;
|
---|
| 111 | case TypeString: return BuiltinTypes::xsString;
|
---|
| 112 | }
|
---|
| 113 |
|
---|
| 114 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached.");
|
---|
| 115 | return ItemType::Ptr();
|
---|
| 116 | }
|
---|
| 117 |
|
---|
| 118 | const QString m_value;
|
---|
| 119 |
|
---|
| 120 | inline DerivedString(const QString &value) : m_value(value)
|
---|
| 121 | {
|
---|
| 122 | }
|
---|
| 123 |
|
---|
| 124 | /**
|
---|
| 125 | * @short This is an incomplete test for whether @p ch conforms to
|
---|
| 126 | * the XML 1.0 NameChar production.
|
---|
| 127 | */
|
---|
| 128 | static inline bool isNameChar(const QChar &ch)
|
---|
| 129 | {
|
---|
| 130 | return ch.isLetter() ||
|
---|
| 131 | ch.isDigit() ||
|
---|
| 132 | ch == QLatin1Char('.') ||
|
---|
| 133 | ch == QLatin1Char('-') ||
|
---|
| 134 | ch == QLatin1Char('_') ||
|
---|
| 135 | ch == QLatin1Char(':');
|
---|
| 136 | }
|
---|
| 137 |
|
---|
| 138 | /**
|
---|
| 139 | * @returns @c true if @p input is a valid @c xs:Name.
|
---|
| 140 | * @see <a href="http://www.w3.org/TR/REC-xml/#NT-Name">Extensible
|
---|
| 141 | * Markup Language (XML) 1.0 (Fourth Edition), [5] Name</a>
|
---|
| 142 | */
|
---|
| 143 | static inline bool isValidName(const QString &input)
|
---|
| 144 | {
|
---|
| 145 | if(input.isEmpty())
|
---|
| 146 | return false;
|
---|
| 147 |
|
---|
| 148 | const QChar first(input.at(0));
|
---|
| 149 |
|
---|
| 150 | if(first.isLetter() ||
|
---|
| 151 | first == QLatin1Char('_') ||
|
---|
| 152 | first == QLatin1Char(':'))
|
---|
| 153 | {
|
---|
| 154 | const int len = input.length();
|
---|
| 155 |
|
---|
| 156 | if(len == 1)
|
---|
| 157 | return true;
|
---|
| 158 |
|
---|
| 159 | /* Since we've checked the first character above, we start at
|
---|
| 160 | * position 1. */
|
---|
| 161 | for(int i = 1; i < len; ++i)
|
---|
| 162 | {
|
---|
| 163 | if(!isNameChar(input.at(i)))
|
---|
| 164 | return false;
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | return true;
|
---|
| 168 | }
|
---|
| 169 | else
|
---|
| 170 | return false;
|
---|
| 171 | }
|
---|
| 172 |
|
---|
| 173 | /**
|
---|
| 174 | * @returns @c true if @p input conforms to the XML 1.0 @c Nmtoken product.
|
---|
| 175 | *
|
---|
| 176 | * @see <a
|
---|
| 177 | * href="http://www.w3.org/TR/2000/WD-xml-2e-20000814#NT-Nmtoken">Extensible
|
---|
| 178 | * Markup Language (XML) 1.0 (Second Edition), [7] Nmtoken</a>
|
---|
| 179 | */
|
---|
| 180 | static inline bool isValidNMTOKEN(const QString &input)
|
---|
| 181 | {
|
---|
| 182 | const int len = input.length();
|
---|
| 183 |
|
---|
| 184 | if(len == 0)
|
---|
| 185 | return false;
|
---|
| 186 |
|
---|
| 187 | for(int i = 0; i < len; ++i)
|
---|
| 188 | {
|
---|
| 189 | if(!isNameChar(input.at(i)))
|
---|
| 190 | return false;
|
---|
| 191 | }
|
---|
| 192 |
|
---|
| 193 | return true;
|
---|
| 194 | }
|
---|
| 195 |
|
---|
| 196 | /**
|
---|
| 197 | * @short Performs attribute value normalization as if @p input was not
|
---|
| 198 | * from a @c CDATA section.
|
---|
| 199 | *
|
---|
| 200 | * Each whitespace character in @p input that's not a space, such as tab
|
---|
| 201 | * or new line character, is replaced with a space. This algorithm
|
---|
| 202 | * differs from QString::simplified() in that it doesn't collapse
|
---|
| 203 | * subsequent whitespace characters to a single one, or remove trailing
|
---|
| 204 | * and leading space.
|
---|
| 205 | *
|
---|
| 206 | * @see <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">Extensible
|
---|
| 207 | * Markup Language (XML) 1.0 (Second Edition), 3.3.3 [E70]Attribute-Value Normalization</a>
|
---|
| 208 | */
|
---|
| 209 | static QString attributeNormalize(const QString &input)
|
---|
| 210 | {
|
---|
| 211 | QString retval(input);
|
---|
| 212 | const int len = retval.length();
|
---|
| 213 | const QLatin1Char space(' ');
|
---|
| 214 |
|
---|
| 215 | for(int i = 0; i < len; ++i)
|
---|
| 216 | {
|
---|
| 217 | const QChar ati(retval.at(i));
|
---|
| 218 |
|
---|
| 219 | if(ati.isSpace() && ati != space)
|
---|
| 220 | retval[i] = space;
|
---|
| 221 | }
|
---|
| 222 |
|
---|
| 223 | return retval;
|
---|
| 224 | }
|
---|
| 225 |
|
---|
| 226 | static AtomicValue::Ptr error(const NamePool::Ptr &np, const QString &invalidValue)
|
---|
| 227 | {
|
---|
| 228 | return ValidationError::createError(QString::fromLatin1("%1 is not a valid value for "
|
---|
| 229 | "type %2.").arg(formatData(invalidValue))
|
---|
| 230 | .arg(formatType(np, itemType())));
|
---|
| 231 | }
|
---|
| 232 |
|
---|
| 233 | public:
|
---|
| 234 |
|
---|
| 235 | /**
|
---|
| 236 | * @note This function doesn't perform any cleanup/normalizaiton of @p
|
---|
| 237 | * value. @p value must be a canonical value space of the type.
|
---|
| 238 | *
|
---|
| 239 | * If you want cleanup to be performed and/or the lexical space
|
---|
| 240 | * checked, use fromLexical().
|
---|
| 241 | */
|
---|
| 242 | static AtomicValue::Ptr fromValue(const QString &value)
|
---|
| 243 | {
|
---|
| 244 | return AtomicValue::Ptr(new DerivedString(value));
|
---|
| 245 | }
|
---|
| 246 |
|
---|
| 247 | /**
|
---|
| 248 | * Constructs an instance from the lexical
|
---|
| 249 | * representation @p lexical.
|
---|
| 250 | */
|
---|
| 251 | static AtomicValue::Ptr fromLexical(const NamePool::Ptr &np, const QString &lexical)
|
---|
| 252 | {
|
---|
| 253 | switch(DerivedType)
|
---|
| 254 | {
|
---|
| 255 | case TypeString:
|
---|
| 256 | return AtomicValue::Ptr(new DerivedString(lexical));
|
---|
| 257 | case TypeNormalizedString:
|
---|
| 258 | return AtomicValue::Ptr(new DerivedString(attributeNormalize(lexical)));
|
---|
| 259 | case TypeToken:
|
---|
| 260 | return AtomicValue::Ptr(new DerivedString(lexical.simplified()));
|
---|
| 261 | case TypeLanguage:
|
---|
| 262 | {
|
---|
| 263 | const QString simplified(lexical.trimmed());
|
---|
| 264 |
|
---|
| 265 | const QRegExp validate(QLatin1String("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*"));
|
---|
| 266 | Q_ASSERT(validate.isValid());
|
---|
| 267 |
|
---|
| 268 | if(validate.exactMatch(simplified))
|
---|
| 269 | return AtomicValue::Ptr(new DerivedString(lexical.simplified()));
|
---|
| 270 | else
|
---|
| 271 | return error(np, simplified);
|
---|
| 272 | }
|
---|
| 273 | case TypeNMTOKEN:
|
---|
| 274 | {
|
---|
| 275 | const QString trimmed(lexical.trimmed());
|
---|
| 276 |
|
---|
| 277 | if(isValidNMTOKEN(trimmed))
|
---|
| 278 | return AtomicValue::Ptr(new DerivedString(trimmed));
|
---|
| 279 | else
|
---|
| 280 | return error(np, trimmed);
|
---|
| 281 | }
|
---|
| 282 | case TypeName:
|
---|
| 283 | {
|
---|
| 284 | const QString simplified(lexical.simplified());
|
---|
| 285 |
|
---|
| 286 | if(isValidName(simplified))
|
---|
| 287 | return AtomicValue::Ptr(new DerivedString(simplified));
|
---|
| 288 | else
|
---|
| 289 | return error(np, simplified);
|
---|
| 290 | }
|
---|
| 291 | case TypeID:
|
---|
| 292 | /* Fallthrough. */
|
---|
| 293 | case TypeIDREF:
|
---|
| 294 | /* Fallthrough. */
|
---|
| 295 | case TypeENTITY:
|
---|
| 296 | /* Fallthrough. */
|
---|
| 297 | case TypeNCName:
|
---|
| 298 | {
|
---|
| 299 | /* We treat xs:ID, xs:ENTITY, xs:IDREF and xs:NCName in the exact same
|
---|
| 300 | * way, except for the type annotation.
|
---|
| 301 | *
|
---|
| 302 | * We use trimmed() instead of simplified() because it's
|
---|
| 303 | * faster and whitespace isn't allowed between
|
---|
| 304 | * non-whitespace characters anyway, for these types. */
|
---|
| 305 | const QString trimmed(lexical.trimmed());
|
---|
| 306 |
|
---|
| 307 | if(QXmlUtils::isNCName(trimmed))
|
---|
| 308 | return AtomicValue::Ptr(new DerivedString(trimmed));
|
---|
| 309 | else
|
---|
| 310 | return error(np, trimmed);
|
---|
| 311 | }
|
---|
| 312 | default:
|
---|
| 313 | {
|
---|
| 314 | Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached.");
|
---|
| 315 | return AtomicValue::Ptr();
|
---|
| 316 | }
|
---|
| 317 | }
|
---|
| 318 | }
|
---|
| 319 |
|
---|
| 320 | virtual QString stringValue() const
|
---|
| 321 | {
|
---|
| 322 | return m_value;
|
---|
| 323 | }
|
---|
| 324 |
|
---|
| 325 | virtual bool evaluateEBV(const QExplicitlySharedDataPointer<DynamicContext> &) const
|
---|
| 326 | {
|
---|
| 327 | return m_value.length() > 0;
|
---|
| 328 | }
|
---|
| 329 |
|
---|
| 330 | virtual ItemType::Ptr type() const
|
---|
| 331 | {
|
---|
| 332 | return itemType();
|
---|
| 333 | }
|
---|
| 334 | };
|
---|
| 335 | }
|
---|
| 336 |
|
---|
| 337 | QT_END_NAMESPACE
|
---|
| 338 |
|
---|
| 339 | QT_END_HEADER
|
---|
| 340 |
|
---|
| 341 | #endif
|
---|