| 1 | /****************************************************************************
|
|---|
| 2 | **
|
|---|
| 3 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
|---|
| 4 | ** All rights reserved.
|
|---|
| 5 | ** Contact: Nokia Corporation ([email protected])
|
|---|
| 6 | **
|
|---|
| 7 | ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
|
|---|
| 8 | **
|
|---|
| 9 | ** $QT_BEGIN_LICENSE:LGPL$
|
|---|
| 10 | ** Commercial Usage
|
|---|
| 11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
|---|
| 12 | ** accordance with the Qt Commercial License Agreement provided with the
|
|---|
| 13 | ** Software or, alternatively, in accordance with the terms contained in
|
|---|
| 14 | ** a written agreement between you and Nokia.
|
|---|
| 15 | **
|
|---|
| 16 | ** GNU Lesser General Public License Usage
|
|---|
| 17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
|---|
| 18 | ** General Public License version 2.1 as published by the Free Software
|
|---|
| 19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
|---|
| 20 | ** packaging of this file. Please review the following information to
|
|---|
| 21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
|---|
| 22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
|---|
| 23 | **
|
|---|
| 24 | ** In addition, as a special exception, Nokia gives you certain additional
|
|---|
| 25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
|---|
| 26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
|---|
| 27 | **
|
|---|
| 28 | ** GNU General Public License Usage
|
|---|
| 29 | ** Alternatively, this file may be used under the terms of the GNU
|
|---|
| 30 | ** General Public License version 3.0 as published by the Free Software
|
|---|
| 31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
|---|
| 32 | ** packaging of this file. Please review the following information to
|
|---|
| 33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
|---|
| 34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
|---|
| 35 | **
|
|---|
| 36 | ** If you have questions regarding the use of this file, please contact
|
|---|
| 37 | ** Nokia at [email protected].
|
|---|
| 38 | ** $QT_END_LICENSE$
|
|---|
| 39 | **
|
|---|
| 40 | ****************************************************************************/
|
|---|
| 41 |
|
|---|
| 42 | //
|
|---|
| 43 | // W A R N I N G
|
|---|
| 44 | // -------------
|
|---|
| 45 | //
|
|---|
| 46 | // This file is not part of the Qt API. It exists purely as an
|
|---|
| 47 | // implementation detail. This header file may change from version to
|
|---|
| 48 | // version without notice, or even be removed.
|
|---|
| 49 | //
|
|---|
| 50 | // We mean it.
|
|---|
| 51 |
|
|---|
| 52 | #ifndef Patternist_CompressedWhitespace_H
|
|---|
| 53 | #define Patternist_CompressedWhitespace_H
|
|---|
| 54 |
|
|---|
| 55 | #include <QtGlobal>
|
|---|
| 56 |
|
|---|
| 57 | QT_BEGIN_HEADER
|
|---|
| 58 |
|
|---|
| 59 | QT_BEGIN_NAMESPACE
|
|---|
| 60 |
|
|---|
| 61 | class QChar;
|
|---|
| 62 | class QString;
|
|---|
| 63 | class QStringRef;
|
|---|
| 64 |
|
|---|
| 65 | namespace QPatternist
|
|---|
| 66 | {
|
|---|
| 67 | /**
|
|---|
| 68 | * @short A compression facility for whitespace nodes.
|
|---|
| 69 | *
|
|---|
| 70 | * CompressedWhitespace compresses and decompresses strings that consist of
|
|---|
| 71 | * whitespace only, and does so with a scheme that is designed to do this
|
|---|
| 72 | * specialized task in an efficient way. The approach is simple: each
|
|---|
| 73 | * sequence of equal whitespace in the input gets coded into one byte,
|
|---|
| 74 | * where the first two bits signal the type, CharIdentifier, and the
|
|---|
| 75 | * remaining six bits are the count.
|
|---|
| 76 | *
|
|---|
| 77 | * For instance, this scheme manages to compress a sequence of spaces
|
|---|
| 78 | * followed by a new line into 16 bits (one QChar), and QString stores
|
|---|
| 79 | * strings of one QChar quite efficiently, by avoiding a heap allocation.
|
|---|
| 80 | *
|
|---|
| 81 | * There is no way to tell whether a QString is compressed or not.
|
|---|
| 82 | *
|
|---|
| 83 | * The compression scheme originates from Saxon, by Michael Kay.
|
|---|
| 84 | *
|
|---|
| 85 | * @author Frans Englich <[email protected]>
|
|---|
| 86 | */
|
|---|
| 87 | class Q_AUTOTEST_EXPORT CompressedWhitespace
|
|---|
| 88 | {
|
|---|
| 89 | public:
|
|---|
| 90 | /**
|
|---|
| 91 | * @short Compresses @p input into a compressed format, returned
|
|---|
| 92 | * as a QString.
|
|---|
| 93 | *
|
|---|
| 94 | * The caller guarantees that input is not empty
|
|---|
| 95 | * and consists only of whitespace.
|
|---|
| 96 | *
|
|---|
| 97 | * The returned format is opaque. There is no way to find out
|
|---|
| 98 | * whether a QString contains compressed data or not.
|
|---|
| 99 | *
|
|---|
| 100 | * @see decompress()
|
|---|
| 101 | */
|
|---|
| 102 | static QString compress(const QStringRef &input);
|
|---|
| 103 |
|
|---|
| 104 | /**
|
|---|
| 105 | * @short Decompresses @p input into a usual QString.
|
|---|
| 106 | *
|
|---|
| 107 | * @p input must be a QString as returned from compress().
|
|---|
| 108 | *
|
|---|
| 109 | * @see compress()
|
|---|
| 110 | */
|
|---|
| 111 | static QString decompress(const QString &input);
|
|---|
| 112 |
|
|---|
| 113 | private:
|
|---|
| 114 | /**
|
|---|
| 115 | * We use the two upper bits for communicating what space it is.
|
|---|
| 116 | */
|
|---|
| 117 | enum CharIdentifier
|
|---|
| 118 | {
|
|---|
| 119 | Space = 0x0,
|
|---|
| 120 |
|
|---|
| 121 | /**
|
|---|
| 122 | * Carriage return: 0xD, \\r
|
|---|
| 123 | *
|
|---|
| 124 | * Binary: 10000000
|
|---|
| 125 | */
|
|---|
| 126 | CR = 0x80,
|
|---|
| 127 |
|
|---|
| 128 | /**
|
|---|
| 129 | * Line feed: 0xA, \\n
|
|---|
| 130 | *
|
|---|
| 131 | * Binary: 01000000
|
|---|
| 132 | */
|
|---|
| 133 | LF = 0x40,
|
|---|
| 134 |
|
|---|
| 135 | /**
|
|---|
| 136 | * Binary: 11000000
|
|---|
| 137 | */
|
|---|
| 138 | Tab = 0xC0
|
|---|
| 139 | };
|
|---|
| 140 |
|
|---|
| 141 | enum Constants
|
|---|
| 142 | {
|
|---|
| 143 | /* We can at maximum store this many consecutive characters
|
|---|
| 144 | * of one type. We use 6 bits for the count. */
|
|---|
| 145 | MaxCharCount = (1 << 6) - 1,
|
|---|
| 146 |
|
|---|
|
|---|