1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** Contact: Qt Software Information ([email protected])
|
---|
5 | **
|
---|
6 | ** This file is part of the QtCore module of the Qt Toolkit.
|
---|
7 | **
|
---|
8 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
9 | ** Commercial Usage
|
---|
10 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
11 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
12 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
13 | ** a written agreement between you and Nokia.
|
---|
14 | **
|
---|
15 | ** GNU Lesser General Public License Usage
|
---|
16 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
17 | ** General Public License version 2.1 as published by the Free Software
|
---|
18 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
19 | ** packaging of this file. Please review the following information to
|
---|
20 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
21 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
22 | **
|
---|
23 | ** In addition, as a special exception, Nokia gives you certain
|
---|
24 | ** additional rights. These rights are described in the Nokia Qt LGPL
|
---|
25 | ** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
|
---|
26 | ** package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you are unsure which license is appropriate for your use, please
|
---|
37 | ** contact the sales department at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include "qisciicodec_p.h"
|
---|
43 | #include "qlist.h"
|
---|
44 |
|
---|
45 | #ifndef QT_NO_CODECS
|
---|
46 |
|
---|
47 | QT_BEGIN_NAMESPACE
|
---|
48 |
|
---|
49 | /*!
|
---|
50 | \class QIsciiCodec
|
---|
51 | \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
|
---|
52 |
|
---|
53 | \internal
|
---|
54 | */
|
---|
55 |
|
---|
56 |
|
---|
57 | struct Codecs {
|
---|
58 | const char *name;
|
---|
59 | ushort base;
|
---|
60 | };
|
---|
61 |
|
---|
62 | static const Codecs codecs [] = {
|
---|
63 | { "Iscii-Dev", 0x900 },
|
---|
64 | { "Iscii-Bng", 0x980 },
|
---|
65 | { "Iscii-Pnj", 0xa00 },
|
---|
66 | { "Iscii-Gjr", 0xa80 },
|
---|
67 | { "Iscii-Ori", 0xb00 },
|
---|
68 | { "Iscii-Tml", 0xb80 },
|
---|
69 | { "Iscii-Tlg", 0xc00 },
|
---|
70 | { "Iscii-Knd", 0xc80 },
|
---|
71 | { "Iscii-Mlm", 0xd00 }
|
---|
72 | };
|
---|
73 |
|
---|
74 | QIsciiCodec::~QIsciiCodec()
|
---|
75 | {
|
---|
76 | }
|
---|
77 |
|
---|
78 | QByteArray QIsciiCodec::name() const
|
---|
79 | {
|
---|
80 | return codecs[idx].name;
|
---|
81 | }
|
---|
82 |
|
---|
83 | int QIsciiCodec::mibEnum() const
|
---|
84 | {
|
---|
85 | /* There is no MIBEnum for Iscii */
|
---|
86 | return -3000-idx;
|
---|
87 | }
|
---|
88 |
|
---|
89 | static const uchar inv = 0xFF;
|
---|
90 |
|
---|
91 | /* iscii range from 0xa0 - 0xff */
|
---|
92 | static const uchar iscii_to_uni_table[0x60] = {
|
---|
93 | 0x00, 0x01, 0x02, 0x03,
|
---|
94 | 0x05, 0x06, 0x07, 0x08,
|
---|
95 | 0x09, 0x0a, 0x0b, 0x0e,
|
---|
96 | 0x0f, 0x20, 0x0d, 0x12,
|
---|
97 |
|
---|
98 | 0x13, 0x14, 0x11, 0x15,
|
---|
99 | 0x16, 0x17, 0x18, 0x19,
|
---|
100 | 0x1a, 0x1b, 0x1c, 0x1d,
|
---|
101 | 0x1e, 0x1f, 0x20, 0x21,
|
---|
102 |
|
---|
103 | 0x22, 0x23, 0x24, 0x25,
|
---|
104 | 0x26, 0x27, 0x28, 0x29,
|
---|
105 | 0x2a, 0x2b, 0x2c, 0x2d,
|
---|
106 | 0x2e, 0x2f, 0x5f, 0x30,
|
---|
107 |
|
---|
108 | 0x31, 0x32, 0x33, 0x34,
|
---|
109 | 0x35, 0x36, 0x37, 0x38,
|
---|
110 | 0x39, inv, 0x3e, 0x3f,
|
---|
111 | 0x40, 0x41, 0x42, 0x43,
|
---|
112 |
|
---|
113 | 0x46, 0x47, 0x48, 0x45,
|
---|
114 | 0x4a, 0x4b, 0x4c, 0x49,
|
---|
115 | 0x4d, 0x3c, 0x64, 0x00,
|
---|
116 | 0x00, 0x00, 0x00, 0x00,
|
---|
117 |
|
---|
118 | 0x00, 0x66, 0x67, 0x68,
|
---|
119 | 0x69, 0x6a, 0x6b, 0x6c,
|
---|
120 | 0x6d, 0x6e, 0x6f, 0x00,
|
---|
121 | 0x00, 0x00, 0x00, 0x00
|
---|
122 | };
|
---|
123 |
|
---|
124 | static const uchar uni_to_iscii_table[0x80] = {
|
---|
125 | 0x00, 0xa1, 0xa2, 0xa3,
|
---|
126 | 0x00, 0xa4, 0xa5, 0xa6,
|
---|
127 | 0xa7, 0xa8, 0xa9, 0xaa,
|
---|
128 | 0x00, 0xae, 0xab, 0xac,
|
---|
129 |
|
---|
130 | 0xad, 0xb2, 0xaf, 0xb0,
|
---|
131 | 0xb1, 0xb3, 0xb4, 0xb5,
|
---|
132 | 0xb6, 0xb7, 0xb8, 0xb9,
|
---|
133 | 0xba, 0xbb, 0xbc, 0xbd,
|
---|
134 |
|
---|
135 | 0xbe, 0xbf, 0xc0, 0xc1,
|
---|
136 | 0xc2, 0xc3, 0xc4, 0xc5,
|
---|
137 | 0xc6, 0xc7, 0xc8, 0xc9,
|
---|
138 | 0xca, 0xcb, 0xcc, 0xcd,
|
---|
139 |
|
---|
140 | 0xcf, 0xd0, 0xd1, 0xd2,
|
---|
141 | 0xd3, 0xd4, 0xd5, 0xd6,
|
---|
142 | 0xd7, 0xd8, 0x00, 0x00,
|
---|
143 | 0xe9, 0x00, 0xda, 0xdb,
|
---|
144 |
|
---|
145 | 0xdc, 0xdd, 0xde, 0xdf,
|
---|
146 | 0x00, 0xe3, 0xe0, 0xe1,
|
---|
147 | 0xe2, 0xe7, 0xe4, 0xe5,
|
---|
148 | 0xe6, 0xe8, 0x00, 0x00,
|
---|
149 |
|
---|
150 | 0x00, 0x00, 0x00, 0x00,
|
---|
151 | 0x00, 0x00, 0x00, 0x00,
|
---|
152 | 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
|
---|
153 | 0x05, 0x06, 0x07, 0xce,
|
---|
154 |
|
---|
155 | 0x00, 0x00, 0x00, 0x00,
|
---|
156 | 0xea, 0x08, 0xf1, 0xf2,
|
---|
157 | 0xf3, 0xf4, 0xf5, 0xf6,
|
---|
158 | 0xf7, 0xf8, 0xf9, 0xfa,
|
---|
159 |
|
---|
160 | 0x00, 0x00, 0x00, 0x00,
|
---|
161 | 0x00, 0x00, 0x00, 0x00,
|
---|
162 | 0x00, 0x00, 0x00, 0x00,
|
---|
163 | 0x00, 0x00, 0x00, 0x00
|
---|
164 | };
|
---|
165 |
|
---|
166 | static const uchar uni_to_iscii_pairs[] = {
|
---|
167 | 0x00, 0x00,
|
---|
168 | 0x15, 0x3c, // 0x958
|
---|
169 | 0x16, 0x3c, // 0x959
|
---|
170 | 0x17, 0x3c, // 0x95a
|
---|
171 | 0x1c, 0x3c, // 0x95b
|
---|
172 | 0x21, 0x3c, // 0x95c
|
---|
173 | 0x22, 0x3c, // 0x95d
|
---|
174 | 0x2b, 0x3c, // 0x95e
|
---|
175 | 0x64, 0x64 // 0x965
|
---|
176 | };
|
---|
177 |
|
---|
178 |
|
---|
179 | QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
|
---|
180 | {
|
---|
181 | char replacement = '?';
|
---|
182 | bool halant = false;
|
---|
183 | if (state) {
|
---|
184 | if (state->flags & ConvertInvalidToNull)
|
---|
185 | replacement = 0;
|
---|
186 | halant = state->state_data[0];
|
---|
187 | }
|
---|
188 | int invalid = 0;
|
---|
189 |
|
---|
190 | QByteArray result;
|
---|
191 | result.resize(2*len); //worst case
|
---|
192 |
|
---|
193 | uchar *ch = reinterpret_cast<uchar *>(result.data());
|
---|
194 |
|
---|
195 | const int base = codecs[idx].base;
|
---|
196 |
|
---|
197 | for (int i =0; i < len; ++i) {
|
---|
198 | const ushort codePoint = uc[i].unicode();
|
---|
199 |
|
---|
200 | /* The low 7 bits of ISCII is plain ASCII. However, we go all the
|
---|
201 | * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
|
---|
202 | * behavior. */
|
---|
203 | if(codePoint < 0xA0) {
|
---|
204 | *ch++ = static_cast<uchar>(codePoint);
|
---|
205 | continue;
|
---|
206 | }
|
---|
207 |
|
---|
208 | const int pos = codePoint - base;
|
---|
209 | if (pos > 0 && pos < 0x80) {
|
---|
210 | uchar iscii = uni_to_iscii_table[pos];
|
---|
211 | if (iscii > 0x80) {
|
---|
212 | *ch++ = iscii;
|
---|
213 | } else if (iscii) {
|
---|
214 | const uchar *pair = uni_to_iscii_pairs + 2*iscii;
|
---|
215 | *ch++ = *pair++;
|
---|
216 | *ch++ = *pair++;
|
---|
217 | } else {
|
---|
218 | *ch++ = replacement;
|
---|
219 | ++invalid;
|
---|
220 | }
|
---|
221 | } else {
|
---|
222 | if (uc[i].unicode() == 0x200c) { // ZWNJ
|
---|
223 | if (halant)
|
---|
224 | // Consonant Halant ZWNJ -> Consonant Halant Halant
|
---|
225 | *ch++ = 0xe8;
|
---|
226 | } else if (uc[i].unicode() == 0x200d) { // ZWJ
|
---|
227 | if (halant)
|
---|
228 | // Consonant Halant ZWJ -> Consonant Halant Nukta
|
---|
229 | *ch++ = 0xe9;
|
---|
230 | } else {
|
---|
231 | *ch++ = replacement;
|
---|
232 | ++invalid;
|
---|
233 | }
|
---|
234 | }
|
---|
235 | halant = (pos == 0x4d);
|
---|
236 | }
|
---|
237 | result.truncate(ch - (uchar *)result.data());
|
---|
238 |
|
---|
239 | if (state) {
|
---|
240 | state->invalidChars += invalid;
|
---|
241 | state->state_data[0] = halant;
|
---|
242 | }
|
---|
243 | return result;
|
---|
244 | }
|
---|
245 |
|
---|
246 | QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
|
---|
247 | {
|
---|
248 | bool halant = false;
|
---|
249 | if (state) {
|
---|
250 | halant = state->state_data[0];
|
---|
251 | }
|
---|
252 |
|
---|
253 | QString result;
|
---|
254 | result.resize(len);
|
---|
255 | QChar *uc = result.data();
|
---|
256 |
|
---|
257 | const int base = codecs[idx].base;
|
---|
258 |
|
---|
259 | for (int i = 0; i < len; ++i) {
|
---|
260 | ushort ch = (uchar) chars[i];
|
---|
261 | if (ch < 0xa0)
|
---|
262 | *uc++ = ch;
|
---|
263 | else {
|
---|
264 | ushort c = iscii_to_uni_table[ch - 0xa0];
|
---|
265 | if (halant && (c == inv || c == 0xe9)) {
|
---|
266 | // Consonant Halant inv -> Consonant Halant ZWJ
|
---|
267 | // Consonant Halant Nukta -> Consonant Halant ZWJ
|
---|
268 | *uc++ = QChar(0x200d);
|
---|
269 | } else if (halant && c == 0xe8) {
|
---|
270 | // Consonant Halant Halant -> Consonant Halant ZWNJ
|
---|
271 | *uc++ = QChar(0x200c);
|
---|
272 | } else {
|
---|
273 | *uc++ = QChar(c+base);
|
---|
274 | }
|
---|
275 | }
|
---|
276 | halant = ((uchar)chars[i] == 0xe8);
|
---|
277 | }
|
---|
278 | result.resize(uc - result.unicode());
|
---|
279 |
|
---|
280 | if (state) {
|
---|
281 | state->state_data[0] = halant;
|
---|
282 | }
|
---|
283 | return result;
|
---|
284 | }
|
---|
285 |
|
---|
286 | QT_END_NAMESPACE
|
---|
287 |
|
---|
288 | #endif // QT_NO_CODECS
|
---|