source: trunk/src/corelib/codecs/qisciicodec.cpp@ 447

Last change on this file since 447 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 7.8 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qisciicodec_p.h"
43#include "qlist.h"
44
45#ifndef QT_NO_CODECS
46
47QT_BEGIN_NAMESPACE
48
49/*!
50 \class QIsciiCodec
51 \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
52
53 \internal
54*/
55
56
57struct Codecs {
58 const char *name;
59 ushort base;
60};
61
62static const Codecs codecs [] = {
63 { "Iscii-Dev", 0x900 },
64 { "Iscii-Bng", 0x980 },
65 { "Iscii-Pnj", 0xa00 },
66 { "Iscii-Gjr", 0xa80 },
67 { "Iscii-Ori", 0xb00 },
68 { "Iscii-Tml", 0xb80 },
69 { "Iscii-Tlg", 0xc00 },
70 { "Iscii-Knd", 0xc80 },
71 { "Iscii-Mlm", 0xd00 }
72};
73
74QIsciiCodec::~QIsciiCodec()
75{
76}
77
78QByteArray QIsciiCodec::name() const
79{
80 return codecs[idx].name;
81}
82
83int QIsciiCodec::mibEnum() const
84{
85 /* There is no MIBEnum for Iscii */
86 return -3000-idx;
87}
88
89static const uchar inv = 0xFF;
90
91/* iscii range from 0xa0 - 0xff */
92static const uchar iscii_to_uni_table[0x60] = {
93 0x00, 0x01, 0x02, 0x03,
94 0x05, 0x06, 0x07, 0x08,
95 0x09, 0x0a, 0x0b, 0x0e,
96 0x0f, 0x20, 0x0d, 0x12,
97
98 0x13, 0x14, 0x11, 0x15,
99 0x16, 0x17, 0x18, 0x19,
100 0x1a, 0x1b, 0x1c, 0x1d,
101 0x1e, 0x1f, 0x20, 0x21,
102
103 0x22, 0x23, 0x24, 0x25,
104 0x26, 0x27, 0x28, 0x29,
105 0x2a, 0x2b, 0x2c, 0x2d,
106 0x2e, 0x2f, 0x5f, 0x30,
107
108 0x31, 0x32, 0x33, 0x34,
109 0x35, 0x36, 0x37, 0x38,
110 0x39, inv, 0x3e, 0x3f,
111 0x40, 0x41, 0x42, 0x43,
112
113 0x46, 0x47, 0x48, 0x45,
114 0x4a, 0x4b, 0x4c, 0x49,
115 0x4d, 0x3c, 0x64, 0x00,
116 0x00, 0x00, 0x00, 0x00,
117
118 0x00, 0x66, 0x67, 0x68,
119 0x69, 0x6a, 0x6b, 0x6c,
120 0x6d, 0x6e, 0x6f, 0x00,
121 0x00, 0x00, 0x00, 0x00
122};
123
124static const uchar uni_to_iscii_table[0x80] = {
125 0x00, 0xa1, 0xa2, 0xa3,
126 0x00, 0xa4, 0xa5, 0xa6,
127 0xa7, 0xa8, 0xa9, 0xaa,
128 0x00, 0xae, 0xab, 0xac,
129
130 0xad, 0xb2, 0xaf, 0xb0,
131 0xb1, 0xb3, 0xb4, 0xb5,
132 0xb6, 0xb7, 0xb8, 0xb9,
133 0xba, 0xbb, 0xbc, 0xbd,
134
135 0xbe, 0xbf, 0xc0, 0xc1,
136 0xc2, 0xc3, 0xc4, 0xc5,
137 0xc6, 0xc7, 0xc8, 0xc9,
138 0xca, 0xcb, 0xcc, 0xcd,
139
140 0xcf, 0xd0, 0xd1, 0xd2,
141 0xd3, 0xd4, 0xd5, 0xd6,
142 0xd7, 0xd8, 0x00, 0x00,
143 0xe9, 0x00, 0xda, 0xdb,
144
145 0xdc, 0xdd, 0xde, 0xdf,
146 0x00, 0xe3, 0xe0, 0xe1,
147 0xe2, 0xe7, 0xe4, 0xe5,
148 0xe6, 0xe8, 0x00, 0x00,
149
150 0x00, 0x00, 0x00, 0x00,
151 0x00, 0x00, 0x00, 0x00,
152 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
153 0x05, 0x06, 0x07, 0xce,
154
155 0x00, 0x00, 0x00, 0x00,
156 0xea, 0x08, 0xf1, 0xf2,
157 0xf3, 0xf4, 0xf5, 0xf6,
158 0xf7, 0xf8, 0xf9, 0xfa,
159
160 0x00, 0x00, 0x00, 0x00,
161 0x00, 0x00, 0x00, 0x00,
162 0x00, 0x00, 0x00, 0x00,
163 0x00, 0x00, 0x00, 0x00
164};
165
166static const uchar uni_to_iscii_pairs[] = {
167 0x00, 0x00,
168 0x15, 0x3c, // 0x958
169 0x16, 0x3c, // 0x959
170 0x17, 0x3c, // 0x95a
171 0x1c, 0x3c, // 0x95b
172 0x21, 0x3c, // 0x95c
173 0x22, 0x3c, // 0x95d
174 0x2b, 0x3c, // 0x95e
175 0x64, 0x64 // 0x965
176};
177
178
179QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
180{
181 char replacement = '?';
182 bool halant = false;
183 if (state) {
184 if (state->flags & ConvertInvalidToNull)
185 replacement = 0;
186 halant = state->state_data[0];
187 }
188 int invalid = 0;
189
190 QByteArray result;
191 result.resize(2*len); //worst case
192
193 uchar *ch = reinterpret_cast<uchar *>(result.data());
194
195 const int base = codecs[idx].base;
196
197 for (int i =0; i < len; ++i) {
198 const ushort codePoint = uc[i].unicode();
199
200 /* The low 7 bits of ISCII is plain ASCII. However, we go all the
201 * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
202 * behavior. */
203 if(codePoint < 0xA0) {
204 *ch++ = static_cast<uchar>(codePoint);
205 continue;
206 }
207
208 const int pos = codePoint - base;
209 if (pos > 0 && pos < 0x80) {
210 uchar iscii = uni_to_iscii_table[pos];
211 if (iscii > 0x80) {
212 *ch++ = iscii;
213 } else if (iscii) {
214 const uchar *pair = uni_to_iscii_pairs + 2*iscii;
215 *ch++ = *pair++;
216 *ch++ = *pair++;
217 } else {
218 *ch++ = replacement;
219 ++invalid;
220 }
221 } else {
222 if (uc[i].unicode() == 0x200c) { // ZWNJ
223 if (halant)
224 // Consonant Halant ZWNJ -> Consonant Halant Halant
225 *ch++ = 0xe8;
226 } else if (uc[i].unicode() == 0x200d) { // ZWJ
227 if (halant)
228 // Consonant Halant ZWJ -> Consonant Halant Nukta
229 *ch++ = 0xe9;
230 } else {
231 *ch++ = replacement;
232 ++invalid;
233 }
234 }
235 halant = (pos == 0x4d);
236 }
237 result.truncate(ch - (uchar *)result.data());
238
239 if (state) {
240 state->invalidChars += invalid;
241 state->state_data[0] = halant;
242 }
243 return result;
244}
245
246QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
247{
248 bool halant = false;
249 if (state) {
250 halant = state->state_data[0];
251 }
252
253 QString result;
254 result.resize(len);
255 QChar *uc = result.data();
256
257 const int base = codecs[idx].base;
258
259 for (int i = 0; i < len; ++i) {
260 ushort ch = (uchar) chars[i];
261 if (ch < 0xa0)
262 *uc++ = ch;
263 else {
264 ushort c = iscii_to_uni_table[ch - 0xa0];
265 if (halant && (c == inv || c == 0xe9)) {
266 // Consonant Halant inv -> Consonant Halant ZWJ
267 // Consonant Halant Nukta -> Consonant Halant ZWJ
268 *uc++ = QChar(0x200d);
269 } else if (halant && c == 0xe8) {
270 // Consonant Halant Halant -> Consonant Halant ZWNJ
271 *uc++ = QChar(0x200c);
272 } else {
273 *uc++ = QChar(c+base);
274 }
275 }
276 halant = ((uchar)chars[i] == 0xe8);
277 }
278 result.resize(uc - result.unicode());
279
280 if (state) {
281 state->state_data[0] = halant;
282 }
283 return result;
284}
285
286QT_END_NAMESPACE
287
288#endif // QT_NO_CODECS
Note: See TracBrowser for help on using the repository browser.