source: trunk/src/corelib/codecs/qtsciicodec.cpp@ 5

Last change on this file since 5 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 14.9 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information (qt-info@nokia.com)
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at qt-sales@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42// Most of the code here was originally written by Hans Petter Bieker,
43// and is included in Qt with the author's permission, and the grateful
44// thanks of the Trolltech team.
45
46#include "qtsciicodec_p.h"
47#include "qlist.h"
48
49#ifndef QT_NO_CODECS
50
51QT_BEGIN_NAMESPACE
52
// Table lookup helpers; their definitions appear further down in this file.
static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3);
static unsigned int qt_TSCIIToUnicode(unsigned int code, uint *s);

// Evaluates to true when the value c lies in the inclusive range 0x80..0xfd.
// Note that the argument is evaluated twice.
#define IsTSCIIChar(c) (0x80 <= (c) && (c) <= 0xfd)
57
58/*! \class QTsciiCodec
59 \reentrant
60 \internal
61*/
62
63/*!
64 Destroys the text codec object.
65*/
66QTsciiCodec::~QTsciiCodec()
67{
68}
69
70/*!
71 Converts the first \a len characters in \a uc from Unicode to this
72 encoding, and returns the result in a byte array. The \a state contains
73 some conversion flags, and is used by the codec to maintain state
74 information.
75*/
76QByteArray QTsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
77{
78 char replacement = '?';
79 if (state) {
80 if (state->flags & ConvertInvalidToNull)
81 replacement = 0;
82 }
83 int invalid = 0;
84
85 QByteArray rstr;
86 rstr.resize(len);
87 uchar* cursor = (uchar*)rstr.data();
88 for (int i = 0; i < len; i++) {
89 QChar ch = uc[i];
90 uchar j;
91 if (ch.row() == 0x00 && ch.cell() < 0x80) {
92 // ASCII
93 j = ch.cell();
94 } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(),
95 uc[i + 1].unicode(),
96 uc[i + 2].unicode()))) {
97 // We have to check the combined chars first!
98 i += 2;
99 } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(),
100 uc[i + 1].unicode(), 0))) {
101 i++;
102 } else if ((j = qt_UnicodeToTSCII(uc[i].unicode(), 0, 0))) {
103 } else {
104 // Error
105 j = replacement;
106 ++invalid;
107 }
108 *cursor++ = j;
109 }
110 rstr.resize(cursor - (const uchar*)rstr.constData());
111
112 if (state) {
113 state->invalidChars += invalid;
114 }
115 return rstr;
116}
117
118/*!
119 Converts the first \a len characters in \a chars from this encoding
120 to Unicode, and returns the result in a QString. The \a state contains
121 some conversion flags, and is used by the codec to maintain state
122 information.
123*/
124QString QTsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
125{
126 QChar replacement = QChar::ReplacementCharacter;
127 if (state) {
128 if (state->flags & ConvertInvalidToNull)
129 replacement = QChar::Null;
130 }
131 int invalid = 0;
132
133 QString result;
134 for (int i = 0; i < len; i++) {
135 uchar ch = chars[i];
136 if (ch < 0x80) {
137 // ASCII
138 result += QLatin1Char(ch);
139 } else if (IsTSCIIChar(ch)) {
140 // TSCII
141 uint s[3];
142 uint u = qt_TSCIIToUnicode(ch, s);
143 uint *p = s;
144 while (u--) {
145 uint c = *p++;
146 if (c)
147 result += QChar(c);
148 else {
149 result += replacement;
150 ++invalid;
151 }
152 }
153 } else {
154 // Invalid
155 result += replacement;
156 ++invalid;
157 }
158 }
159
160 if (state) {
161 state->invalidChars += invalid;
162 }
163 return result;
164}
165
166/*!
167 Returns the official name for the encoding that is handled by the codec.
168
169 \sa QTextCodec::name()
170*/
171QByteArray QTsciiCodec::name() const
172{
173 return "TSCII";
174}
175
176/*!
177 Returns the MIB enum for the encoding.
178
179 \sa QTextCodec::mibEnum()
180*/
181int QTsciiCodec::mibEnum() const
182{
183 /* There is no MIBEnum for TSCII now */
184 return -3197;
185}
186
// Unicode -> TSCII lookup table used by qt_UnicodeToTSCII().
//
// Each row holds a sequence of up to three UTF-16 code units (padded with
// 0x0000) followed by the TSCII byte it maps to. The rows MUST stay sorted
// in ascending order of (FIRST, SECOND, THIRD) and the keys must be unique,
// because the table is searched with a binary search.
//
// NOTE(review): the consonant + sign rows use U+0B82 as the second code
// unit; the published TSCII chart suggests the pulli (U+0BCD) is meant.
// Likewise the 0x87/0x8C rows start with U+0BE7. Both are left unchanged
// here and should be confirmed against the TSCII 1.7 chart.
static const ushort UnToTs [][4] = {
    // *Sorted* list of TSCII mapping for unicode chars
    //FIRST  SECOND  THIRD   TSCII
    {0x00A0, 0x0000, 0x0000, 0xA0},
    {0x00A9, 0x0000, 0x0000, 0xA9},
    {0x0B83, 0x0000, 0x0000, 0xB7},
    {0x0B85, 0x0000, 0x0000, 0xAB},
    {0x0B86, 0x0000, 0x0000, 0xAC},
    {0x0B87, 0x0000, 0x0000, 0xAD},
    {0x0B88, 0x0000, 0x0000, 0xAE},
    {0x0B89, 0x0000, 0x0000, 0xAF},
    {0x0B8A, 0x0000, 0x0000, 0xB0},
    {0x0B8E, 0x0000, 0x0000, 0xB1},
    {0x0B8F, 0x0000, 0x0000, 0xB2},
    {0x0B90, 0x0000, 0x0000, 0xB3},
    {0x0B92, 0x0000, 0x0000, 0xB4},
    {0x0B93, 0x0000, 0x0000, 0xB5},
    {0x0B94, 0x0000, 0x0000, 0xB6},
    {0x0B95, 0x0000, 0x0000, 0xB8},
    {0x0B95, 0x0B82, 0x0000, 0xEC},
    {0x0B95, 0x0BC1, 0x0000, 0xCC},
    {0x0B95, 0x0BC2, 0x0000, 0xDC},
    {0x0B99, 0x0000, 0x0000, 0xB9},
    {0x0B99, 0x0B82, 0x0000, 0xED},
    {0x0B99, 0x0BC1, 0x0000, 0x99},
    {0x0B99, 0x0BC2, 0x0000, 0x9B},
    {0x0B9A, 0x0000, 0x0000, 0xBA},
    {0x0B9A, 0x0B82, 0x0000, 0xEE},
    {0x0B9A, 0x0BC1, 0x0000, 0xCD},
    {0x0B9A, 0x0BC2, 0x0000, 0xDD},
    {0x0B9C, 0x0000, 0x0000, 0x83},
    {0x0B9C, 0x0B82, 0x0000, 0x88},
    {0x0B9E, 0x0000, 0x0000, 0xBB},
    {0x0B9E, 0x0B82, 0x0000, 0xEF},
    {0x0B9E, 0x0BC1, 0x0000, 0x9A},
    {0x0B9E, 0x0BC2, 0x0000, 0x9C},
    {0x0B9F, 0x0000, 0x0000, 0xBC},
    {0x0B9F, 0x0B82, 0x0000, 0xF0},
    {0x0B9F, 0x0BBF, 0x0000, 0xCA},
    {0x0B9F, 0x0BC0, 0x0000, 0xCB},
    {0x0B9F, 0x0BC1, 0x0000, 0xCE},
    {0x0B9F, 0x0BC2, 0x0000, 0xDE},
    {0x0BA3, 0x0000, 0x0000, 0xBD},
    {0x0BA3, 0x0B82, 0x0000, 0xF1},
    {0x0BA3, 0x0BC1, 0x0000, 0xCF},
    {0x0BA3, 0x0BC2, 0x0000, 0xDF},
    {0x0BA4, 0x0000, 0x0000, 0xBE},
    // Was keyed on the unassigned code point U+0BA1; every other consonant
    // group has this row, and 0xF2 sits between the U+0BA3 and U+0BA8 rows.
    {0x0BA4, 0x0B82, 0x0000, 0xF2},
    {0x0BA4, 0x0BC1, 0x0000, 0xD0},
    {0x0BA4, 0x0BC2, 0x0000, 0xE0},
    {0x0BA8, 0x0000, 0x0000, 0xBF},
    {0x0BA8, 0x0B82, 0x0000, 0xF3},
    {0x0BA8, 0x0BC1, 0x0000, 0xD1},
    {0x0BA8, 0x0BC2, 0x0000, 0xE1},
    {0x0BA9, 0x0000, 0x0000, 0xC9},
    {0x0BA9, 0x0B82, 0x0000, 0xFD},
    {0x0BA9, 0x0BC1, 0x0000, 0xDB},
    {0x0BA9, 0x0BC2, 0x0000, 0xEB},
    {0x0BAA, 0x0000, 0x0000, 0xC0},
    {0x0BAA, 0x0B82, 0x0000, 0xF4},
    {0x0BAA, 0x0BC1, 0x0000, 0xD2},
    {0x0BAA, 0x0BC2, 0x0000, 0xE2},
    {0x0BAE, 0x0000, 0x0000, 0xC1},
    {0x0BAE, 0x0B82, 0x0000, 0xF5},
    {0x0BAE, 0x0BC1, 0x0000, 0xD3},
    {0x0BAE, 0x0BC2, 0x0000, 0xE3},
    {0x0BAF, 0x0000, 0x0000, 0xC2},
    {0x0BAF, 0x0B82, 0x0000, 0xF6},
    {0x0BAF, 0x0BC1, 0x0000, 0xD4},
    {0x0BAF, 0x0BC2, 0x0000, 0xE4},
    {0x0BB0, 0x0000, 0x0000, 0xC3},
    {0x0BB0, 0x0B82, 0x0000, 0xF7},
    {0x0BB0, 0x0BC1, 0x0000, 0xD5},
    {0x0BB0, 0x0BC2, 0x0000, 0xE5},
    {0x0BB1, 0x0000, 0x0000, 0xC8},
    {0x0BB1, 0x0B82, 0x0000, 0xFC},
    {0x0BB1, 0x0BC1, 0x0000, 0xDA},
    {0x0BB1, 0x0BC2, 0x0000, 0xEA},
    {0x0BB2, 0x0000, 0x0000, 0xC4},
    {0x0BB2, 0x0B82, 0x0000, 0xF8},
    {0x0BB2, 0x0BC1, 0x0000, 0xD6},
    {0x0BB2, 0x0BC2, 0x0000, 0xE6},
    {0x0BB3, 0x0000, 0x0000, 0xC7},
    {0x0BB3, 0x0B82, 0x0000, 0xFB},
    {0x0BB3, 0x0BC1, 0x0000, 0xD9},
    {0x0BB3, 0x0BC2, 0x0000, 0xE9},
    {0x0BB4, 0x0000, 0x0000, 0xC6},
    {0x0BB4, 0x0B82, 0x0000, 0xFA},
    {0x0BB4, 0x0BC1, 0x0000, 0xD8},
    {0x0BB4, 0x0BC2, 0x0000, 0xE8},
    {0x0BB5, 0x0000, 0x0000, 0xC5},
    {0x0BB5, 0x0B82, 0x0000, 0xF9},
    {0x0BB5, 0x0BC1, 0x0000, 0xD7},
    {0x0BB5, 0x0BC2, 0x0000, 0xE7},
    {0x0BB7, 0x0000, 0x0000, 0x84},
    {0x0BB7, 0x0B82, 0x0000, 0x89},
    {0x0BB8, 0x0000, 0x0000, 0x85},
    {0x0BB8, 0x0B82, 0x0000, 0x8A},
    {0x0BB9, 0x0000, 0x0000, 0x86},
    {0x0BB9, 0x0B82, 0x0000, 0x8B},
    {0x0BBE, 0x0000, 0x0000, 0xA1},
    {0x0BBF, 0x0000, 0x0000, 0xA2},
    {0x0BC0, 0x0000, 0x0000, 0xA3},
    {0x0BC1, 0x0000, 0x0000, 0xA4},
    {0x0BC2, 0x0000, 0x0000, 0xA5},
    {0x0BC6, 0x0000, 0x0000, 0xA6},
    {0x0BC7, 0x0000, 0x0000, 0xA7},
    {0x0BC8, 0x0000, 0x0000, 0xA8},
    {0x0BCC, 0x0000, 0x0000, 0xAA},
    {0x0BE6, 0x0000, 0x0000, 0x80},
    {0x0BE7, 0x0000, 0x0000, 0x81},
    {0x0BE7, 0x0BB7, 0x0000, 0x87},
    {0x0BE7, 0x0BB7, 0x0B82, 0x8C},
    {0x0BE8, 0x0000, 0x0000, 0x8D},
    {0x0BE9, 0x0000, 0x0000, 0x8E},
    {0x0BEA, 0x0000, 0x0000, 0x8F},
    {0x0BEB, 0x0000, 0x0000, 0x90},
    {0x0BEC, 0x0000, 0x0000, 0x95},
    {0x0BED, 0x0000, 0x0000, 0x96},
    {0x0BEE, 0x0000, 0x0000, 0x97},
    {0x0BEF, 0x0000, 0x0000, 0x98},
    {0x0BF0, 0x0000, 0x0000, 0x9D},
    {0x0BF1, 0x0000, 0x0000, 0x9E},
    {0x0BF2, 0x0000, 0x0000, 0x9F},
    {0x2018, 0x0000, 0x0000, 0x91},
    {0x2019, 0x0000, 0x0000, 0x92},
    {0x201C, 0x0000, 0x0000, 0x93},
    // 0x94 is the closing double quote; the key used to duplicate U+201C,
    // which made this row unreachable for U+201D.
    {0x201D, 0x0000, 0x0000, 0x94}
};

// Index of the last row of UnToTs (upper bound for the binary search).
// Derived from the table so it cannot drift when rows are added or removed.
static const int UnToTsLast = int(sizeof(UnToTs) / sizeof(UnToTs[0])) - 1;
317
// TSCII -> Unicode lookup table used by qt_TSCIIToUnicode().
//
// Row N describes TSCII byte 0x80 + N as a sequence of up to three UTF-16
// code units, padded with 0x0000. An all-zero row means "no mapping".
// The table covers bytes 0x80..0xFD (126 rows).
//
// NOTE(review): the consonant + sign rows use U+0B82 as the second code
// unit; the published TSCII chart suggests the pulli (U+0BCD) is meant.
// Likewise rows 0x87/0x8C start with U+0BE7. Both are left unchanged here
// and should be confirmed against the TSCII 1.7 chart.
static const ushort TsToUn [][3] = {
    // Starting at 0x80
    {0x0BE6, 0x0000, 0x0000}, // 0x80
    {0x0BE7, 0x0000, 0x0000}, // 0x81
    {0x0000, 0x0000, 0x0000}, // 0x82 unknown
    {0x0B9C, 0x0000, 0x0000}, // 0x83
    {0x0BB7, 0x0000, 0x0000}, // 0x84
    {0x0BB8, 0x0000, 0x0000}, // 0x85
    {0x0BB9, 0x0000, 0x0000}, // 0x86
    {0x0BE7, 0x0BB7, 0x0000}, // 0x87
    {0x0B9C, 0x0B82, 0x0000}, // 0x88
    {0x0BB7, 0x0B82, 0x0000}, // 0x89
    {0x0BB8, 0x0B82, 0x0000}, // 0x8A
    {0x0BB9, 0x0B82, 0x0000}, // 0x8B
    {0x0BE7, 0x0BB7, 0x0B82}, // 0x8C
    {0x0BE8, 0x0000, 0x0000}, // 0x8D
    {0x0BE9, 0x0000, 0x0000}, // 0x8E
    {0x0BEA, 0x0000, 0x0000}, // 0x8F
    {0x0BEB, 0x0000, 0x0000}, // 0x90
    {0x2018, 0x0000, 0x0000}, // 0x91
    {0x2019, 0x0000, 0x0000}, // 0x92
    {0x201C, 0x0000, 0x0000}, // 0x93
    {0x201D, 0x0000, 0x0000}, // 0x94 closing double quote (was a duplicate of U+201C)
    {0x0BEC, 0x0000, 0x0000}, // 0x95
    {0x0BED, 0x0000, 0x0000}, // 0x96
    {0x0BEE, 0x0000, 0x0000}, // 0x97
    {0x0BEF, 0x0000, 0x0000}, // 0x98
    {0x0B99, 0x0BC1, 0x0000}, // 0x99
    {0x0B9E, 0x0BC1, 0x0000}, // 0x9A
    {0x0B99, 0x0BC2, 0x0000}, // 0x9B
    {0x0B9E, 0x0BC2, 0x0000}, // 0x9C
    {0x0BF0, 0x0000, 0x0000}, // 0x9D
    {0x0BF1, 0x0000, 0x0000}, // 0x9E
    {0x0BF2, 0x0000, 0x0000}, // 0x9F
    {0x00A0, 0x0000, 0x0000}, // 0xA0
    {0x0BBE, 0x0000, 0x0000}, // 0xA1
    {0x0BBF, 0x0000, 0x0000}, // 0xA2
    {0x0BC0, 0x0000, 0x0000}, // 0xA3
    {0x0BC1, 0x0000, 0x0000}, // 0xA4
    {0x0BC2, 0x0000, 0x0000}, // 0xA5
    {0x0BC6, 0x0000, 0x0000}, // 0xA6
    {0x0BC7, 0x0000, 0x0000}, // 0xA7
    {0x0BC8, 0x0000, 0x0000}, // 0xA8
    {0x00A9, 0x0000, 0x0000}, // 0xA9
    {0x0BCC, 0x0000, 0x0000}, // 0xAA
    {0x0B85, 0x0000, 0x0000}, // 0xAB
    {0x0B86, 0x0000, 0x0000}, // 0xAC
    {0x0B87, 0x0000, 0x0000}, // 0xAD
    {0x0B88, 0x0000, 0x0000}, // 0xAE
    {0x0B89, 0x0000, 0x0000}, // 0xAF
    {0x0B8A, 0x0000, 0x0000}, // 0xB0
    {0x0B8E, 0x0000, 0x0000}, // 0xB1
    {0x0B8F, 0x0000, 0x0000}, // 0xB2
    {0x0B90, 0x0000, 0x0000}, // 0xB3
    {0x0B92, 0x0000, 0x0000}, // 0xB4
    {0x0B93, 0x0000, 0x0000}, // 0xB5
    {0x0B94, 0x0000, 0x0000}, // 0xB6
    {0x0B83, 0x0000, 0x0000}, // 0xB7
    {0x0B95, 0x0000, 0x0000}, // 0xB8
    {0x0B99, 0x0000, 0x0000}, // 0xB9
    {0x0B9A, 0x0000, 0x0000}, // 0xBA
    {0x0B9E, 0x0000, 0x0000}, // 0xBB
    {0x0B9F, 0x0000, 0x0000}, // 0xBC
    {0x0BA3, 0x0000, 0x0000}, // 0xBD
    {0x0BA4, 0x0000, 0x0000}, // 0xBE
    {0x0BA8, 0x0000, 0x0000}, // 0xBF
    {0x0BAA, 0x0000, 0x0000}, // 0xC0
    {0x0BAE, 0x0000, 0x0000}, // 0xC1
    {0x0BAF, 0x0000, 0x0000}, // 0xC2
    {0x0BB0, 0x0000, 0x0000}, // 0xC3
    {0x0BB2, 0x0000, 0x0000}, // 0xC4
    {0x0BB5, 0x0000, 0x0000}, // 0xC5
    {0x0BB4, 0x0000, 0x0000}, // 0xC6
    {0x0BB3, 0x0000, 0x0000}, // 0xC7
    {0x0BB1, 0x0000, 0x0000}, // 0xC8
    {0x0BA9, 0x0000, 0x0000}, // 0xC9
    {0x0B9F, 0x0BBF, 0x0000}, // 0xCA
    {0x0B9F, 0x0BC0, 0x0000}, // 0xCB
    {0x0B95, 0x0BC1, 0x0000}, // 0xCC
    {0x0B9A, 0x0BC1, 0x0000}, // 0xCD
    {0x0B9F, 0x0BC1, 0x0000}, // 0xCE
    {0x0BA3, 0x0BC1, 0x0000}, // 0xCF
    {0x0BA4, 0x0BC1, 0x0000}, // 0xD0
    {0x0BA8, 0x0BC1, 0x0000}, // 0xD1
    {0x0BAA, 0x0BC1, 0x0000}, // 0xD2
    {0x0BAE, 0x0BC1, 0x0000}, // 0xD3
    {0x0BAF, 0x0BC1, 0x0000}, // 0xD4
    {0x0BB0, 0x0BC1, 0x0000}, // 0xD5
    {0x0BB2, 0x0BC1, 0x0000}, // 0xD6
    {0x0BB5, 0x0BC1, 0x0000}, // 0xD7
    {0x0BB4, 0x0BC1, 0x0000}, // 0xD8
    {0x0BB3, 0x0BC1, 0x0000}, // 0xD9
    {0x0BB1, 0x0BC1, 0x0000}, // 0xDA
    {0x0BA9, 0x0BC1, 0x0000}, // 0xDB
    {0x0B95, 0x0BC2, 0x0000}, // 0xDC
    {0x0B9A, 0x0BC2, 0x0000}, // 0xDD
    {0x0B9F, 0x0BC2, 0x0000}, // 0xDE
    {0x0BA3, 0x0BC2, 0x0000}, // 0xDF
    {0x0BA4, 0x0BC2, 0x0000}, // 0xE0
    {0x0BA8, 0x0BC2, 0x0000}, // 0xE1
    {0x0BAA, 0x0BC2, 0x0000}, // 0xE2
    {0x0BAE, 0x0BC2, 0x0000}, // 0xE3
    {0x0BAF, 0x0BC2, 0x0000}, // 0xE4
    {0x0BB0, 0x0BC2, 0x0000}, // 0xE5
    {0x0BB2, 0x0BC2, 0x0000}, // 0xE6
    {0x0BB5, 0x0BC2, 0x0000}, // 0xE7
    {0x0BB4, 0x0BC2, 0x0000}, // 0xE8
    {0x0BB3, 0x0BC2, 0x0000}, // 0xE9
    {0x0BB1, 0x0BC2, 0x0000}, // 0xEA
    {0x0BA9, 0x0BC2, 0x0000}, // 0xEB
    {0x0B95, 0x0B82, 0x0000}, // 0xEC
    {0x0B99, 0x0B82, 0x0000}, // 0xED
    {0x0B9A, 0x0B82, 0x0000}, // 0xEE
    {0x0B9E, 0x0B82, 0x0000}, // 0xEF
    {0x0B9F, 0x0B82, 0x0000}, // 0xF0
    {0x0BA3, 0x0B82, 0x0000}, // 0xF1
    {0x0BA4, 0x0B82, 0x0000}, // 0xF2 (was the unassigned code point U+0BA1)
    {0x0BA8, 0x0B82, 0x0000}, // 0xF3
    {0x0BAA, 0x0B82, 0x0000}, // 0xF4
    {0x0BAE, 0x0B82, 0x0000}, // 0xF5
    {0x0BAF, 0x0B82, 0x0000}, // 0xF6
    {0x0BB0, 0x0B82, 0x0000}, // 0xF7
    {0x0BB2, 0x0B82, 0x0000}, // 0xF8
    {0x0BB5, 0x0B82, 0x0000}, // 0xF9
    {0x0BB4, 0x0B82, 0x0000}, // 0xFA
    {0x0BB3, 0x0B82, 0x0000}, // 0xFB
    {0x0BB1, 0x0B82, 0x0000}, // 0xFC
    {0x0BA9, 0x0B82, 0x0000}  // 0xFD
};
447
// Compares the first len elements of s1 and s2 element by element.
// Returns 0 when they are all equal (or len is 0); otherwise returns the
// signed difference s1[k] - s2[k] at the first position k where they differ.
static int cmp(const ushort *s1, const ushort *s2, size_t len)
{
    for (size_t k = 0; k < len; ++k) {
        const int delta = int(s1[k]) - int(s2[k]);
        if (delta != 0)
            return delta;
    }
    return 0;
}
457
458static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3)
459{
460 ushort s[3];
461 s[0] = u1;
462 s[1] = u2;
463 s[2] = u3;
464
465 int a = 0; // start pos
466 int b = UnToTsLast; // end pos
467
468 // do a binary search for the composed unicode in the list
469 while (a <= b) {
470 int w = (a + b) / 2;
471 int j = cmp(UnToTs[w], s, 3);
472
473 if (j == 0)
474 // found it
475 return UnToTs[w][3];
476
477 if (j < 0)
478 a = w + 1;
479 else
480 b = w - 1;
481 }
482
483 return 0;
484}
485
486static unsigned int qt_TSCIIToUnicode(uint code, uint *s)
487{
488 int len = 0;
489 for (int i = 0; i < 3; i++) {
490 uint u = TsToUn[code & 0x7f][i];
491 s[i] = u;
492 if (s[i]) len = i + 1;
493 }
494
495 return len;
496}
497
498QT_END_NAMESPACE
499
500#endif // QT_NO_CODECS
Note: See TracBrowser for help on using the repository browser.