source: trunk/src/plugins/codecs/jp/qjiscodec.cpp@ 1168

Last change on this file since 1168 was 846, checked in by Dmitry A. Kuminov, 14 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 11.6 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation (qt-info@nokia.com)
6**
7** This file is part of the plugins of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at qt-info@nokia.com.
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42// Most of the code here was originally written by Serika Kurusugawa,
43// a.k.a. Junji Takagi, and is included in Qt with the author's permission
44// and the grateful thanks of the Qt team.
45
46/*! \class QJisCodec
47 \reentrant
48 \internal
49*/
50
51#include "qjiscodec.h"
52#include "qlist.h"
53
54QT_BEGIN_NAMESPACE
55
56#ifndef QT_NO_TEXTCODEC
// Byte values that get special treatment while encoding and decoding.
enum {
    // Control characters
    Esc = 0x1b,             // introduces an escape sequence
    So  = 0x0e,             // Shift Out
    Si  = 0x0f,             // Shift In

    // 0x5c is named differently depending on the active character set
    ReverseSolidus = 0x5c,
    YenSign        = 0x5c,

    // ... and so is 0x7e
    Tilde    = 0x7e,
    Overline = 0x7e
};
67
// True when c lies in the closed range 0xa1..0xdf. The argument is evaluated twice.
#define IsKana(c) ((0xa1 <= (c)) && ((c) <= 0xdf))
// True when c lies in the closed range 0x21..0x7e. The argument is evaluated twice.
#define IsJisChar(c) ((0x21 <= (c)) && ((c) <= 0x7e))

// Wraps a conversion result in a QChar; a result of 0 becomes
// QChar::ReplacementCharacter.
#define QValidChar(u) ((u) != 0 ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
72
// Character set currently selected in the byte stream. The values from
// MinState to MaxState are used as indexes into Esc_SEQ.
enum Iso2022State {
    Ascii,
    MinState = Ascii,
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,
    UnknownState            // no usable character set is selected
};
78
79static const char Esc_CHARS[] = "()*+-./";
80
81static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
82static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
83static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
84static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
85static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
86static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
87static const char * const Esc_SEQ[] = { Esc_Ascii,
88 Esc_JISX0201_Latin,
89 Esc_JISX0201_Kana,
90 Esc_JISX0208_1978,
91 Esc_JISX0208_1983,
92 Esc_JISX0212 };
93
94/*! \internal */
95QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
96{
97}
98
99
100/*! \internal */
101QJisCodec::~QJisCodec()
102{
103 delete (QJpUnicodeConv*)conv;
104 conv = 0;
105}
106
107QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
108{
109 char replacement = '?';
110 if (cs) {
111 if (cs->flags & ConvertInvalidToNull)
112 replacement = 0;
113 }
114 int invalid = 0;
115
116 QByteArray result;
117 Iso2022State state = Ascii;
118 Iso2022State prev = Ascii;
119 for (int i = 0; i < len; i++) {
120 QChar ch = uc[i];
121 uint j;
122 if (ch.row() == 0x00 && ch.cell() < 0x80) {
123 // Ascii
124 if (state != JISX0201_Latin ||
125 ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
126 state = Ascii;
127 }
128 j = ch.cell();
129 } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
130 if (j < 0x80) {
131 // JIS X 0201 Latin
132 if (state != Ascii ||
133 ch.cell() == YenSign || ch.cell() == Overline) {
134 state = JISX0201_Latin;
135 }
136 } else {
137 // JIS X 0201 Kana
138 state = JISX0201_Kana;
139 j &= 0x7f;
140 }
141 } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
142 // JIS X 0208
143 state = JISX0208_1983;
144 } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
145 // JIS X 0212
146 state = JISX0212;
147 } else {
148 // Invalid
149 state = UnknownState;
150 j = replacement;
151 ++invalid;
152 }
153 if (state != prev) {
154 if (state == UnknownState) {
155 result += Esc_Ascii;
156 } else {
157 result += Esc_SEQ[state - MinState];
158 }
159 prev = state;
160 }
161 if (j < 0x0100) {
162 result += j & 0xff;
163 } else {
164 result += (j >> 8) & 0xff;
165 result += j & 0xff;
166 }
167 }
168 if (prev != Ascii) {
169 result += Esc_Ascii;
170 }
171
172 if (cs) {
173 cs->invalidChars += invalid;
174 }
175 return result;
176}
177
178QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
179{
180 uchar buf[4] = {0, 0, 0, 0};
181 int nbuf = 0;
182 Iso2022State state = Ascii, prev = Ascii;
183 bool esc = false;
184 QChar replacement = QChar::ReplacementCharacter;
185 if (cs) {
186 if (cs->flags & ConvertInvalidToNull)
187 replacement = QChar::Null;
188 nbuf = cs->remainingChars;
189 buf[0] = (cs->state_data[0] >> 24) & 0xff;
190 buf[1] = (cs->state_data[0] >> 16) & 0xff;
191 buf[2] = (cs->state_data[0] >> 8) & 0xff;
192 buf[3] = (cs->state_data[0] >> 0) & 0xff;
193 state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
194 prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
195 esc = cs->state_data[2];
196 }
197 int invalid = 0;
198
199 QString result;
200 for (int i=0; i<len; i++) {
201 uchar ch = chars[i];
202 if (esc) {
203 // Escape sequence
204 state = UnknownState;
205 switch (nbuf) {
206 case 0:
207 if (ch == '$' || strchr(Esc_CHARS, ch)) {
208 buf[nbuf++] = ch;
209 } else {
210 nbuf = 0;
211 esc = false;
212 }
213 break;
214 case 1:
215 if (buf[0] == '$') {
216 if (strchr(Esc_CHARS, ch)) {
217 buf[nbuf++] = ch;
218 } else {
219 switch (ch) {
220 case '@':
221 state = JISX0208_1978; // Esc $ @
222 break;
223 case 'B':
224 state = JISX0208_1983; // Esc $ B
225 break;
226 }
227 nbuf = 0;
228 esc = false;
229 }
230 } else {
231 if (buf[0] == '(') {
232 switch (ch) {
233 case 'B':
234 state = Ascii; // Esc (B
235 break;
236 case 'I':
237 state = JISX0201_Kana; // Esc (I
238 break;
239 case 'J':
240 state = JISX0201_Latin; // Esc (J
241 break;
242 }
243 }
244 nbuf = 0;
245 esc = false;
246 }
247 break;
248 case 2:
249 if (buf[1] == '(') {
250 switch (ch) {
251 case 'D':
252 state = JISX0212; // Esc $ (D
253 break;
254 }
255 }
256 nbuf = 0;
257 esc = false;
258 break;
259 }
260 } else {
261 if (ch == Esc) {
262 // Escape sequence
263 nbuf = 0;
264 esc = true;
265 } else if (ch == So) {
266 // Shift out
267 prev = state;
268 state = JISX0201_Kana;
269 nbuf = 0;
270 } else if (ch == Si) {
271 // Shift in
272 if (prev == Ascii || prev == JISX0201_Latin) {
273 state = prev;
274 } else {
275 state = Ascii;
276 }
277 nbuf = 0;
278 } else {
279 uint u;
280 switch (nbuf) {
281 case 0:
282 switch (state) {
283 case Ascii:
284 if (ch < 0x80) {
285 result += QLatin1Char(ch);
286 break;
287 }
288 /* fall through */
289 case JISX0201_Latin:
290 u = conv->jisx0201ToUnicode(ch);
291 result += QValidChar(u);
292 break;
293 case JISX0201_Kana:
294 u = conv->jisx0201ToUnicode(ch | 0x80);
295 result += QValidChar(u);
296 break;
297 case JISX0208_1978:
298 case JISX0208_1983:
299 case JISX0212:
300 buf[nbuf++] = ch;
301 break;
302 default:
303 result += QChar::ReplacementCharacter;
304 break;
305 }
306 break;
307 case 1:
308 switch (state) {
309 case JISX0208_1978:
310 case JISX0208_1983:
311 u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
312 result += QValidChar(u);
313 break;
314 case JISX0212:
315 u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
316 result += QValidChar(u);
317 break;
318 default:
319 result += replacement;
320 ++invalid;
321 break;
322 }
323 nbuf = 0;
324 break;
325 }
326 }
327 }
328 }
329
330 if (cs) {
331 cs->remainingChars = nbuf;
332 cs->invalidChars += invalid;
333 cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
334 cs->state_data[1] = (prev << 8) + state;
335 cs->state_data[2] = esc;
336 }
337
338 return result;
339}
340
341
342
343/*! \internal */
344int QJisCodec::_mibEnum()
345{
346 return 39;
347}
348