1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation ([email protected])
|
---|
6 | **
|
---|
7 | ** This file is part of the plugins of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | // Most of the code here was originally written by Serika Kurusugawa,
|
---|
43 | // a.k.a. Junji Takagi, and is included in Qt with the author's permission
|
---|
44 | // and the grateful thanks of the Qt team.
|
---|
45 |
|
---|
46 | /*! \class QJisCodec
|
---|
47 | \reentrant
|
---|
48 | \internal
|
---|
49 | */
|
---|
50 |
|
---|
51 | #include "qjiscodec.h"
|
---|
52 | #include "qlist.h"
|
---|
53 |
|
---|
54 | QT_BEGIN_NAMESPACE
|
---|
55 |
|
---|
56 | #ifndef QT_NO_TEXTCODEC
|
---|
// Control bytes and the two code positions that ASCII and JIS X 0201 Latin
// assign to different characters.
enum {
    Esc = 0x1b,                 // Escape, starts a designation sequence
    So = 0x0e,                  // Shift Out
    Si = 0x0f,                  // Shift In

    // 0x5c: reverse solidus in ASCII, yen sign in JIS X 0201 Latin.
    ReverseSolidus = 0x5c,
    YenSign = ReverseSolidus,

    // 0x7e: tilde in ASCII, overline in JIS X 0201 Latin.
    Tilde = 0x7e,
    Overline = Tilde
};
|
---|
67 |
|
---|
// True when c lies in the inclusive byte range 0xa1..0xdf.
#define IsKana(c)       ((c) >= 0xa1 && (c) <= 0xdf)
// True when c lies in the inclusive byte range 0x21..0x7e.
#define IsJisChar(c)    ((c) >= 0x21 && (c) <= 0x7e)

// Wraps a conversion result in a QChar; a zero result becomes
// QChar::ReplacementCharacter.
#define QValidChar(u)   ((u) != 0 ? QChar(static_cast<ushort>(u)) : QChar(QChar::ReplacementCharacter))
|
---|
72 |
|
---|
// Character sets the codec can switch between. MinState and MaxState alias
// the first and last real character set; UnknownState follows them and marks
// "no usable character set".
enum Iso2022State {
    Ascii,
    MinState = Ascii,
    JISX0201_Latin,
    JISX0201_Kana,
    JISX0208_1978,
    JISX0208_1983,
    JISX0212,
    MaxState = JISX0212,
    UnknownState
};
|
---|
78 |
|
---|
79 | static const char Esc_CHARS[] = "()*+-./";
|
---|
80 |
|
---|
81 | static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
|
---|
82 | static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
|
---|
83 | static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
|
---|
84 | static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
|
---|
85 | static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
|
---|
86 | static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
|
---|
87 | static const char * const Esc_SEQ[] = { Esc_Ascii,
|
---|
88 | Esc_JISX0201_Latin,
|
---|
89 | Esc_JISX0201_Kana,
|
---|
90 | Esc_JISX0208_1978,
|
---|
91 | Esc_JISX0208_1983,
|
---|
92 | Esc_JISX0212 };
|
---|
93 |
|
---|
94 | /*! \internal */
|
---|
95 | QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
|
---|
96 | {
|
---|
97 | }
|
---|
98 |
|
---|
99 |
|
---|
100 | /*! \internal */
|
---|
101 | QJisCodec::~QJisCodec()
|
---|
102 | {
|
---|
103 | delete (QJpUnicodeConv*)conv;
|
---|
104 | conv = 0;
|
---|
105 | }
|
---|
106 |
|
---|
107 | QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
|
---|
108 | {
|
---|
109 | char replacement = '?';
|
---|
110 | if (cs) {
|
---|
111 | if (cs->flags & ConvertInvalidToNull)
|
---|
112 | replacement = 0;
|
---|
113 | }
|
---|
114 | int invalid = 0;
|
---|
115 |
|
---|
116 | QByteArray result;
|
---|
117 | Iso2022State state = Ascii;
|
---|
118 | Iso2022State prev = Ascii;
|
---|
119 | for (int i = 0; i < len; i++) {
|
---|
120 | QChar ch = uc[i];
|
---|
121 | uint j;
|
---|
122 | if (ch.row() == 0x00 && ch.cell() < 0x80) {
|
---|
123 | // Ascii
|
---|
124 | if (state != JISX0201_Latin ||
|
---|
125 | ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
|
---|
126 | state = Ascii;
|
---|
127 | }
|
---|
128 | j = ch.cell();
|
---|
129 | } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
|
---|
130 | if (j < 0x80) {
|
---|
131 | // JIS X 0201 Latin
|
---|
132 | if (state != Ascii ||
|
---|
133 | ch.cell() == YenSign || ch.cell() == Overline) {
|
---|
134 | state = JISX0201_Latin;
|
---|
135 | }
|
---|
136 | } else {
|
---|
137 | // JIS X 0201 Kana
|
---|
138 | state = JISX0201_Kana;
|
---|
139 | j &= 0x7f;
|
---|
140 | }
|
---|
141 | } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
|
---|
142 | // JIS X 0208
|
---|
143 | state = JISX0208_1983;
|
---|
144 | } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
|
---|
145 | // JIS X 0212
|
---|
146 | state = JISX0212;
|
---|
147 | } else {
|
---|
148 | // Invalid
|
---|
149 | state = UnknownState;
|
---|
150 | j = replacement;
|
---|
151 | ++invalid;
|
---|
152 | }
|
---|
153 | if (state != prev) {
|
---|
154 | if (state == UnknownState) {
|
---|
155 | result += Esc_Ascii;
|
---|
156 | } else {
|
---|
157 | result += Esc_SEQ[state - MinState];
|
---|
158 | }
|
---|
159 | prev = state;
|
---|
160 | }
|
---|
161 | if (j < 0x0100) {
|
---|
162 | result += j & 0xff;
|
---|
163 | } else {
|
---|
164 | result += (j >> 8) & 0xff;
|
---|
165 | result += j & 0xff;
|
---|
166 | }
|
---|
167 | }
|
---|
168 | if (prev != Ascii) {
|
---|
169 | result += Esc_Ascii;
|
---|
170 | }
|
---|
171 |
|
---|
172 | if (cs) {
|
---|
173 | cs->invalidChars += invalid;
|
---|
174 | }
|
---|
175 | return result;
|
---|
176 | }
|
---|
177 |
|
---|
178 | QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
|
---|
179 | {
|
---|
180 | uchar buf[4] = {0, 0, 0, 0};
|
---|
181 | int nbuf = 0;
|
---|
182 | Iso2022State state = Ascii, prev = Ascii;
|
---|
183 | bool esc = false;
|
---|
184 | QChar replacement = QChar::ReplacementCharacter;
|
---|
185 | if (cs) {
|
---|
186 | if (cs->flags & ConvertInvalidToNull)
|
---|
187 | replacement = QChar::Null;
|
---|
188 | nbuf = cs->remainingChars;
|
---|
189 | buf[0] = (cs->state_data[0] >> 24) & 0xff;
|
---|
190 | buf[1] = (cs->state_data[0] >> 16) & 0xff;
|
---|
191 | buf[2] = (cs->state_data[0] >> 8) & 0xff;
|
---|
192 | buf[3] = (cs->state_data[0] >> 0) & 0xff;
|
---|
193 | state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
|
---|
194 | prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
|
---|
195 | esc = cs->state_data[2];
|
---|
196 | }
|
---|
197 | int invalid = 0;
|
---|
198 |
|
---|
199 | QString result;
|
---|
200 | for (int i=0; i<len; i++) {
|
---|
201 | uchar ch = chars[i];
|
---|
202 | if (esc) {
|
---|
203 | // Escape sequence
|
---|
204 | state = UnknownState;
|
---|
205 | switch (nbuf) {
|
---|
206 | case 0:
|
---|
207 | if (ch == '$' || strchr(Esc_CHARS, ch)) {
|
---|
208 | buf[nbuf++] = ch;
|
---|
209 | } else {
|
---|
210 | nbuf = 0;
|
---|
211 | esc = false;
|
---|
212 | }
|
---|
213 | break;
|
---|
214 | case 1:
|
---|
215 | if (buf[0] == '$') {
|
---|
216 | if (strchr(Esc_CHARS, ch)) {
|
---|
217 | buf[nbuf++] = ch;
|
---|
218 | } else {
|
---|
219 | switch (ch) {
|
---|
220 | case '@':
|
---|
221 | state = JISX0208_1978; // Esc $ @
|
---|
222 | break;
|
---|
223 | case 'B':
|
---|
224 | state = JISX0208_1983; // Esc $ B
|
---|
225 | break;
|
---|
226 | }
|
---|
227 | nbuf = 0;
|
---|
228 | esc = false;
|
---|
229 | }
|
---|
230 | } else {
|
---|
231 | if (buf[0] == '(') {
|
---|
232 | switch (ch) {
|
---|
233 | case 'B':
|
---|
234 | state = Ascii; // Esc (B
|
---|
235 | break;
|
---|
236 | case 'I':
|
---|
237 | state = JISX0201_Kana; // Esc (I
|
---|
238 | break;
|
---|
239 | case 'J':
|
---|
240 | state = JISX0201_Latin; // Esc (J
|
---|
241 | break;
|
---|
242 | }
|
---|
243 | }
|
---|
244 | nbuf = 0;
|
---|
245 | esc = false;
|
---|
246 | }
|
---|
247 | break;
|
---|
248 | case 2:
|
---|
249 | if (buf[1] == '(') {
|
---|
250 | switch (ch) {
|
---|
251 | case 'D':
|
---|
252 | state = JISX0212; // Esc $ (D
|
---|
253 | break;
|
---|
254 | }
|
---|
255 | }
|
---|
256 | nbuf = 0;
|
---|
257 | esc = false;
|
---|
258 | break;
|
---|
259 | }
|
---|
260 | } else {
|
---|
261 | if (ch == Esc) {
|
---|
262 | // Escape sequence
|
---|
263 | nbuf = 0;
|
---|
264 | esc = true;
|
---|
265 | } else if (ch == So) {
|
---|
266 | // Shift out
|
---|
267 | prev = state;
|
---|
268 | state = JISX0201_Kana;
|
---|
269 | nbuf = 0;
|
---|
270 | } else if (ch == Si) {
|
---|
271 | // Shift in
|
---|
272 | if (prev == Ascii || prev == JISX0201_Latin) {
|
---|
273 | state = prev;
|
---|
274 | } else {
|
---|
275 | state = Ascii;
|
---|
276 | }
|
---|
277 | nbuf = 0;
|
---|
278 | } else {
|
---|
279 | uint u;
|
---|
280 | switch (nbuf) {
|
---|
281 | case 0:
|
---|
282 | switch (state) {
|
---|
283 | case Ascii:
|
---|
284 | if (ch < 0x80) {
|
---|
285 | result += QLatin1Char(ch);
|
---|
286 | break;
|
---|
287 | }
|
---|
288 | /* fall through */
|
---|
289 | case JISX0201_Latin:
|
---|
290 | u = conv->jisx0201ToUnicode(ch);
|
---|
291 | result += QValidChar(u);
|
---|
292 | break;
|
---|
293 | case JISX0201_Kana:
|
---|
294 | u = conv->jisx0201ToUnicode(ch | 0x80);
|
---|
295 | result += QValidChar(u);
|
---|
296 | break;
|
---|
297 | case JISX0208_1978:
|
---|
298 | case JISX0208_1983:
|
---|
299 | case JISX0212:
|
---|
300 | buf[nbuf++] = ch;
|
---|
301 | break;
|
---|
302 | default:
|
---|
303 | result += QChar::ReplacementCharacter;
|
---|
304 | break;
|
---|
305 | }
|
---|
306 | break;
|
---|
307 | case 1:
|
---|
308 | switch (state) {
|
---|
309 | case JISX0208_1978:
|
---|
310 | case JISX0208_1983:
|
---|
311 | u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
|
---|
312 | result += QValidChar(u);
|
---|
313 | break;
|
---|
314 | case JISX0212:
|
---|
315 | u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
|
---|
316 | result += QValidChar(u);
|
---|
317 | break;
|
---|
318 | default:
|
---|
319 | result += replacement;
|
---|
320 | ++invalid;
|
---|
321 | break;
|
---|
322 | }
|
---|
323 | nbuf = 0;
|
---|
324 | break;
|
---|
325 | }
|
---|
326 | }
|
---|
327 | }
|
---|
328 | }
|
---|
329 |
|
---|
330 | if (cs) {
|
---|
331 | cs->remainingChars = nbuf;
|
---|
332 | cs->invalidChars += invalid;
|
---|
333 | cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
|
---|
334 | cs->state_data[1] = (prev << 8) + state;
|
---|
335 | cs->state_data[2] = esc;
|
---|
336 | }
|
---|
337 |
|
---|
338 | return result;
|
---|
339 | }
|
---|
340 |
|
---|
341 |
|
---|
342 |
|
---|
343 | /*! \internal */
|
---|
344 | int QJisCodec::_mibEnum()
|
---|
345 | {
|
---|
346 | return 39;
|
---|
347 | }
|
---|
348 |
|
---|
|
---|