| 1 | /****************************************************************************
|
|---|
| 2 | **
|
|---|
| 3 | ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
|
|---|
| 4 | ** All rights reserved.
|
|---|
| 5 | ** Contact: Nokia Corporation (qt-info@nokia.com)
|
|---|
| 6 | **
|
|---|
| 7 | ** This file is part of the plugins of the Qt Toolkit.
|
|---|
| 8 | **
|
|---|
| 9 | ** $QT_BEGIN_LICENSE:LGPL$
|
|---|
| 10 | ** Commercial Usage
|
|---|
| 11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
|---|
| 12 | ** accordance with the Qt Commercial License Agreement provided with the
|
|---|
| 13 | ** Software or, alternatively, in accordance with the terms contained in
|
|---|
| 14 | ** a written agreement between you and Nokia.
|
|---|
| 15 | **
|
|---|
| 16 | ** GNU Lesser General Public License Usage
|
|---|
| 17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
|---|
| 18 | ** General Public License version 2.1 as published by the Free Software
|
|---|
| 19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
|---|
| 20 | ** packaging of this file. Please review the following information to
|
|---|
| 21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
|---|
| 22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
|---|
| 23 | **
|
|---|
| 24 | ** In addition, as a special exception, Nokia gives you certain additional
|
|---|
| 25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
|---|
| 26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
|---|
| 27 | **
|
|---|
| 28 | ** GNU General Public License Usage
|
|---|
| 29 | ** Alternatively, this file may be used under the terms of the GNU
|
|---|
| 30 | ** General Public License version 3.0 as published by the Free Software
|
|---|
| 31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
|---|
| 32 | ** packaging of this file. Please review the following information to
|
|---|
| 33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
|---|
| 34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
|---|
| 35 | **
|
|---|
| 36 | ** If you have questions regarding the use of this file, please contact
|
|---|
| 37 | ** Nokia at qt-info@nokia.com.
|
|---|
| 38 | ** $QT_END_LICENSE$
|
|---|
| 39 | **
|
|---|
| 40 | ****************************************************************************/
|
|---|
| 41 |
|
|---|
| 42 | // Most of the code here was originally written by Serika Kurusugawa,
|
|---|
| 43 | // a.k.a. Junji Takagi, and is included in Qt with the author's permission
|
|---|
| 44 | // and the grateful thanks of the Qt team.
|
|---|
| 45 |
|
|---|
| 46 | /*! \class QJisCodec
|
|---|
| 47 | \reentrant
|
|---|
| 48 | \internal
|
|---|
| 49 | */
|
|---|
| 50 |
|
|---|
| 51 | #include "qjiscodec.h"
|
|---|
| 52 | #include "qlist.h"
|
|---|
| 53 |
|
|---|
| 54 | QT_BEGIN_NAMESPACE
|
|---|
| 55 |
|
|---|
| 56 | #ifndef QT_NO_TEXTCODEC
|
|---|
// Control bytes and the code positions at which ASCII and JIS X 0201 Latin
// use different names for the same byte value.
enum {
    Esc = 0x1b,                     // introduces an escape sequence
    So = 0x0e,                      // Shift Out
    Si = 0x0f,                      // Shift In

    ReverseSolidus = 0x5c,
    YenSign = ReverseSolidus,       // same byte value as ReverseSolidus
    Tilde = 0x7e,
    Overline = Tilde                // same byte value as Tilde
};
|
|---|
| 67 |
|
|---|
// Range tests on a byte value. Note that each macro evaluates its argument
// twice, so the argument must be free of side effects.
#define IsKana(c)       ((0xa1 <= (c)) && ((c) <= 0xdf))
#define IsJisChar(c)    ((0x21 <= (c)) && ((c) <= 0x7e))

// Wraps a converter result in a QChar: a zero result becomes
// QChar::ReplacementCharacter, anything else is truncated to ushort.
#define QValidChar(u)   ((u) ? QChar((ushort)(u)) : QChar(QChar::ReplacementCharacter))
|
|---|
| 72 |
|
|---|
// Character set currently selected in an ISO-2022 byte stream.
// The values MinState..MaxState are contiguous so that (state - MinState)
// can be used as an array index; UnknownState lies outside that range.
enum Iso2022State {
    Ascii = 0,
    MinState = Ascii,
    JISX0201_Latin = 1,
    JISX0201_Kana = 2,
    JISX0208_1978 = 3,
    JISX0208_1983 = 4,
    JISX0212 = 5,
    MaxState = JISX0212,
    UnknownState = 6
};
|
|---|
| 78 |
|
|---|
| 79 | static const char Esc_CHARS[] = "()*+-./";
|
|---|
| 80 |
|
|---|
| 81 | static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
|
|---|
| 82 | static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
|
|---|
| 83 | static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
|
|---|
| 84 | static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
|
|---|
| 85 | static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
|
|---|
| 86 | static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
|
|---|
| 87 | static const char * const Esc_SEQ[] = { Esc_Ascii,
|
|---|
| 88 | Esc_JISX0201_Latin,
|
|---|
| 89 | Esc_JISX0201_Kana,
|
|---|
| 90 | Esc_JISX0208_1978,
|
|---|
| 91 | Esc_JISX0208_1983,
|
|---|
| 92 | Esc_JISX0212 };
|
|---|
| 93 |
|
|---|
| 94 | /*! \internal */
|
|---|
| 95 | QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
|
|---|
| 96 | {
|
|---|
| 97 | }
|
|---|
| 98 |
|
|---|
| 99 |
|
|---|
| 100 | /*! \internal */
|
|---|
| 101 | QJisCodec::~QJisCodec()
|
|---|
| 102 | {
|
|---|
| 103 | delete (QJpUnicodeConv*)conv;
|
|---|
| 104 | conv = 0;
|
|---|
| 105 | }
|
|---|
| 106 |
|
|---|
| 107 | QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
|
|---|
| 108 | {
|
|---|
| 109 | char replacement = '?';
|
|---|
| 110 | if (cs) {
|
|---|
| 111 | if (cs->flags & ConvertInvalidToNull)
|
|---|
| 112 | replacement = 0;
|
|---|
| 113 | }
|
|---|
| 114 | int invalid = 0;
|
|---|
| 115 |
|
|---|
| 116 | QByteArray result;
|
|---|
| 117 | Iso2022State state = Ascii;
|
|---|
| 118 | Iso2022State prev = Ascii;
|
|---|
| 119 | for (int i = 0; i < len; i++) {
|
|---|
| 120 | QChar ch = uc[i];
|
|---|
| 121 | uint j;
|
|---|
| 122 | if (ch.row() == 0x00 && ch.cell() < 0x80) {
|
|---|
| 123 | // Ascii
|
|---|
| 124 | if (state != JISX0201_Latin ||
|
|---|
| 125 | ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
|
|---|
| 126 | state = Ascii;
|
|---|
| 127 | }
|
|---|
| 128 | j = ch.cell();
|
|---|
| 129 | } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
|
|---|
| 130 | if (j < 0x80) {
|
|---|
| 131 | // JIS X 0201 Latin
|
|---|
| 132 | if (state != Ascii ||
|
|---|
| 133 | ch.cell() == YenSign || ch.cell() == Overline) {
|
|---|
| 134 | state = JISX0201_Latin;
|
|---|
| 135 | }
|
|---|
| 136 | } else {
|
|---|
| 137 | // JIS X 0201 Kana
|
|---|
| 138 | state = JISX0201_Kana;
|
|---|
| 139 | j &= 0x7f;
|
|---|
| 140 | }
|
|---|
| 141 | } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
|
|---|
| 142 | // JIS X 0208
|
|---|
| 143 | state = JISX0208_1983;
|
|---|
| 144 | } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
|
|---|
| 145 | // JIS X 0212
|
|---|
| 146 | state = JISX0212;
|
|---|
| 147 | } else {
|
|---|
| 148 | // Invalid
|
|---|
| 149 | state = UnknownState;
|
|---|
| 150 | j = replacement;
|
|---|
| 151 | ++invalid;
|
|---|
| 152 | }
|
|---|
| 153 | if (state != prev) {
|
|---|
| 154 | if (state == UnknownState) {
|
|---|
| 155 | result += Esc_Ascii;
|
|---|
| 156 | } else {
|
|---|
| 157 | result += Esc_SEQ[state - MinState];
|
|---|
| 158 | }
|
|---|
| 159 | prev = state;
|
|---|
| 160 | }
|
|---|
| 161 | if (j < 0x0100) {
|
|---|
| 162 | result += j & 0xff;
|
|---|
| 163 | } else {
|
|---|
| 164 | result += (j >> 8) & 0xff;
|
|---|
| 165 | result += j & 0xff;
|
|---|
| 166 | }
|
|---|
| 167 | }
|
|---|
| 168 | if (prev != Ascii) {
|
|---|
| 169 | result += Esc_Ascii;
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 | if (cs) {
|
|---|
| 173 | cs->invalidChars += invalid;
|
|---|
| 174 | }
|
|---|
| 175 | return result;
|
|---|
| 176 | }
|
|---|
| 177 |
|
|---|
| 178 | QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
|
|---|
| 179 | {
|
|---|
| 180 | uchar buf[4] = {0, 0, 0, 0};
|
|---|
| 181 | int nbuf = 0;
|
|---|
| 182 | Iso2022State state = Ascii, prev = Ascii;
|
|---|
| 183 | bool esc = false;
|
|---|
| 184 | QChar replacement = QChar::ReplacementCharacter;
|
|---|
| 185 | if (cs) {
|
|---|
| 186 | if (cs->flags & ConvertInvalidToNull)
|
|---|
| 187 | replacement = QChar::Null;
|
|---|
| 188 | nbuf = cs->remainingChars;
|
|---|
| 189 | buf[0] = (cs->state_data[0] >> 24) & 0xff;
|
|---|
| 190 | buf[1] = (cs->state_data[0] >> 16) & 0xff;
|
|---|
| 191 | buf[2] = (cs->state_data[0] >> 8) & 0xff;
|
|---|
| 192 | buf[3] = (cs->state_data[0] >> 0) & 0xff;
|
|---|
| 193 | state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
|
|---|
| 194 | prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
|
|---|
| 195 | esc = cs->state_data[2];
|
|---|
| 196 | }
|
|---|
| 197 | int invalid = 0;
|
|---|
| 198 |
|
|---|
| 199 | QString result;
|
|---|
| 200 | for (int i=0; i<len; i++) {
|
|---|
| 201 | uchar ch = chars[i];
|
|---|
| 202 | if (esc) {
|
|---|
| 203 | // Escape sequence
|
|---|
| 204 | state = UnknownState;
|
|---|
| 205 | switch (nbuf) {
|
|---|
| 206 | case 0:
|
|---|
| 207 | if (ch == '$' || strchr(Esc_CHARS, ch)) {
|
|---|
| 208 | buf[nbuf++] = ch;
|
|---|
| 209 | } else {
|
|---|
| 210 | nbuf = 0;
|
|---|
| 211 | esc = false;
|
|---|
| 212 | }
|
|---|
| 213 | break;
|
|---|
| 214 | case 1:
|
|---|
| 215 | if (buf[0] == '$') {
|
|---|
| 216 | if (strchr(Esc_CHARS, ch)) {
|
|---|
| 217 | buf[nbuf++] = ch;
|
|---|
| 218 | } else {
|
|---|
| 219 | switch (ch) {
|
|---|
| 220 | case '@':
|
|---|
| 221 | state = JISX0208_1978; // Esc $ @
|
|---|
| 222 | break;
|
|---|
| 223 | case 'B':
|
|---|
| 224 | state = JISX0208_1983; // Esc $ B
|
|---|
| 225 | break;
|
|---|
| 226 | }
|
|---|
| 227 | nbuf = 0;
|
|---|
| 228 | esc = false;
|
|---|
| 229 | }
|
|---|
| 230 | } else {
|
|---|
| 231 | if (buf[0] == '(') {
|
|---|
| 232 | switch (ch) {
|
|---|
| 233 | case 'B':
|
|---|
| 234 | state = Ascii; // Esc (B
|
|---|
| 235 | break;
|
|---|
| 236 | case 'I':
|
|---|
| 237 | state = JISX0201_Kana; // Esc (I
|
|---|
| 238 | break;
|
|---|
| 239 | case 'J':
|
|---|
| 240 | state = JISX0201_Latin; // Esc (J
|
|---|
| 241 | break;
|
|---|
| 242 | }
|
|---|
| 243 | }
|
|---|
| 244 | nbuf = 0;
|
|---|
| 245 | esc = false;
|
|---|
| 246 | }
|
|---|
| 247 | break;
|
|---|
| 248 | case 2:
|
|---|
| 249 | if (buf[1] == '(') {
|
|---|
| 250 | switch (ch) {
|
|---|
| 251 | case 'D':
|
|---|
| 252 | state = JISX0212; // Esc $ (D
|
|---|
| 253 | break;
|
|---|
| 254 | }
|
|---|
| 255 | }
|
|---|
| 256 | nbuf = 0;
|
|---|
| 257 | esc = false;
|
|---|
| 258 | break;
|
|---|
| 259 | }
|
|---|
| 260 | } else {
|
|---|
| 261 | if (ch == Esc) {
|
|---|
| 262 | // Escape sequence
|
|---|
| 263 | nbuf = 0;
|
|---|
| 264 | esc = true;
|
|---|
| 265 | } else if (ch == So) {
|
|---|
| 266 | // Shift out
|
|---|
| 267 | prev = state;
|
|---|
| 268 | state = JISX0201_Kana;
|
|---|
| 269 | nbuf = 0;
|
|---|
| 270 | } else if (ch == Si) {
|
|---|
| 271 | // Shift in
|
|---|
| 272 | if (prev == Ascii || prev == JISX0201_Latin) {
|
|---|
| 273 | state = prev;
|
|---|
| 274 | } else {
|
|---|
| 275 | state = Ascii;
|
|---|
| 276 | }
|
|---|
| 277 | nbuf = 0;
|
|---|
| 278 | } else {
|
|---|
| 279 | uint u;
|
|---|
| 280 | switch (nbuf) {
|
|---|
| 281 | case 0:
|
|---|
| 282 | switch (state) {
|
|---|
| 283 | case Ascii:
|
|---|
| 284 | if (ch < 0x80) {
|
|---|
| 285 | result += QLatin1Char(ch);
|
|---|
| 286 | break;
|
|---|
| 287 | }
|
|---|
| 288 | /* fall through */
|
|---|
| 289 | case JISX0201_Latin:
|
|---|
| 290 | u = conv->jisx0201ToUnicode(ch);
|
|---|
| 291 | result += QValidChar(u);
|
|---|
| 292 | break;
|
|---|
| 293 | case JISX0201_Kana:
|
|---|
| 294 | u = conv->jisx0201ToUnicode(ch | 0x80);
|
|---|
| 295 | result += QValidChar(u);
|
|---|
| 296 | break;
|
|---|
| 297 | case JISX0208_1978:
|
|---|
| 298 | case JISX0208_1983:
|
|---|
| 299 | case JISX0212:
|
|---|
| 300 | buf[nbuf++] = ch;
|
|---|
| 301 | break;
|
|---|
| 302 | default:
|
|---|
| 303 | result += QChar::ReplacementCharacter;
|
|---|
| 304 | break;
|
|---|
| 305 | }
|
|---|
| 306 | break;
|
|---|
| 307 | case 1:
|
|---|
| 308 | switch (state) {
|
|---|
| 309 | case JISX0208_1978:
|
|---|
| 310 | case JISX0208_1983:
|
|---|
| 311 | u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
|
|---|
| 312 | result += QValidChar(u);
|
|---|
| 313 | break;
|
|---|
| 314 | case JISX0212:
|
|---|
| 315 | u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
|
|---|
| 316 | result += QValidChar(u);
|
|---|
| 317 | break;
|
|---|
| 318 | default:
|
|---|
| 319 | result += replacement;
|
|---|
| 320 | ++invalid;
|
|---|
| 321 | break;
|
|---|
| 322 | }
|
|---|
| 323 | nbuf = 0;
|
|---|
| 324 | break;
|
|---|
| 325 | }
|
|---|
| 326 | }
|
|---|
| 327 | }
|
|---|
| 328 | }
|
|---|
| 329 |
|
|---|
| 330 | if (cs) {
|
|---|
| 331 | cs->remainingChars = nbuf;
|
|---|
| 332 | cs->invalidChars += invalid;
|
|---|
| 333 | cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
|
|---|
| 334 | cs->state_data[1] = (prev << 8) + state;
|
|---|
| 335 | cs->state_data[2] = esc;
|
|---|
| 336 | }
|
|---|
| 337 |
|
|---|
| 338 | return result;
|
|---|
| 339 | }
|
|---|
| 340 |
|
|---|
| 341 |
|
|---|
| 342 |
|
|---|
| 343 | /*! \internal */
|
|---|
| 344 | int QJisCodec::_mibEnum()
|
|---|
| 345 | {
|
|---|
| 346 | return 39;
|
|---|
| 347 | }
|
|---|
| 348 |
|
|---|
| 349 | /*! \internal */
|
|---|
| 350 | QByteArray QJisCodec::_name()
|
|---|
| 351 | {
|
|---|
| 352 | return "ISO-2022-JP";
|
|---|
| 353 | }
|
|---|
| 354 |
|
|---|
| 355 | /*!
|
|---|
| 356 | Returns the codec's mime name.
|
|---|
| 357 | */
|
|---|
| 358 | QList<QByteArray> QJisCodec::_aliases()
|
|---|
| 359 | {
|
|---|
| 360 | QList<QByteArray> list;
|
|---|
| 361 | list << "JIS7"; // Qt 3 compat
|
|---|
| 362 | return list;
|
|---|
| 363 | }
|
|---|
| 364 |
|
|---|
| 365 | #endif // QT_NO_TEXTCODEC
|
|---|
| 366 |
|
|---|
| 367 | QT_END_NAMESPACE
|
|---|