1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** All rights reserved.
|
---|
5 | ** Contact: Nokia Corporation ([email protected])
|
---|
6 | **
|
---|
7 | ** This file is part of the QtCore module of the Qt Toolkit.
|
---|
8 | **
|
---|
9 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
10 | ** Commercial Usage
|
---|
11 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
12 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
13 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
14 | ** a written agreement between you and Nokia.
|
---|
15 | **
|
---|
16 | ** GNU Lesser General Public License Usage
|
---|
17 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
18 | ** General Public License version 2.1 as published by the Free Software
|
---|
19 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
20 | ** packaging of this file. Please review the following information to
|
---|
21 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
22 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
23 | **
|
---|
24 | ** In addition, as a special exception, Nokia gives you certain additional
|
---|
25 | ** rights. These rights are described in the Nokia Qt LGPL Exception
|
---|
26 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you have questions regarding the use of this file, please contact
|
---|
37 | ** Nokia at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 |
|
---|
42 | #include "qplatformdefs.h"
|
---|
43 | #include "qtextcodec.h"
|
---|
44 | #include "qtextcodec_p.h"
|
---|
45 |
|
---|
46 | #ifndef QT_NO_TEXTCODEC
|
---|
47 |
|
---|
48 | #include "qlist.h"
|
---|
49 | #include "qfile.h"
|
---|
50 | #ifndef QT_NO_LIBRARY
|
---|
51 | # include "qcoreapplication.h"
|
---|
52 | # include "qtextcodecplugin.h"
|
---|
53 | # include "private/qfactoryloader_p.h"
|
---|
54 | #endif
|
---|
55 | #include "qstringlist.h"
|
---|
56 |
|
---|
57 | #ifdef Q_OS_UNIX
|
---|
58 | # include "qiconvcodec_p.h"
|
---|
59 | #endif
|
---|
60 |
|
---|
61 | #if defined(Q_OS_OS2)
|
---|
62 | # include <unidef.h>
|
---|
63 | # include <uconv.h>
|
---|
64 | # include "qvector.h"
|
---|
65 | #endif
|
---|
66 |
|
---|
67 | #include "qutfcodec_p.h"
|
---|
68 | #include "qsimplecodec_p.h"
|
---|
69 | #include "qlatincodec_p.h"
|
---|
70 | #ifndef QT_NO_CODECS
|
---|
71 | # include "qtsciicodec_p.h"
|
---|
72 | # include "qisciicodec_p.h"
|
---|
73 | #ifndef Q_OS_SYMBIAN
|
---|
74 | # if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
|
---|
75 | // no iconv(3) support, must build all codecs into the library
|
---|
76 | # include "../../plugins/codecs/cn/qgb18030codec.h"
|
---|
77 | # include "../../plugins/codecs/jp/qeucjpcodec.h"
|
---|
78 | # include "../../plugins/codecs/jp/qjiscodec.h"
|
---|
79 | # include "../../plugins/codecs/jp/qsjiscodec.h"
|
---|
80 | # include "../../plugins/codecs/kr/qeuckrcodec.h"
|
---|
81 | # include "../../plugins/codecs/tw/qbig5codec.h"
|
---|
82 | # endif // QT_NO_ICONV
|
---|
83 | # if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
|
---|
84 | # include "qfontlaocodec_p.h"
|
---|
85 | # include "../../plugins/codecs/jp/qfontjpcodec.h"
|
---|
86 | # endif
|
---|
87 | #endif // Q_OS_SYMBIAN
|
---|
88 | #endif // QT_NO_CODECS
|
---|
89 | #include "qlocale.h"
|
---|
90 | #include "qmutex.h"
|
---|
91 | #include "qhash.h"
|
---|
92 |
|
---|
93 | #include <stdlib.h>
|
---|
94 | #include <ctype.h>
|
---|
95 | #include <locale.h>
|
---|
96 | #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
|
---|
97 | #include <langinfo.h>
|
---|
98 | #endif
|
---|
99 |
|
---|
100 | #if defined(Q_OS_WINCE)
|
---|
101 | # define QT_NO_SETLOCALE
|
---|
102 | #endif
|
---|
103 |
|
---|
104 | #ifdef Q_OS_SYMBIAN
|
---|
105 | #include "qtextcodec_symbian.cpp"
|
---|
106 | #endif
|
---|
107 |
|
---|
108 |
|
---|
109 | // enabling this is not exception safe!
|
---|
110 | // #define Q_DEBUG_TEXTCODEC
|
---|
111 |
|
---|
112 | QT_BEGIN_NAMESPACE
|
---|
113 |
|
---|
114 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
|
---|
// Global QFactoryLoader, reached through loader(), constructed with the
// text-codec factory interface id and the "/codecs" suffix. It only exists
// when both library and text-codec plugin support are compiled in.
115 | Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
|
---|
116 | (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
|
---|
117 | #endif
|
---|
118 |
|
---|
119 | // Cache for QTextCodec::codecForName() and codecForMib(): a hash from a QByteArray key to the codec pointer.
|
---|
120 | typedef QHash<QByteArray, QTextCodec *> QTextCodecCache;
|
---|
// Global instance of the cache, reached through qTextCodecCache().
121 | Q_GLOBAL_STATIC(QTextCodecCache, qTextCodecCache)
|
---|
122 |
|
---|
123 |
|
---|
// Locale-independent ASCII lower-casing: maps 'A'..'Z' onto 'a'..'z' and
// returns every other character unchanged.
// (The former 'register' storage class was dropped: it has no effect on
// code generation and is no longer valid in C++17.)
static char qtolower(char c)
{
    if (c >= 'A' && c <= 'Z')
        return c + 0x20;
    return c;
}
|
---|
// Locale-independent test for an ASCII letter or digit.
// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z', so a single range check
// covers both cases.
// (The former 'register' storage class was dropped: it has no effect on
// code generation and is no longer valid in C++17.)
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
|
---|
128 |
|
---|
129 | static bool nameMatch(const QByteArray &name, const QByteArray &test)
|
---|
130 | {
|
---|
131 | // if they're the same, return a perfect score
|
---|
132 | if (qstricmp(name, test) == 0)
|
---|
133 | return true;
|
---|
134 |
|
---|
135 | const char *n = name.constData();
|
---|
136 | const char *h = test.constData();
|
---|
137 |
|
---|
138 | // if the letters and numbers are the same, we have a match
|
---|
139 | while (*n != '\0') {
|
---|
140 | if (qisalnum(*n)) {
|
---|
141 | for (;;) {
|
---|
142 | if (*h == '\0')
|
---|
143 | return false;
|
---|
144 | if (qisalnum(*h))
|
---|
145 | break;
|
---|
146 | ++h;
|
---|
147 | }
|
---|
148 | if (qtolower(*n) != qtolower(*h))
|
---|
149 | return false;
|
---|
150 | ++h;
|
---|
151 | }
|
---|
152 | ++n;
|
---|
153 | }
|
---|
154 | while (*h && !qisalnum(*h))
|
---|
155 | ++h;
|
---|
156 | return (*h == '\0');
|
---|
157 | }
|
---|
158 |
|
---|
159 |
|
---|
160 | static QTextCodec *createForName(const QByteArray &name)
|
---|
161 | {
|
---|
162 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
|
---|
163 | QFactoryLoader *l = loader();
|
---|
164 | QStringList keys = l->keys();
|
---|
165 | for (int i = 0; i < keys.size(); ++i) {
|
---|
166 | if (nameMatch(name, keys.at(i).toLatin1())) {
|
---|
167 | QString realName = keys.at(i);
|
---|
168 | if (QTextCodecFactoryInterface *factory
|
---|
169 | = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
|
---|
170 | return factory->create(realName);
|
---|
171 | }
|
---|
172 | }
|
---|
173 | }
|
---|
174 | #else
|
---|
175 | Q_UNUSED(name);
|
---|
176 | #endif
|
---|
177 | return 0;
|
---|
178 | }
|
---|
179 |
|
---|
180 | static QTextCodec *createForMib(int mib)
|
---|
181 | {
|
---|
182 | #ifndef QT_NO_TEXTCODECPLUGIN
|
---|
183 | QString name = QLatin1String("MIB: ") + QString::number(mib);
|
---|
184 | if (QTextCodecFactoryInterface *factory
|
---|
185 | = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
|
---|
186 | return factory->create(name);
|
---|
187 | #else
|
---|
188 | Q_UNUSED(mib);
|
---|
189 | #endif
|
---|
190 | return 0;
|
---|
191 | }
|
---|
192 |
|
---|
// List of codec instances; ~QTextCodecCleanup() deletes every entry and then the list itself.
193 | static QList<QTextCodec*> *all = 0;
|
---|
194 | #ifdef Q_DEBUG_TEXTCODEC
|
---|
// Debug-only flag that ~QTextCodecCleanup() sets to true while it is deleting the codecs.
195 | static bool destroying_is_ok = false;
|
---|
196 | #endif
|
---|
197 |
|
---|
// Codec for the system locale; assigned by setupLocaleMapper() and reset to 0 by ~QTextCodecCleanup().
198 | static QTextCodec *localeMapper = 0;
|
---|
// NOTE(review): presumably the codec used for tr(); its users are outside this part of the file - confirm.
199 | QTextCodec *QTextCodec::cftr = 0;
|
---|
200 |
|
---|
201 |
|
---|
202 | class QTextCodecCleanup
|
---|
203 | {
|
---|
204 | public:
|
---|
205 | ~QTextCodecCleanup();
|
---|
206 | };
|
---|
207 |
|
---|
208 | /*
|
---|
209 | Deletes all the created codecs. This destructor is called just
|
---|
210 | before exiting to delete any QTextCodec objects that may be lying
|
---|
211 | around.
|
---|
212 | */
|
---|
213 | QTextCodecCleanup::~QTextCodecCleanup()
|
---|
214 | {
|
---|
215 | if (!all)
|
---|
216 | return;
|
---|
217 |
|
---|
218 | #ifdef Q_DEBUG_TEXTCODEC
|
---|
219 | destroying_is_ok = true;
|
---|
220 | #endif
|
---|
221 |
|
---|
222 | for (QList<QTextCodec *>::const_iterator it = all->constBegin()
|
---|
223 | ; it != all->constEnd(); ++it) {
|
---|
224 | delete *it;
|
---|
225 | }
|
---|
226 | delete all;
|
---|
227 | all = 0;
|
---|
228 | localeMapper = 0;
|
---|
229 |
|
---|
230 | #ifdef Q_DEBUG_TEXTCODEC
|
---|
231 | destroying_is_ok = false;
|
---|
232 | #endif
|
---|
233 | }
|
---|
234 |
|
---|
235 | Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
|
---|
236 |
|
---|
237 | bool QTextCodec::validCodecs()
|
---|
238 | {
|
---|
239 | #ifdef Q_OS_SYMBIAN
|
---|
240 | // If we don't have a trap handler, we're outside of the main() function,
|
---|
241 | // ie. in global constructors or destructors. Don't use codecs in this
|
---|
242 | // case as it would lead to crashes because we don't have a cleanup stack on Symbian
|
---|
243 | return (User::TrapHandler() != NULL);
|
---|
244 | #else
|
---|
245 | return true;
|
---|
246 | #endif
|
---|
247 | }
|
---|
248 |
|
---|
249 |
|
---|
250 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
|
---|
251 | class QWindowsLocalCodec: public QTextCodec
|
---|
252 | {
|
---|
253 | public:
|
---|
254 | QWindowsLocalCodec();
|
---|
255 | ~QWindowsLocalCodec();
|
---|
256 |
|
---|
257 | QString convertToUnicode(const char *, int, ConverterState *) const;
|
---|
258 | QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
|
---|
259 | QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
|
---|
260 |
|
---|
261 | QByteArray name() const;
|
---|
262 | int mibEnum() const;
|
---|
263 |
|
---|
264 | };
|
---|
265 |
|
---|
266 | QWindowsLocalCodec::QWindowsLocalCodec()
|
---|
267 | {
|
---|
268 | }
|
---|
269 |
|
---|
270 | QWindowsLocalCodec::~QWindowsLocalCodec()
|
---|
271 | {
|
---|
272 | }
|
---|
273 |
|
---|
274 | QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
|
---|
275 | {
|
---|
276 | const char *mb = chars;
|
---|
277 | int mblen = length;
|
---|
278 |
|
---|
279 | if (!mb || !mblen)
|
---|
280 | return QString();
|
---|
281 |
|
---|
282 | const int wclen_auto = 4096;
|
---|
283 | wchar_t wc_auto[wclen_auto];
|
---|
284 | int wclen = wclen_auto;
|
---|
285 | wchar_t *wc = wc_auto;
|
---|
286 | int len;
|
---|
287 | QString sp;
|
---|
288 | bool prepend = false;
|
---|
289 | char state_data = 0;
|
---|
290 | int remainingChars = 0;
|
---|
291 |
|
---|
292 | //save the current state information
|
---|
293 | if (state) {
|
---|
294 | state_data = (char)state->state_data[0];
|
---|
295 | remainingChars = state->remainingChars;
|
---|
296 | }
|
---|
297 |
|
---|
298 | //convert the pending charcter (if available)
|
---|
299 | if (state && remainingChars) {
|
---|
300 | char prev[3] = {0};
|
---|
301 | prev[0] = state_data;
|
---|
302 | prev[1] = mb[0];
|
---|
303 | remainingChars = 0;
|
---|
304 | len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
|
---|
305 | prev, 2, wc, wclen);
|
---|
306 | if (len) {
|
---|
307 | prepend = true;
|
---|
308 | sp.append(QChar(wc[0]));
|
---|
309 | mb++;
|
---|
310 | mblen--;
|
---|
311 | wc[0] = 0;
|
---|
312 | }
|
---|
313 | }
|
---|
314 |
|
---|
315 | while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
|
---|
316 | mb, mblen, wc, wclen))) {
|
---|
317 | int r = GetLastError();
|
---|
318 | if (r == ERROR_INSUFFICIENT_BUFFER) {
|
---|
319 | if (wc != wc_auto) {
|
---|
320 | qWarning("MultiByteToWideChar: Size changed");
|
---|
321 | break;
|
---|
322 | } else {
|
---|
323 | wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
|
---|
324 | mb, mblen, 0, 0);
|
---|
325 | wc = new wchar_t[wclen];
|
---|
326 | // and try again...
|
---|
327 | }
|
---|
328 | } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
|
---|
329 | //find the last non NULL character
|
---|
330 | while (mblen > 1 && !(mb[mblen-1]))
|
---|
331 | mblen--;
|
---|
332 | //check whether, we hit an invalid character in the middle
|
---|
333 | if ((mblen <= 1) || (remainingChars && state_data))
|
---|
334 | return convertToUnicodeCharByChar(chars, length, state);
|
---|
335 | //Remove the last character and try again...
|
---|
336 | state_data = mb[mblen-1];
|
---|
337 | remainingChars = 1;
|
---|
338 | mblen--;
|
---|
339 | } else {
|
---|
340 | // Fail.
|
---|
341 | qWarning("MultiByteToWideChar: Cannot convert multibyte text");
|
---|
342 | break;
|
---|
343 | }
|
---|
344 | }
|
---|
345 | if (len <= 0)
|
---|
346 | return QString();
|
---|
347 | if (wc[len-1] == 0) // len - 1: we don't want terminator
|
---|
348 | --len;
|
---|
349 |
|
---|
350 | //save the new state information
|
---|
351 | if (state) {
|
---|
352 | state->state_data[0] = (char)state_data;
|
---|
353 | state->remainingChars = remainingChars;
|
---|
354 | }
|
---|
355 | QString s((QChar*)wc, len);
|
---|
356 | if (wc != wc_auto)
|
---|
357 | delete [] wc;
|
---|
358 | if (prepend) {
|
---|
359 | return sp+s;
|
---|
360 | }
|
---|
361 | return s;
|
---|
362 | }
|
---|
363 |
|
---|
364 | QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
|
---|
365 | {
|
---|
366 | if (!chars || !length)
|
---|
367 | return QString();
|
---|
368 |
|
---|
369 | int copyLocation = 0;
|
---|
370 | int extra = 2;
|
---|
371 | if (state && state->remainingChars) {
|
---|
372 | copyLocation = state->remainingChars;
|
---|
373 | extra += copyLocation;
|
---|
374 | }
|
---|
375 | int newLength = length + extra;
|
---|
376 | char *mbcs = new char[newLength];
|
---|
377 | //ensure that we have a NULL terminated string
|
---|
378 | mbcs[newLength-1] = 0;
|
---|
379 | mbcs[newLength-2] = 0;
|
---|
380 | memcpy(&(mbcs[copyLocation]), chars, length);
|
---|
381 | if (copyLocation) {
|
---|
382 | //copy the last character from the state
|
---|
383 | mbcs[0] = (char)state->state_data[0];
|
---|
384 | state->remainingChars = 0;
|
---|
385 | }
|
---|
386 | const char *mb = mbcs;
|
---|
387 | #ifndef Q_OS_WINCE
|
---|
388 | const char *next = 0;
|
---|
389 | QString s;
|
---|
390 | while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
|
---|
391 | wchar_t wc[2] ={0};
|
---|
392 | int charlength = next - mb;
|
---|
393 | int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
|
---|
394 | if (len>0) {
|
---|
395 | s.append(QChar(wc[0]));
|
---|
396 | } else {
|
---|
397 | int r = GetLastError();
|
---|
398 | //check if the character being dropped is the last character
|
---|
399 | if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
|
---|
400 | state->remainingChars = 1;
|
---|
401 | state->state_data[0] = (char)*mb;
|
---|
402 | }
|
---|
403 | }
|
---|
404 | mb = next;
|
---|
405 | }
|
---|
406 | #else
|
---|
407 | QString s;
|
---|
408 | int size = mbstowcs(NULL, mb, length);
|
---|
409 | if (size < 0) {
|
---|
410 | Q_ASSERT("Error in CE TextCodec");
|
---|
411 | return QString();
|
---|
412 | }
|
---|
413 | wchar_t* ws = new wchar_t[size + 2];
|
---|
414 | ws[size +1] = 0;
|
---|
415 | ws[size] = 0;
|
---|
416 | size = mbstowcs(ws, mb, length);
|
---|
417 | for (int i=0; i< size; i++)
|
---|
418 | s.append(QChar(ws[i]));
|
---|
419 | delete [] ws;
|
---|
420 | #endif
|
---|
421 | delete mbcs;
|
---|
422 | return s;
|
---|
423 | }
|
---|
424 |
|
---|
425 | QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const
|
---|
426 | {
|
---|
427 | if (!ch)
|
---|
428 | return QByteArray();
|
---|
429 | if (uclen == 0)
|
---|
430 | return QByteArray("");
|
---|
431 | BOOL used_def;
|
---|
432 | QByteArray mb(4096, 0);
|
---|
433 | int len;
|
---|
434 | while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
|
---|
435 | mb.data(), mb.size()-1, 0, &used_def)))
|
---|
436 | {
|
---|
437 | int r = GetLastError();
|
---|
438 | if (r == ERROR_INSUFFICIENT_BUFFER) {
|
---|
439 | mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
|
---|
440 | (const wchar_t*)ch, uclen,
|
---|
441 | 0, 0, 0, &used_def));
|
---|
442 | // and try again...
|
---|
443 | } else {
|
---|
444 | #ifndef QT_NO_DEBUG
|
---|
445 | // Fail.
|
---|
446 | qWarning("WideCharToMultiByte: Cannot convert multibyte text (error %d): %s (UTF-8)",
|
---|
447 | r, QString(ch, uclen).toLocal8Bit().data());
|
---|
448 | #endif
|
---|
449 | break;
|
---|
450 | }
|
---|
451 | }
|
---|
452 | mb.resize(len);
|
---|
453 | return mb;
|
---|
454 | }
|
---|
455 |
|
---|
456 |
|
---|
457 | QByteArray QWindowsLocalCodec::name() const
|
---|
458 | {
|
---|
459 | return "System";
|
---|
460 | }
|
---|
461 |
|
---|
462 | int QWindowsLocalCodec::mibEnum() const
|
---|
463 | {
|
---|
464 | return 0;
|
---|
465 | }
|
---|
466 |
|
---|
467 | #elif defined(Q_OS_OS2)
|
---|
468 |
|
---|
469 | class QOs2LocalCodec: public QTextCodec
|
---|
470 | {
|
---|
471 | public:
|
---|
472 | QOs2LocalCodec();
|
---|
473 | ~QOs2LocalCodec();
|
---|
474 |
|
---|
475 | QString convertToUnicode(const char *, int, ConverterState *) const;
|
---|
476 | QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
|
---|
477 |
|
---|
478 | QByteArray name() const;
|
---|
479 | int mibEnum() const;
|
---|
480 |
|
---|
481 | private:
|
---|
482 | UconvObject uoSubYes;
|
---|
483 | UconvObject uoSubNo;
|
---|
484 | };
|
---|
485 |
|
---|
486 | QOs2LocalCodec::QOs2LocalCodec() : uoSubYes(0), uoSubNo(0)
|
---|
487 | {
|
---|
488 | // create the conversion object for the process code page that performs
|
---|
489 | // substitution of invalid characters with '?'
|
---|
490 | UniCreateUconvObject((UniChar *)L"@sub=yes,subchar=\\x3F,subuni=\\x003F",
|
---|
491 | &uoSubYes);
|
---|
492 | Q_ASSERT(uoSubYes);
|
---|
493 |
|
---|
494 | // same as above but doesn't perform substitution
|
---|
495 | UniCreateUconvObject((UniChar *)L"@sub=no", &uoSubNo);
|
---|
496 | Q_ASSERT(uoSubNo);
|
---|
497 | }
|
---|
498 |
|
---|
499 | QOs2LocalCodec::~QOs2LocalCodec()
|
---|
500 | {
|
---|
501 | UniFreeUconvObject(uoSubNo);
|
---|
502 | UniFreeUconvObject(uoSubYes);
|
---|
503 | }
|
---|
504 |
|
---|
505 | static void qOs2LocalCodecStateFree(QTextCodec::ConverterState *state)
|
---|
506 | {
|
---|
507 | delete reinterpret_cast<char *>(state->d);
|
---|
508 | }
|
---|
509 |
|
---|
510 | QString QOs2LocalCodec::convertToUnicode(const char *chars, int length,
|
---|
511 | ConverterState *state) const
|
---|
512 | {
|
---|
513 | QString res;
|
---|
514 |
|
---|
515 | if (!chars)
|
---|
516 | return res;
|
---|
517 | if (!length)
|
---|
518 | return QLatin1String("");
|
---|
519 |
|
---|
520 | UconvObject uo = uoSubYes;
|
---|
521 | if (state && (state->flags & ConvertInvalidToNull))
|
---|
522 | uo = uoSubNo;
|
---|
523 |
|
---|
524 | int remainingChars = 0;
|
---|
525 | char *remainingBuffer = 0;
|
---|
526 |
|
---|
527 | if (state) {
|
---|
528 | // stateful conversion
|
---|
529 | remainingBuffer = reinterpret_cast<char *>(state->d);
|
---|
530 | if (remainingBuffer) {
|
---|
531 | // restore state
|
---|
532 | remainingChars = state->remainingChars;
|
---|
533 | } else {
|
---|
534 | // first time, add the destructor for state->d
|
---|
535 | state->flags |= FreeFunction;
|
---|
536 | QTextCodecUnalignedPointer::encode(state->state_data,
|
---|
537 | qOs2LocalCodecStateFree);
|
---|
538 | }
|
---|
539 | }
|
---|
540 |
|
---|
541 | const char *mbPtr = chars;
|
---|
542 | size_t mbLeft = length;
|
---|
543 |
|
---|
544 | QByteArray mbExtra;
|
---|
545 | if (remainingChars) {
|
---|
546 | // we have to prepend the remaining bytes from the previous conversion
|
---|
547 | mbLeft += remainingChars;
|
---|
548 | mbExtra.resize(mbLeft);
|
---|
549 | mbPtr = mbExtra.data();
|
---|
550 |
|
---|
551 | memcpy(mbExtra.data(), remainingBuffer, remainingChars);
|
---|
552 | memcpy(mbExtra.data() + remainingChars, chars, length);
|
---|
553 |
|
---|
554 | remainingBuffer = 0;
|
---|
555 | remainingChars = 0;
|
---|
556 | }
|
---|
557 |
|
---|
558 | size_t ucLen = mbLeft;
|
---|
559 | QString ucBuf(ucLen, QLatin1Char('\0'));
|
---|
560 | UniChar *ucPtr = reinterpret_cast<UniChar *>(ucBuf.data());
|
---|
561 | size_t ucLeft = ucLen;
|
---|
562 |
|
---|
563 | size_t nonIdent = 0;
|
---|
564 | int rc;
|
---|
565 |
|
---|
566 | while (mbLeft) {
|
---|
567 | rc = UniUconvToUcs(uo, (void**)&mbPtr, &mbLeft, &ucPtr, &ucLeft,
|
---|
568 | &nonIdent);
|
---|
569 | if (rc == ULS_BUFFERFULL) {
|
---|
570 | size_t ucDone = ucLen - ucLeft;
|
---|
571 | size_t mbDone = length - mbLeft;
|
---|
572 | // assume that mbLeft/ucLeft is an approximation of mbDone/ucDone
|
---|
573 | ucLen = ucDone + (mbLeft * ucDone) / mbDone;
|
---|
574 | ucBuf.resize(ucLen);
|
---|
575 | ucPtr = reinterpret_cast<UniChar *>(ucBuf.data() + ucDone);
|
---|
576 | } else if (rc == ULS_ILLEGALSEQUENCE && state) {
|
---|
577 | // conversion stopped because the remaining inBytesLeft make up
|
---|
578 | // an incomplete multi-byte sequence; save them for later
|
---|
579 | remainingBuffer = new char[mbLeft];
|
---|
580 | memcpy(remainingBuffer, mbPtr, mbLeft);
|
---|
581 | remainingChars = mbLeft;
|
---|
582 | break;
|
---|
583 | } else if (rc != ULS_SUCCESS) {
|
---|
584 | // just fail on an unexpected error (will return what we've got)
|
---|
585 | qWarning("QOs2LocalCodec::convertToUnicode: UniUconvToUcs failed "
|
---|
586 | "with %d", rc);
|
---|
587 | break;
|
---|
588 | }
|
---|
589 | }
|
---|
590 |
|
---|
591 | ucBuf.resize(ucLen - ucLeft);
|
---|
592 | res = ucBuf;
|
---|
593 |
|
---|
594 | if (state) {
|
---|
595 | // update the state
|
---|
596 | state->invalidChars = nonIdent;
|
---|
597 | state->remainingChars = remainingChars;
|
---|
598 | state->d = remainingBuffer;
|
---|
599 | }
|
---|
600 |
|
---|
601 | return res;
|
---|
602 | }
|
---|
603 |
|
---|
604 | QByteArray QOs2LocalCodec::convertFromUnicode(const QChar *uchars, int length,
|
---|
605 | ConverterState *state) const
|
---|
606 | {
|
---|
607 | QByteArray res;
|
---|
608 |
|
---|
609 | if (!uchars)
|
---|
610 | return res;
|
---|
611 | if (!length)
|
---|
612 | return QByteArray("");
|
---|
613 |
|
---|
614 | UconvObject uo = uoSubYes;
|
---|
615 | if (state && (state->flags & ConvertInvalidToNull))
|
---|
616 | uo = uoSubNo;
|
---|
617 |
|
---|
618 | const UniChar *ucPtr = reinterpret_cast<const UniChar *>(uchars);
|
---|
619 | size_t ucLeft = length;
|
---|
620 |
|
---|
621 | QVector<QChar> ucExtra;
|
---|
622 | if (state && state->remainingChars) {
|
---|
623 | // we have one surrogate char to be prepended
|
---|
624 | Q_ASSERT(state->remainingChars == 1);
|
---|
625 | ucLeft += 1;
|
---|
626 | ucExtra.resize(ucLeft);
|
---|
627 | ucPtr = reinterpret_cast<const UniChar *>(ucExtra.data());
|
---|
628 |
|
---|
629 | ucExtra[0] = state->state_data[0];
|
---|
630 | memcpy(ucExtra.data() + 1, uchars, length * sizeof(QChar));
|
---|
631 |
|
---|
632 | state->remainingChars = 0;
|
---|
633 | }
|
---|
634 |
|
---|
635 | // be optimistic (imply that one byte is necessary per every Unicode char)
|
---|
636 | size_t mbLen = length;
|
---|
637 | QByteArray mbBuf(mbLen, '\0');
|
---|
638 | char *mbPtr = mbBuf.data();
|
---|
639 | size_t mbLeft = mbLen;
|
---|
640 |
|
---|
641 | size_t nonIdent = 0;
|
---|
642 | int rc;
|
---|
643 |
|
---|
644 | while (ucLeft) {
|
---|
645 | rc = UniUconvFromUcs(uo, const_cast<UniChar **>(&ucPtr), &ucLeft,
|
---|
646 | (void**)&mbPtr, &mbLeft, &nonIdent);
|
---|
647 | if (rc == ULS_BUFFERFULL) {
|
---|
648 | size_t mbDone = mbLen - mbLeft;
|
---|
649 | size_t ucDone = length - ucLeft;
|
---|
650 | size_t newLen = mbLen;
|
---|
651 | if (ucDone) {
|
---|
652 | // assume that ucLeft/mbLeft is an approximation of ucDone/mbDone
|
---|
653 | newLen = mbDone + (ucLeft * mbDone) / ucDone;
|
---|
654 | }
|
---|
655 | if (newLen == mbLen) {
|
---|
656 | // could not process a single Unicode char, double the size
|
---|
657 | mbLen *= 2;
|
---|
658 | } else {
|
---|
659 | mbLen = newLen;
|
---|
660 | }
|
---|
661 | mbBuf.resize(mbLen);
|
---|
662 | mbPtr = mbBuf.data() + mbDone;
|
---|
663 | mbLeft = mbLen - mbDone;
|
---|
664 | } else if (rc == ULS_ILLEGALSEQUENCE && state) {
|
---|
665 | // buffer ends in a surrogate
|
---|
666 | Q_ASSERT(ucLeft == 2);
|
---|
667 | state->state_data[0] = *ucPtr;
|
---|
668 | state->remainingChars = 1;
|
---|
669 | break;
|
---|
670 | } else if (rc != ULS_SUCCESS) {
|
---|
671 | // just fail on an unexpected error (will return what we've got)
|
---|
672 | qWarning("QOs2LocalCodec::convertFromUnicode: UniUconvFromUcs failed "
|
---|
673 | "with %d", rc);
|
---|
674 | break;
|
---|
675 | }
|
---|
676 | }
|
---|
677 |
|
---|
678 | mbBuf.resize(mbLen - mbLeft);
|
---|
679 | res = mbBuf;
|
---|
680 |
|
---|
681 | if (state) {
|
---|
682 | // update the state
|
---|
683 | state->invalidChars = nonIdent;
|
---|
684 | }
|
---|
685 |
|
---|
686 | return res;
|
---|
687 | }
|
---|
688 |
|
---|
689 | QByteArray QOs2LocalCodec::name() const
|
---|
690 | {
|
---|
691 | return "System";
|
---|
692 | }
|
---|
693 |
|
---|
694 | int QOs2LocalCodec::mibEnum() const
|
---|
695 | {
|
---|
696 | return 0;
|
---|
697 | }
|
---|
698 |
|
---|
699 | #else
|
---|
700 |
|
---|
/* locale names mostly copied from XFree86 */

// Every table below is a list of locale names terminated by a null pointer,
// which is the layout try_locale_list() scans.
// NOTE(review): each table is named after a character set; presumably it is
// the set assumed for the listed locales - the lookups are outside this
// part of the file.

static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ", "cz", "cz_CZ", "czech",
    "hr", "hr_HR", "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE", "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};
|
---|
753 | // static const char * const tcvnlocales[] = {
|
---|
754 | // "vi", "vi_VN", 0 };
|
---|
755 |
|
---|
756 | static bool try_locale_list(const char * const locale[], const QByteArray &lang)
|
---|
757 | {
|
---|
758 | int i;
|
---|
759 | for(i=0; locale[i] && lang != locale[i]; i++)
|
---|
760 | ;
|
---|
761 | return locale[i] != 0;
|
---|
762 | }
|
---|
763 |
|
---|
764 | // For the probably_koi8_locales we have to look. the standard says
|
---|
765 | // these are 8859-5, but almost all Russian users use KOI8-R and
|
---|
766 | // incorrectly set $LANG to ru_RU. We'll check tolower() to see what
|
---|
767 | // it thinks ru_RU means.
|
---|
768 |
|
---|
769 | // If you read the history, it seems that many Russians blame ISO and
|
---|
770 | // Perestroika for the confusion.
|
---|
771 | //
|
---|
772 | // The real bug is that some programs break if the user specifies
|
---|
773 | // ru_RU.KOI8-R.
|
---|
774 |
|
---|
775 | static const char * const probably_koi8_rlocales[] = {
|
---|
776 | "ru", "ru_SU", "ru_RU", "russian", 0 };
|
---|
777 |
|
---|
778 | static QTextCodec * ru_RU_hack(const char * i) {
|
---|
779 | #if defined(Q_OS_OS2)
|
---|
780 | // @todo temporary hack. the proper one is to use the current process'
|
---|
781 | // code page if LANG or its codepage part is missing
|
---|
782 | return QTextCodec::codecForName("cp866");
|
---|
783 | #else
|
---|
784 | QTextCodec * ru_RU_codec = 0;
|
---|
785 |
|
---|
786 | #if !defined(QT_NO_SETLOCALE)
|
---|
787 | QByteArray origlocale(setlocale(LC_CTYPE, i));
|
---|
788 | #else
|
---|
789 | QByteArray origlocale(i);
|
---|
790 | #endif
|
---|
791 | // unicode koi8r latin5 name
|
---|
792 | // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
|
---|
793 | // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
|
---|
794 | int latin5 = tolower(0xCE);
|
---|
795 | int koi8r = tolower(0xE0);
|
---|
796 | if (koi8r == 0xC0 && latin5 != 0xEE) {
|
---|
797 | ru_RU_codec = QTextCodec::codecForName("KOI8-R");
|
---|
798 | } else if (koi8r != 0xC0 && latin5 == 0xEE) {
|
---|
799 | ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
|
---|
800 | } else {
|
---|
801 | // something else again... let's assume... *throws dice*
|
---|
802 | ru_RU_codec = QTextCodec::codecForName("KOI8-R");
|
---|
803 | qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
|
---|
804 | koi8r, latin5, i);
|
---|
805 | }
|
---|
806 | #if !defined(QT_NO_SETLOCALE)
|
---|
807 | setlocale(LC_CTYPE, origlocale);
|
---|
808 | #endif
|
---|
809 |
|
---|
810 | return ru_RU_codec;
|
---|
811 | #endif // defined(Q_OS_OS2)
|
---|
812 | }
|
---|
813 |
|
---|
814 | #endif
|
---|
815 |
|
---|
816 | #if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE) && !defined(Q_OS_OS2)
|
---|
817 | static QTextCodec *checkForCodec(const QByteArray &name) {
|
---|
818 | QTextCodec *c = QTextCodec::codecForName(name);
|
---|
819 | if (!c) {
|
---|
820 | const int index = name.indexOf('@');
|
---|
821 | if (index != -1) {
|
---|
822 | c = QTextCodec::codecForName(name.left(index));
|
---|
823 | }
|
---|
824 | }
|
---|
825 | return c;
|
---|
826 | }
|
---|
827 | #endif
|
---|
828 |
|
---|
829 | /* the next two functions are implicitly thread safe,
|
---|
830 | as they are only called by setup() which uses a mutex.
|
---|
831 | */
|
---|
832 | static void setupLocaleMapper()
|
---|
833 | {
|
---|
834 | #ifdef Q_OS_SYMBIAN
|
---|
835 | localeMapper = QSymbianTextCodec::localeMapper;
|
---|
836 | if (localeMapper)
|
---|
837 | return;
|
---|
838 | #endif
|
---|
839 |
|
---|
840 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
|
---|
841 | localeMapper = QTextCodec::codecForName("System");
|
---|
842 | #elif defined(Q_OS_OS2)
|
---|
843 | localeMapper = QTextCodec::codecForName("System");
|
---|
844 | #else
|
---|
845 |
|
---|
846 | #ifndef QT_NO_ICONV
|
---|
847 | localeMapper = QTextCodec::codecForName("System");
|
---|
848 | #endif
|
---|
849 |
|
---|
850 | #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
|
---|
851 | if (!localeMapper) {
|
---|
852 | char *charset = nl_langinfo (CODESET);
|
---|
853 | if (charset)
|
---|
854 | localeMapper = QTextCodec::codecForName(charset);
|
---|
855 | }
|
---|
856 | #endif
|
---|
857 |
|
---|
858 | if (!localeMapper) {
|
---|
859 | // Very poorly defined and followed standards causes lots of
|
---|
860 | // code to try to get all the cases... This logic is
|
---|
861 | // duplicated in QIconvCodec, so if you change it here, change
|
---|
862 | // it there too.
|
---|
863 |
|
---|
864 | // Try to determine locale codeset from locale name assigned to
|
---|
865 | // LC_CTYPE category.
|
---|
866 |
|
---|
867 | // First part is getting that locale name. First try setlocale() which
|
---|
868 | // definitely knows it, but since we cannot fully trust it, get ready
|
---|
869 | // to fall back to environment variables.
|
---|
870 | #if !defined(QT_NO_SETLOCALE)
|
---|
871 | const QByteArray ctype = setlocale(LC_CTYPE, 0);
|
---|
872 | #else
|
---|
873 | const QByteArray ctype;
|
---|
874 | #endif
|
---|
875 |
|
---|
876 | // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
|
---|
877 | // environment variables.
|
---|
878 | QByteArray lang = qgetenv("LC_ALL");
|
---|
879 | if (lang.isEmpty() || lang == "C") {
|
---|
880 | lang = qgetenv("LC_CTYPE");
|
---|
881 | }
|
---|
882 | if (lang.isEmpty() || lang == "C") {
|
---|
883 | lang = qgetenv("LANG");
|
---|
884 | }
|
---|
885 |
|
---|
886 | // Now try these in order:
|
---|
887 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
|
---|
888 | // 2. CODESET from lang if it contains a .CODESET part
|
---|
889 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
|
---|
890 | // 4. locale (ditto)
|
---|
891 | // 5. check for "@euro"
|
---|
892 | // 6. guess locale from ctype unless ctype is "C"
|
---|
893 | // 7. guess locale from lang
|
---|
894 |
|
---|
895 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
|
---|
896 | int indexOfDot = ctype.indexOf('.');
|
---|
897 | if (indexOfDot != -1)
|
---|
898 | localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) );
|
---|
899 |
|
---|
900 | // 2. CODESET from lang if it contains a .CODESET part
|
---|
901 | if (!localeMapper) {
|
---|
902 | indexOfDot = lang.indexOf('.');
|
---|
903 | if (indexOfDot != -1)
|
---|
904 | localeMapper = checkForCodec( lang.mid(indexOfDot + 1) );
|
---|
905 | }
|
---|
906 |
|
---|
907 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
|
---|
908 | if (!localeMapper && !ctype.isEmpty() && ctype != "C")
|
---|
909 | localeMapper = checkForCodec(ctype);
|
---|
910 |
|
---|
911 | // 4. locale (ditto)
|
---|
912 | if (!localeMapper && !lang.isEmpty())
|
---|
913 | localeMapper = checkForCodec(lang);
|
---|
914 |
|
---|
915 | // 5. "@euro"
|
---|
916 | if ((!localeMapper && ctype.contains("@euro")) || lang.contains("@euro"))
|
---|
917 | localeMapper = checkForCodec("ISO 8859-15");
|
---|
918 |
|
---|
919 | // 6. guess locale from ctype unless ctype is "C"
|
---|
920 | // 7. guess locale from lang
|
---|
921 | const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype;
|
---|
922 |
|
---|
923 | // Now do the guessing.
|
---|
924 | if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) {
|
---|
925 | if (try_locale_list(iso8859_15locales, lang))
|
---|
926 | localeMapper = QTextCodec::codecForName("ISO 8859-15");
|
---|
927 | else if (try_locale_list(iso8859_2locales, lang))
|
---|
928 | localeMapper = QTextCodec::codecForName("ISO 8859-2");
|
---|
929 | else if (try_locale_list(iso8859_3locales, lang))
|
---|
930 | localeMapper = QTextCodec::codecForName("ISO 8859-3");
|
---|
931 | else if (try_locale_list(iso8859_4locales, lang))
|
---|
932 | localeMapper = QTextCodec::codecForName("ISO 8859-4");
|
---|
933 | else if (try_locale_list(iso8859_5locales, lang))
|
---|
934 | localeMapper = QTextCodec::codecForName("ISO 8859-5");
|
---|
935 | else if (try_locale_list(iso8859_6locales, lang))
|
---|
936 | localeMapper = QTextCodec::codecForName("ISO 8859-6");
|
---|
937 | else if (try_locale_list(iso8859_7locales, lang))
|
---|
938 | localeMapper = QTextCodec::codecForName("ISO 8859-7");
|
---|
939 | else if (try_locale_list(iso8859_8locales, lang))
|
---|
940 | localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
|
---|
941 | else if (try_locale_list(iso8859_9locales, lang))
|
---|
942 | localeMapper = QTextCodec::codecForName("ISO 8859-9");
|
---|
943 | else if (try_locale_list(iso8859_13locales, lang))
|
---|
944 | localeMapper = QTextCodec::codecForName("ISO 8859-13");
|
---|
945 | else if (try_locale_list(tis_620locales, lang))
|
---|
946 | localeMapper = QTextCodec::codecForName("ISO 8859-11");
|
---|
947 | else if (try_locale_list(koi8_ulocales, lang))
|
---|
948 | localeMapper = QTextCodec::codecForName("KOI8-U");
|
---|
949 | else if (try_locale_list(cp_1251locales, lang))
|
---|
950 | localeMapper = QTextCodec::codecForName("CP 1251");
|
---|
951 | else if (try_locale_list(pt_154locales, lang))
|
---|
952 | localeMapper = QTextCodec::codecForName("PT 154");
|
---|
953 | else if (try_locale_list(probably_koi8_rlocales, lang))
|
---|
954 | localeMapper = ru_RU_hack(lang);
|
---|
955 | }
|
---|
956 |
|
---|
957 | }
|
---|
958 |
|
---|
959 | // If everything failed, we default to 8859-1
|
---|
960 | // We could perhaps default to 8859-15.
|
---|
961 | if (!localeMapper)
|
---|
962 | localeMapper = QTextCodec::codecForName("ISO 8859-1");
|
---|
963 | #endif
|
---|
964 | }
|
---|
965 |
|
---|
966 | #ifndef QT_NO_THREAD
|
---|
967 | Q_GLOBAL_STATIC_WITH_ARGS(QMutex, textCodecsMutex, (QMutex::Recursive));
|
---|
968 | #endif
|
---|
969 |
|
---|
970 | // textCodecsMutex need to be locked to enter this function
|
---|
971 | static void setup()
|
---|
972 | {
|
---|
973 | if (all)
|
---|
974 | return;
|
---|
975 |
|
---|
976 | #ifdef Q_OS_SYMBIAN
|
---|
977 | // If we don't have a trap handler, we're outside of the main() function,
|
---|
978 | // ie. in global constructors or destructors. Don't create codecs in this
|
---|
979 | // case as it would lead to crashes because of a missing cleanup stack on Symbian
|
---|
980 | if (User::TrapHandler() == NULL)
|
---|
981 | return;
|
---|
982 | #endif
|
---|
983 |
|
---|
984 | #ifdef Q_DEBUG_TEXTCODEC
|
---|
985 | if (destroying_is_ok)
|
---|
986 | qWarning("QTextCodec: Creating new codec during codec cleanup");
|
---|
987 | #endif
|
---|
988 | all = new QList<QTextCodec*>;
|
---|
989 | // create the cleanup object to cleanup all codecs on exit
|
---|
990 | (void) createQTextCodecCleanup();
|
---|
991 |
|
---|
992 | #ifndef QT_NO_CODECS
|
---|
993 | (void)new QTsciiCodec;
|
---|
994 | for (int i = 0; i < 9; ++i)
|
---|
995 | (void)new QIsciiCodec(i);
|
---|
996 |
|
---|
997 | for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
|
---|
998 | (void)new QSimpleTextCodec(i);
|
---|
999 |
|
---|
1000 | #ifdef Q_OS_SYMBIAN
|
---|
1001 | localeMapper = QSymbianTextCodec::init();
|
---|
1002 | #endif
|
---|
1003 |
|
---|
1004 | # if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
|
---|
1005 | // no font codecs when bootstrapping
|
---|
1006 | (void)new QFontLaoCodec;
|
---|
1007 | # if defined(QT_NO_ICONV)
|
---|
1008 | // no iconv(3) support, must build all codecs into the library
|
---|
1009 | (void)new QFontGb2312Codec;
|
---|
1010 | (void)new QFontGbkCodec;
|
---|
1011 | (void)new QFontGb18030_0Codec;
|
---|
1012 | (void)new QFontJis0208Codec;
|
---|
1013 | (void)new QFontJis0201Codec;
|
---|
1014 | (void)new QFontKsc5601Codec;
|
---|
1015 | (void)new QFontBig5hkscsCodec;
|
---|
1016 | (void)new QFontBig5Codec;
|
---|
1017 | # endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
|
---|
1018 | # endif // Q_WS_X11
|
---|
1019 |
|
---|
1020 |
|
---|
1021 | #ifndef Q_OS_SYMBIAN
|
---|
1022 | # if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
|
---|
1023 | // no asian codecs when bootstrapping, sorry
|
---|
1024 | (void)new QGb18030Codec;
|
---|
1025 | (void)new QGbkCodec;
|
---|
1026 | (void)new QGb2312Codec;
|
---|
1027 | (void)new QEucJpCodec;
|
---|
1028 | (void)new QJisCodec;
|
---|
1029 | (void)new QSjisCodec;
|
---|
1030 | (void)new QEucKrCodec;
|
---|
1031 | (void)new QCP949Codec;
|
---|
1032 | (void)new QBig5Codec;
|
---|
1033 | (void)new QBig5hkscsCodec;
|
---|
1034 | # endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
|
---|
1035 | #endif //Q_OS_SYMBIAN
|
---|
1036 | #endif // QT_NO_CODECS
|
---|
1037 |
|
---|
1038 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
|
---|
1039 | (void) new QWindowsLocalCodec;
|
---|
1040 | #endif // Q_OS_WIN32
|
---|
1041 |
|
---|
1042 | #if defined(Q_OS_OS2)
|
---|
1043 | (void) new QOs2LocalCodec;
|
---|
1044 | #endif // Q_OS_OS2
|
---|
1045 |
|
---|
1046 | (void)new QUtf16Codec;
|
---|
1047 | (void)new QUtf16BECodec;
|
---|
1048 | (void)new QUtf16LECodec;
|
---|
1049 | (void)new QUtf32Codec;
|
---|
1050 | (void)new QUtf32BECodec;
|
---|
1051 | (void)new QUtf32LECodec;
|
---|
1052 | #ifndef Q_OS_SYMBIAN
|
---|
1053 | (void)new QLatin15Codec;
|
---|
1054 | #endif
|
---|
1055 | (void)new QLatin1Codec;
|
---|
1056 | (void)new QUtf8Codec;
|
---|
1057 |
|
---|
1058 | #ifndef Q_OS_SYMBIAN
|
---|
1059 | #if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
|
---|
1060 | // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
|
---|
1061 | (void) new QIconvCodec();
|
---|
1062 | #endif
|
---|
1063 | #endif
|
---|
1064 |
|
---|
1065 | if (!localeMapper)
|
---|
1066 | setupLocaleMapper();
|
---|
1067 | }
|
---|
1068 |
|
---|
1069 | /*!
|
---|
1070 | \enum QTextCodec::ConversionFlag
|
---|
1071 |
|
---|
1072 | \value DefaultConversion No flag is set.
|
---|
1073 | \value ConvertInvalidToNull If this flag is set, each invalid input
|
---|
1074 | character is output as a null character.
|
---|
1075 | \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
|
---|
1076 |
|
---|
1077 | \omitvalue FreeFunction
|
---|
1078 | */
|
---|
1079 |
|
---|
1080 | /*!
|
---|
1081 | \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
|
---|
1082 |
|
---|
1083 | Constructs a ConverterState object initialized with the given \a flags.
|
---|
1084 | */
|
---|
1085 |
|
---|
1086 | /*!
|
---|
1087 | Destroys the ConverterState object.
|
---|
1088 | */
|
---|
1089 | QTextCodec::ConverterState::~ConverterState()
|
---|
1090 | {
|
---|
1091 | if (flags & FreeFunction)
|
---|
1092 | (QTextCodecUnalignedPointer::decode(state_data))(this);
|
---|
1093 | else if (d)
|
---|
1094 | qFree(d);
|
---|
1095 | }
|
---|
1096 |
|
---|
1097 | static bool codecForLocaleSet = false;
|
---|
1098 | void qt_resetCodecForLocale()
|
---|
1099 | {
|
---|
1100 | #ifndef QT_NO_THREAD
|
---|
1101 | QMutexLocker locker(textCodecsMutex());
|
---|
1102 | #endif
|
---|
1103 | // if QTextCodec::codecForLocale() was called, we assume that the user has
|
---|
1104 | // explicitly set the codec he wants for the locale and don't attempt to
|
---|
1105 | // autodetect it again
|
---|
1106 | if (!codecForLocaleSet)
|
---|
1107 | setupLocaleMapper();
|
---|
1108 | }
|
---|
1109 |
|
---|
1110 | /*!
|
---|
1111 | \class QTextCodec
|
---|
1112 | \brief The QTextCodec class provides conversions between text encodings.
|
---|
1113 | \reentrant
|
---|
1114 | \ingroup i18n
|
---|
1115 |
|
---|
1116 | Qt uses Unicode to store, draw and manipulate strings. In many
|
---|
1117 | situations you may wish to deal with data that uses a different
|
---|
1118 | encoding. For example, most Japanese documents are still stored
|
---|
1119 | in Shift-JIS or ISO 2022-JP, while Russian users often have their
|
---|
1120 | documents in KOI8-R or Windows-1251.
|
---|
1121 |
|
---|
1122 | Qt provides a set of QTextCodec classes to help with converting
|
---|
1123 | non-Unicode formats to and from Unicode. You can also create your
|
---|
1124 | own codec classes.
|
---|
1125 |
|
---|
1126 | The supported encodings are:
|
---|
1127 |
|
---|
1128 | \list
|
---|
1129 | \o Apple Roman
|
---|
1130 | \o \l{Big5 Text Codec}{Big5}
|
---|
1131 | \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
|
---|
1132 | \o CP949
|
---|
1133 | \o \l{EUC-JP Text Codec}{EUC-JP}
|
---|
1134 | \o \l{EUC-KR Text Codec}{EUC-KR}
|
---|
1135 | \o \l{GBK Text Codec}{GB18030-0}
|
---|
1136 | \o IBM 850
|
---|
1137 | \o IBM 866
|
---|
1138 | \o IBM 874
|
---|
1139 | \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
|
---|
1140 | \o ISO 8859-1 to 10
|
---|
1141 | \o ISO 8859-13 to 16
|
---|
1142 | \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
|
---|
1143 | \o JIS X 0201
|
---|
1144 | \o JIS X 0208
|
---|
1145 | \o KOI8-R
|
---|
1146 | \o KOI8-U
|
---|
1147 | \o MuleLao-1
|
---|
1148 | \o ROMAN8
|
---|
1149 | \o \l{Shift-JIS Text Codec}{Shift-JIS}
|
---|
1150 | \o TIS-620
|
---|
1151 | \o \l{TSCII Text Codec}{TSCII}
|
---|
1152 | \o UTF-8
|
---|
1153 | \o UTF-16
|
---|
1154 | \o UTF-16BE
|
---|
1155 | \o UTF-16LE
|
---|
1156 | \o UTF-32
|
---|
1157 | \o UTF-32BE
|
---|
1158 | \o UTF-32LE
|
---|
1159 | \o Windows-1250 to 1258
|
---|
1160 | \o WINSAMI2
|
---|
1161 | \endlist
|
---|
1162 |
|
---|
1163 | QTextCodecs can be used as follows to convert some locally encoded
|
---|
1164 | string to Unicode. Suppose you have some string encoded in Russian
|
---|
1165 | KOI8-R encoding, and want to convert it to Unicode. The simple way
|
---|
1166 | to do it is like this:
|
---|
1167 |
|
---|
1168 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
|
---|
1169 |
|
---|
1170 | After this, \c string holds the text converted to Unicode.
|
---|
1171 | Converting a string from Unicode to the local encoding is just as
|
---|
1172 | easy:
|
---|
1173 |
|
---|
1174 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
|
---|
1175 |
|
---|
1176 | To read or write files in various encodings, use QTextStream and
|
---|
1177 | its \l{QTextStream::setCodec()}{setCodec()} function. See the
|
---|
1178 | \l{tools/codecs}{Codecs} example for an application of QTextCodec
|
---|
1179 | to file I/O.
|
---|
1180 |
|
---|
1181 | Some care must be taken when trying to convert the data in chunks,
|
---|
1182 | for example, when receiving it over a network. In such cases it is
|
---|
1183 | possible that a multi-byte character will be split over two
|
---|
1184 | chunks. At best this might result in the loss of a character and
|
---|
1185 | at worst cause the entire conversion to fail.
|
---|
1186 |
|
---|
1187 | The approach to use in these situations is to create a QTextDecoder
|
---|
1188 | object for the codec and use this QTextDecoder for the whole
|
---|
1189 | decoding process, as shown below:
|
---|
1190 |
|
---|
1191 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
|
---|
1192 |
|
---|
1193 | The QTextDecoder object maintains state between chunks and therefore
|
---|
1194 | works correctly even if a multi-byte character is split between
|
---|
1195 | chunks.
|
---|
1196 |
|
---|
1197 | \section1 Creating Your Own Codec Class
|
---|
1198 |
|
---|
1199 | Support for new text encodings can be added to Qt by creating
|
---|
1200 | QTextCodec subclasses.
|
---|
1201 |
|
---|
1202 | The pure virtual functions describe the encoder to the system and
|
---|
1203 | the coder is used as required in the different text file formats
|
---|
1204 | supported by QTextStream, and under X11, for the locale-specific
|
---|
1205 | character input and output.
|
---|
1206 |
|
---|
1207 | To add support for another encoding to Qt, make a subclass of
|
---|
1208 | QTextCodec and implement the functions listed in the table below.
|
---|
1209 |
|
---|
1210 | \table
|
---|
1211 | \header \o Function \o Description
|
---|
1212 |
|
---|
1213 | \row \o name()
|
---|
1214 | \o Returns the official name for the encoding. If the
|
---|
1215 | encoding is listed in the
|
---|
1216 | \l{IANA character-sets encoding file}, the name
|
---|
1217 | should be the preferred MIME name for the encoding.
|
---|
1218 |
|
---|
1219 | \row \o aliases()
|
---|
1220 | \o Returns a list of alternative names for the encoding.
|
---|
1221 | QTextCodec provides a default implementation that returns
|
---|
1222 | an empty list. For example, "ISO-8859-1" has "latin1",
|
---|
1223 | "CP819", "IBM819", and "iso-ir-100" as aliases.
|
---|
1224 |
|
---|
1225 | \row \o mibEnum()
|
---|
1226 | \o Return the MIB enum for the encoding if it is listed in
|
---|
1227 | the \l{IANA character-sets encoding file}.
|
---|
1228 |
|
---|
1229 | \row \o convertToUnicode()
|
---|
1230 | \o Converts an 8-bit character string to Unicode.
|
---|
1231 |
|
---|
1232 | \row \o convertFromUnicode()
|
---|
1233 | \o Converts a Unicode string to an 8-bit character string.
|
---|
1234 | \endtable
|
---|
1235 |
|
---|
1236 | You may find it more convenient to make your codec class
|
---|
1237 | available as a plugin; see \l{How to Create Qt Plugins} for
|
---|
1238 | details.
|
---|
1239 |
|
---|
1240 | \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
|
---|
1241 | */
|
---|
1242 |
|
---|
1243 | /*!
|
---|
1244 | Constructs a QTextCodec, and gives it the highest precedence. The
|
---|
1245 | QTextCodec should always be constructed on the heap (i.e. with \c
|
---|
1246 | new). Qt takes ownership and will delete it when the application
|
---|
1247 | terminates.
|
---|
1248 | */
|
---|
1249 | QTextCodec::QTextCodec()
|
---|
1250 | {
|
---|
1251 | #ifndef QT_NO_THREAD
|
---|
1252 | QMutexLocker locker(textCodecsMutex());
|
---|
1253 | #endif
|
---|
1254 | setup();
|
---|
1255 | all->prepend(this);
|
---|
1256 | }
|
---|
1257 |
|
---|
1258 |
|
---|
1259 | /*!
|
---|
1260 | \nonreentrant
|
---|
1261 |
|
---|
1262 | Destroys the QTextCodec. Note that you should not delete codecs
|
---|
1263 | yourself: once created they become Qt's responsibility.
|
---|
1264 | */
|
---|
1265 | QTextCodec::~QTextCodec()
|
---|
1266 | {
|
---|
1267 | #ifdef Q_DEBUG_TEXTCODEC
|
---|
1268 | if (!destroying_is_ok)
|
---|
1269 | qWarning("QTextCodec::~QTextCodec: Called by application");
|
---|
1270 | #endif
|
---|
1271 | if (all) {
|
---|
1272 | #ifndef QT_NO_THREAD
|
---|
1273 | QMutexLocker locker(textCodecsMutex());
|
---|
1274 | #endif
|
---|
1275 | all->removeAll(this);
|
---|
1276 | QTextCodecCache *cache = qTextCodecCache();
|
---|
1277 | if (cache)
|
---|
1278 | cache->clear();
|
---|
1279 | }
|
---|
1280 | }
|
---|
1281 |
|
---|
1282 | /*!
|
---|
1283 | \fn QTextCodec *QTextCodec::codecForName(const char *name)
|
---|
1284 |
|
---|
1285 | Searches all installed QTextCodec objects and returns the one
|
---|
1286 | which best matches \a name; the match is case-insensitive. Returns
|
---|
1287 | 0 if no codec matching the name \a name could be found.
|
---|
1288 | */
|
---|
1289 |
|
---|
1290 | /*!
|
---|
1291 | Searches all installed QTextCodec objects and returns the one
|
---|
1292 | which best matches \a name; the match is case-insensitive. Returns
|
---|
1293 | 0 if no codec matching the name \a name could be found.
|
---|
1294 | */
|
---|
1295 | QTextCodec *QTextCodec::codecForName(const QByteArray &name)
|
---|
1296 | {
|
---|
1297 | if (name.isEmpty())
|
---|
1298 | return 0;
|
---|
1299 |
|
---|
1300 | #ifndef QT_NO_THREAD
|
---|
1301 | QMutexLocker locker(textCodecsMutex());
|
---|
1302 | #endif
|
---|
1303 | setup();
|
---|
1304 |
|
---|
1305 | if (!validCodecs())
|
---|
1306 | return 0;
|
---|
1307 |
|
---|
1308 | QTextCodecCache *cache = qTextCodecCache();
|
---|
1309 | QTextCodec *codec;
|
---|
1310 | if (cache) {
|
---|
1311 | codec = cache->value(name);
|
---|
1312 | if (codec)
|
---|
1313 | return codec;
|
---|
1314 | }
|
---|
1315 |
|
---|
1316 | for (int i = 0; i < all->size(); ++i) {
|
---|
1317 | QTextCodec *cursor = all->at(i);
|
---|
1318 | if (nameMatch(cursor->name(), name)) {
|
---|
1319 | if (cache)
|
---|
1320 | cache->insert(name, cursor);
|
---|
1321 | return cursor;
|
---|
1322 | }
|
---|
1323 | QList<QByteArray> aliases = cursor->aliases();
|
---|
1324 | for (int y = 0; y < aliases.size(); ++y)
|
---|
1325 | if (nameMatch(aliases.at(y), name)) {
|
---|
1326 | if (cache)
|
---|
1327 | cache->insert(name, cursor);
|
---|
1328 | return cursor;
|
---|
1329 | }
|
---|
1330 | }
|
---|
1331 |
|
---|
1332 | codec = createForName(name);
|
---|
1333 | if (codec && cache)
|
---|
1334 | cache->insert(name, codec);
|
---|
1335 | return codec;
|
---|
1336 | }
|
---|
1337 |
|
---|
1338 |
|
---|
1339 | /*!
|
---|
1340 | Returns the QTextCodec which matches the \link
|
---|
1341 | QTextCodec::mibEnum() MIBenum\endlink \a mib.
|
---|
1342 | */
|
---|
1343 | QTextCodec* QTextCodec::codecForMib(int mib)
|
---|
1344 | {
|
---|
1345 | #ifndef QT_NO_THREAD
|
---|
1346 | QMutexLocker locker(textCodecsMutex());
|
---|
1347 | #endif
|
---|
1348 | setup();
|
---|
1349 |
|
---|
1350 | if (!validCodecs())
|
---|
1351 | return 0;
|
---|
1352 |
|
---|
1353 | QByteArray key = "MIB: " + QByteArray::number(mib);
|
---|
1354 | QTextCodecCache *cache = qTextCodecCache();
|
---|
1355 | QTextCodec *codec;
|
---|
1356 | if (cache)
|
---|
1357 | codec = cache->value(key);
|
---|
1358 |
|
---|
1359 | QList<QTextCodec*>::ConstIterator i;
|
---|
1360 | for (int i = 0; i < all->size(); ++i) {
|
---|
1361 | QTextCodec *cursor = all->at(i);
|
---|
1362 | if (cursor->mibEnum() == mib) {
|
---|
1363 | if (cache)
|
---|
1364 | cache->insert(key, cursor);
|
---|
1365 | return cursor;
|
---|
1366 | }
|
---|
1367 | }
|
---|
1368 |
|
---|
1369 | codec = createForMib(mib);
|
---|
1370 |
|
---|
1371 | // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
|
---|
1372 | // this correctly for compatibility.
|
---|
1373 | if (!codec && mib == 1000)
|
---|
1374 | return codecForMib(1015);
|
---|
1375 |
|
---|
1376 | if (codec && cache)
|
---|
1377 | cache->insert(key, codec);
|
---|
1378 | return codec;
|
---|
1379 | }
|
---|
1380 |
|
---|
1381 | /*!
|
---|
1382 | Returns the list of all available codecs, by name. Call
|
---|
1383 | QTextCodec::codecForName() to obtain the QTextCodec for the name.
|
---|
1384 |
|
---|
1385 | The list may contain many mentions of the same codec
|
---|
1386 | if the codec has aliases.
|
---|
1387 |
|
---|
1388 | \sa availableMibs(), name(), aliases()
|
---|
1389 | */
|
---|
1390 | QList<QByteArray> QTextCodec::availableCodecs()
|
---|
1391 | {
|
---|
1392 | #ifndef QT_NO_THREAD
|
---|
1393 | QMutexLocker locker(textCodecsMutex());
|
---|
1394 | #endif
|
---|
1395 | setup();
|
---|
1396 |
|
---|
1397 | QList<QByteArray> codecs;
|
---|
1398 |
|
---|
1399 | if (!validCodecs())
|
---|
1400 | return codecs;
|
---|
1401 |
|
---|
1402 | for (int i = 0; i < all->size(); ++i) {
|
---|
1403 | codecs += all->at(i)->name();
|
---|
1404 | codecs += all->at(i)->aliases();
|
---|
1405 | }
|
---|
1406 |
|
---|
1407 | #ifndef QT_NO_THREAD
|
---|
1408 | locker.unlock();
|
---|
1409 | #endif
|
---|
1410 |
|
---|
1411 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
|
---|
1412 | QFactoryLoader *l = loader();
|
---|
1413 | QStringList keys = l->keys();
|
---|
1414 | for (int i = 0; i < keys.size(); ++i) {
|
---|
1415 | if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
|
---|
1416 | QByteArray name = keys.at(i).toLatin1();
|
---|
1417 | if (!codecs.contains(name))
|
---|
1418 | codecs += name;
|
---|
1419 | }
|
---|
1420 | }
|
---|
1421 | #endif
|
---|
1422 |
|
---|
1423 | return codecs;
|
---|
1424 | }
|
---|
1425 |
|
---|
1426 | /*!
|
---|
1427 | Returns the list of MIBs for all available codecs. Call
|
---|
1428 | QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
|
---|
1429 |
|
---|
1430 | \sa availableCodecs(), mibEnum()
|
---|
1431 | */
|
---|
1432 | QList<int> QTextCodec::availableMibs()
|
---|
1433 | {
|
---|
1434 | #ifndef QT_NO_THREAD
|
---|
1435 | QMutexLocker locker(textCodecsMutex());
|
---|
1436 | #endif
|
---|
1437 | setup();
|
---|
1438 |
|
---|
1439 | QList<int> codecs;
|
---|
1440 |
|
---|
1441 | if (!validCodecs())
|
---|
1442 | return codecs;
|
---|
1443 |
|
---|
1444 | for (int i = 0; i < all->size(); ++i)
|
---|
1445 | codecs += all->at(i)->mibEnum();
|
---|
1446 |
|
---|
1447 | #ifndef QT_NO_THREAD
|
---|
1448 | locker.unlock();
|
---|
1449 | #endif
|
---|
1450 |
|
---|
1451 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
|
---|
1452 | QFactoryLoader *l = loader();
|
---|
1453 | QStringList keys = l->keys();
|
---|
1454 | for (int i = 0; i < keys.size(); ++i) {
|
---|
1455 | if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
|
---|
1456 | int mib = keys.at(i).mid(5).toInt();
|
---|
1457 | if (!codecs.contains(mib))
|
---|
1458 | codecs += mib;
|
---|
1459 | }
|
---|
1460 | }
|
---|
1461 | #endif
|
---|
1462 |
|
---|
1463 | return codecs;
|
---|
1464 | }
|
---|
1465 |
|
---|
1466 | /*!
|
---|
1467 | Set the codec to \a c; this will be returned by
|
---|
1468 | codecForLocale(). If \a c is a null pointer, the codec is reset to
|
---|
1469 | the default.
|
---|
1470 |
|
---|
1471 | This might be needed for some applications that want to use their
|
---|
1472 | own mechanism for setting the locale.
|
---|
1473 |
|
---|
1474 | \sa codecForLocale()
|
---|
1475 | */
|
---|
1476 | void QTextCodec::setCodecForLocale(QTextCodec *c)
|
---|
1477 | {
|
---|
1478 | #ifndef QT_NO_THREAD
|
---|
1479 | QMutexLocker locker(textCodecsMutex());
|
---|
1480 | #endif
|
---|
1481 | codecForLocaleSet = true;
|
---|
1482 | localeMapper = c;
|
---|
1483 | if (!localeMapper)
|
---|
1484 | setupLocaleMapper();
|
---|
1485 | }
|
---|
1486 |
|
---|
1487 | /*!
|
---|
1488 | Returns a pointer to the codec most suitable for this locale.
|
---|
1489 |
|
---|
1490 | On Windows, the codec will be based on a system locale. On Unix
|
---|
1491 | systems, starting with Qt 4.2, the codec will be using the \e
|
---|
1492 | iconv library. Note that in both cases the codec's name will be
|
---|
1493 | "System".
|
---|
1494 | */
|
---|
1495 |
|
---|
1496 | QTextCodec* QTextCodec::codecForLocale()
|
---|
1497 | {
|
---|
1498 | if (!validCodecs())
|
---|
1499 | return 0;
|
---|
1500 |
|
---|
1501 | if (localeMapper)
|
---|
1502 | return localeMapper;
|
---|
1503 |
|
---|
1504 | #ifndef QT_NO_THREAD
|
---|
1505 | QMutexLocker locker(textCodecsMutex());
|
---|
1506 | #endif
|
---|
1507 | setup();
|
---|
1508 |
|
---|
1509 | return localeMapper;
|
---|
1510 | }
|
---|
1511 |
|
---|
1512 |
|
---|
1513 | /*!
|
---|
1514 | \fn QByteArray QTextCodec::name() const
|
---|
1515 |
|
---|
1516 | QTextCodec subclasses must reimplement this function. It returns
|
---|
1517 | the name of the encoding supported by the subclass.
|
---|
1518 |
|
---|
1519 | If the codec is registered as a character set in the
|
---|
1520 | \l{IANA character-sets encoding file} this method should
|
---|
1521 | return the preferred mime name for the codec if defined,
|
---|
1522 | otherwise its name.
|
---|
1523 | */
|
---|
1524 |
|
---|
1525 | /*!
|
---|
1526 | \fn int QTextCodec::mibEnum() const
|
---|
1527 |
|
---|
1528 | Subclasses of QTextCodec must reimplement this function. It
|
---|
1529 | returns the MIBenum (see \l{IANA character-sets encoding file}
|
---|
1530 | for more information). It is important that each QTextCodec
|
---|
1531 | subclass returns the correct unique value for this function.
|
---|
1532 | */
|
---|
1533 |
|
---|
1534 | /*!
|
---|
1535 | Subclasses can return a number of aliases for the codec in question.
|
---|
1536 |
|
---|
1537 | Standard aliases for codecs can be found in the
|
---|
1538 | \l{IANA character-sets encoding file}.
|
---|
1539 | */
|
---|
1540 | QList<QByteArray> QTextCodec::aliases() const
|
---|
1541 | {
|
---|
1542 | return QList<QByteArray>();
|
---|
1543 | }
|
---|
1544 |
|
---|
1545 | /*!
|
---|
1546 | \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
|
---|
1547 | ConverterState *state) const
|
---|
1548 |
|
---|
1549 | QTextCodec subclasses must reimplement this function.
|
---|
1550 |
|
---|
1551 | Converts the first \a len characters of \a chars from the
|
---|
1552 | encoding of the subclass to Unicode, and returns the result in a
|
---|
1553 | QString.
|
---|
1554 |
|
---|
1555 | \a state can be 0, in which case the conversion is stateless and
|
---|
1556 | default conversion rules should be used. If state is not 0, the
|
---|
1557 | codec should save the state after the conversion in \a state, and
|
---|
1558 | adjust the remainingChars and invalidChars members of the struct.
|
---|
1559 | */
|
---|
1560 |
|
---|
1561 | /*!
|
---|
1562 | \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
|
---|
1563 | ConverterState *state) const
|
---|
1564 |
|
---|
1565 | QTextCodec subclasses must reimplement this function.
|
---|
1566 |
|
---|
1567 | Converts the first \a number of characters from the \a input array
|
---|
1568 | from Unicode to the encoding of the subclass, and returns the result
|
---|
1569 | in a QByteArray.
|
---|
1570 |
|
---|
1571 | \a state can be 0 in which case the conversion is stateless and
|
---|
1572 | default conversion rules should be used. If state is not 0, the
|
---|
1573 | codec should save the state after the conversion in \a state, and
|
---|
1574 | adjust the remainingChars and invalidChars members of the struct.
|
---|
1575 | */
|
---|
1576 |
|
---|
1577 | /*!
|
---|
1578 | Creates a QTextDecoder which stores enough state to decode chunks
|
---|
1579 | of \c{char *} data to create chunks of Unicode data.
|
---|
1580 |
|
---|
1581 | The caller is responsible for deleting the returned object.
|
---|
1582 | */
|
---|
1583 | QTextDecoder* QTextCodec::makeDecoder() const
|
---|
1584 | {
|
---|
1585 | return new QTextDecoder(this);
|
---|
1586 | }
|
---|
1587 |
|
---|
1588 | /*!
|
---|
1589 | Creates a QTextDecoder with the specified \a flags to decode chunks
|
---|
1590 | of \c{char *} data to create chunks of Unicode data.
|
---|
1591 |
|
---|
1592 | The caller is responsible for deleting the returned object.
|
---|
1593 |
|
---|
1594 | \since 4.7
|
---|
1595 | */
|
---|
1596 | QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
|
---|
1597 | {
|
---|
1598 | return new QTextDecoder(this, flags);
|
---|
1599 | }
|
---|
1600 |
|
---|
1601 |
|
---|
1602 | /*!
|
---|
1603 | Creates a QTextEncoder which stores enough state to encode chunks
|
---|
1604 | of Unicode data as \c{char *} data.
|
---|
1605 |
|
---|
1606 | The caller is responsible for deleting the returned object.
|
---|
1607 | */
|
---|
1608 | QTextEncoder* QTextCodec::makeEncoder() const
|
---|
1609 | {
|
---|
1610 | return new QTextEncoder(this);
|
---|
1611 | }
|
---|
1612 |
|
---|
1613 | /*!
|
---|
1614 | Creates a QTextEncoder with the specified \a flags to encode chunks
|
---|
1615 | of Unicode data as \c{char *} data.
|
---|
1616 |
|
---|
1617 | The caller is responsible for deleting the returned object.
|
---|
1618 |
|
---|
1619 | \since 4.7
|
---|
1620 | */
|
---|
1621 | QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
|
---|
1622 | {
|
---|
1623 | return new QTextEncoder(this, flags);
|
---|
1624 | }
|
---|
1625 |
|
---|
1626 | /*!
|
---|
1627 | \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
|
---|
1628 | ConverterState *state) const
|
---|
1629 |
|
---|
1630 | Converts the first \a number of characters from the \a input array
|
---|
1631 | from Unicode to the encoding of this codec, and returns the result
|
---|
1632 | in a QByteArray.
|
---|
1633 |
|
---|
1634 | The \a state of the convertor used is updated.
|
---|
1635 | */
|
---|
1636 |
|
---|
1637 | /*!
|
---|
1638 | Converts \a str from Unicode to the encoding of this codec, and
|
---|
1639 | returns the result in a QByteArray.
|
---|
1640 | */
|
---|
1641 | QByteArray QTextCodec::fromUnicode(const QString& str) const
|
---|
1642 | {
|
---|
1643 | return convertFromUnicode(str.constData(), str.length(), 0);
|
---|
1644 | }
|
---|
1645 |
|
---|
1646 | /*!
|
---|
1647 | \fn QString QTextCodec::toUnicode(const char *input, int size,
|
---|
1648 | ConverterState *state) const
|
---|
1649 |
|
---|
1650 | Converts the first \a size characters from the \a input from the
|
---|
1651 | encoding of this codec to Unicode, and returns the result in a
|
---|
1652 | QString.
|
---|
1653 |
|
---|
1654 | The \a state of the convertor used is updated.
|
---|
1655 | */
|
---|
1656 |
|
---|
1657 | /*!
|
---|
1658 | Converts \a a from the encoding of this codec to Unicode, and
|
---|
1659 | returns the result in a QString.
|
---|
1660 | */
|
---|
1661 | QString QTextCodec::toUnicode(const QByteArray& a) const
|
---|
1662 | {
|
---|
1663 | return convertToUnicode(a.constData(), a.length(), 0);
|
---|
1664 | }
|
---|
1665 |
|
---|
1666 | /*!
|
---|
1667 | Returns true if the Unicode character \a ch can be fully encoded
|
---|
1668 | with this codec; otherwise returns false.
|
---|
1669 | */
|
---|
1670 | bool QTextCodec::canEncode(QChar ch) const
|
---|
1671 | {
|
---|
1672 | ConverterState state;
|
---|
1673 | state.flags = ConvertInvalidToNull;
|
---|
1674 | convertFromUnicode(&ch, 1, &state);
|
---|
1675 | return (state.invalidChars == 0);
|
---|
1676 | }
|
---|
1677 |
|
---|
1678 | /*!
|
---|
1679 | \overload
|
---|
1680 |
|
---|
1681 | \a s contains the string being tested for encode-ability.
|
---|
1682 | */
|
---|
1683 | bool QTextCodec::canEncode(const QString& s) const
|
---|
1684 | {
|
---|
1685 | ConverterState state;
|
---|
1686 | state.flags = ConvertInvalidToNull;
|
---|
1687 | convertFromUnicode(s.constData(), s.length(), &state);
|
---|
1688 | return (state.invalidChars == 0);
|
---|
1689 | }
|
---|
1690 |
|
---|
1691 | #ifdef QT3_SUPPORT
|
---|
1692 | /*!
|
---|
1693 | Returns a string representing the current language and
|
---|
1694 | sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
|
---|
1695 |
|
---|
1696 | \sa QLocale
|
---|
1697 | */
|
---|
1698 | const char *QTextCodec::locale()
|
---|
1699 | {
|
---|
1700 | static char locale[6];
|
---|
1701 | QByteArray l = QLocale::system().name().toLatin1();
|
---|
1702 | int len = qMin(l.length(), 5);
|
---|
1703 | memcpy(locale, l.constData(), len);
|
---|
1704 | locale[len] = '\0';
|
---|
1705 |
|
---|
1706 | return locale;
|
---|
1707 | }
|
---|
1708 |
|
---|
1709 | /*!
|
---|
1710 | \overload
|
---|
1711 | */
|
---|
1712 |
|
---|
1713 | QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
|
---|
1714 | {
|
---|
1715 | QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
|
---|
1716 | lenInOut = result.length();
|
---|
1717 | return result;
|
---|
1718 | }
|
---|
1719 |
|
---|
1720 | /*!
|
---|
1721 | \overload
|
---|
1722 |
|
---|
1723 | \a a contains the source characters; \a len contains the number of
|
---|
1724 | characters in \a a to use.
|
---|
1725 | */
|
---|
1726 | QString QTextCodec::toUnicode(const QByteArray& a, int len) const
|
---|
1727 | {
|
---|
1728 | len = qMin(a.size(), len);
|
---|
1729 | return convertToUnicode(a.constData(), len, 0);
|
---|
1730 | }
|
---|
1731 | #endif
|
---|
1732 |
|
---|
1733 | /*!
|
---|
1734 | \overload
|
---|
1735 |
|
---|
1736 | \a chars contains the source characters.
|
---|
1737 | */
|
---|
1738 | QString QTextCodec::toUnicode(const char *chars) const
|
---|
1739 | {
|
---|
1740 | int len = qstrlen(chars);
|
---|
1741 | return convertToUnicode(chars, len, 0);
|
---|
1742 | }
|
---|
1743 |
|
---|
1744 |
|
---|
1745 | /*!
|
---|
1746 | \class QTextEncoder
|
---|
1747 | \brief The QTextEncoder class provides a state-based encoder.
|
---|
1748 | \reentrant
|
---|
1749 | \ingroup i18n
|
---|
1750 |
|
---|
1751 | A text encoder converts text from Unicode into an encoded text format
|
---|
1752 | using a specific codec.
|
---|
1753 |
|
---|
1754 | The encoder converts Unicode into another format, remembering any
|
---|
1755 | state that is required between calls.
|
---|
1756 |
|
---|
1757 | \sa QTextCodec::makeEncoder(), QTextDecoder
|
---|
1758 | */
|
---|
1759 |
|
---|
1760 | /*!
|
---|
1761 | \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
|
---|
1762 |
|
---|
1763 | Constructs a text encoder for the given \a codec.
|
---|
1764 | */
|
---|
1765 |
|
---|
1766 | /*!
|
---|
1767 | Constructs a text encoder for the given \a codec and conversion \a flags.
|
---|
1768 |
|
---|
1769 | \since 4.7
|
---|
1770 | */
|
---|
1771 | QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
|
---|
1772 | : c(codec), state()
|
---|
1773 | {
|
---|
1774 | state.flags = flags;
|
---|
1775 | }
|
---|
1776 |
|
---|
1777 | /*!
|
---|
1778 | Destroys the encoder.
|
---|
1779 | */
|
---|
1780 | QTextEncoder::~QTextEncoder()
|
---|
1781 | {
|
---|
1782 | }
|
---|
1783 |
|
---|
1784 | /*! \internal
|
---|
1785 | \since 4.5
|
---|
1786 | Determines whether the eecoder encountered a failure while decoding the input. If
|
---|
1787 | an error was encountered, the produced result is undefined, and gets converted as according
|
---|
1788 | to the conversion flags.
|
---|
1789 | */
|
---|
1790 | bool QTextEncoder::hasFailure() const
|
---|
1791 | {
|
---|
1792 | return state.invalidChars != 0;
|
---|
1793 | }
|
---|
1794 |
|
---|
1795 | /*!
|
---|
1796 | Converts the Unicode string \a str into an encoded QByteArray.
|
---|
1797 | */
|
---|
1798 | QByteArray QTextEncoder::fromUnicode(const QString& str)
|
---|
1799 | {
|
---|
1800 | QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
|
---|
1801 | return result;
|
---|
1802 | }
|
---|
1803 |
|
---|
1804 | /*!
|
---|
1805 | \overload
|
---|
1806 |
|
---|
1807 | Converts \a len characters (not bytes) from \a uc, and returns the
|
---|
1808 | result in a QByteArray.
|
---|
1809 | */
|
---|
1810 | QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
|
---|
1811 | {
|
---|
1812 | QByteArray result = c->fromUnicode(uc, len, &state);
|
---|
1813 | return result;
|
---|
1814 | }
|
---|
1815 |
|
---|
1816 | #ifdef QT3_SUPPORT
|
---|
1817 | /*!
|
---|
1818 | \overload
|
---|
1819 |
|
---|
1820 | Converts \a lenInOut characters (not bytes) from \a uc, and returns the
|
---|
1821 | result in a QByteArray. The number of characters read is returned in
|
---|
1822 | the \a lenInOut parameter.
|
---|
1823 | */
|
---|
1824 | QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
|
---|
1825 | {
|
---|
1826 | QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
|
---|
1827 | lenInOut = result.length();
|
---|
1828 | return result;
|
---|
1829 | }
|
---|
1830 | #endif
|
---|
1831 |
|
---|
1832 | /*!
|
---|
1833 | \class QTextDecoder
|
---|
1834 | \brief The QTextDecoder class provides a state-based decoder.
|
---|
1835 | \reentrant
|
---|
1836 | \ingroup i18n
|
---|
1837 |
|
---|
1838 | A text decoder converts text from an encoded text format into Unicode
|
---|
1839 | using a specific codec.
|
---|
1840 |
|
---|
1841 | The decoder converts text in this format into Unicode, remembering any
|
---|
1842 | state that is required between calls.
|
---|
1843 |
|
---|
1844 | \sa QTextCodec::makeDecoder(), QTextEncoder
|
---|
1845 | */
|
---|
1846 |
|
---|
1847 | /*!
|
---|
1848 | \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
|
---|
1849 |
|
---|
1850 | Constructs a text decoder for the given \a codec.
|
---|
1851 | */
|
---|
1852 |
|
---|
1853 | /*!
|
---|
1854 | Constructs a text decoder for the given \a codec and conversion \a flags.
|
---|
1855 |
|
---|
1856 | \since 4.7
|
---|
1857 | */
|
---|
1858 |
|
---|
1859 | QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
|
---|
1860 | : c(codec), state()
|
---|
1861 | {
|
---|
1862 | state.flags = flags;
|
---|
1863 | }
|
---|
1864 |
|
---|
1865 | /*!
|
---|
1866 | Destroys the decoder.
|
---|
1867 | */
|
---|
1868 | QTextDecoder::~QTextDecoder()
|
---|
1869 | {
|
---|
1870 | }
|
---|
1871 |
|
---|
1872 | /*!
|
---|
1873 | \fn QString QTextDecoder::toUnicode(const char *chars, int len)
|
---|
1874 |
|
---|
1875 | Converts the first \a len bytes in \a chars to Unicode, returning
|
---|
1876 | the result.
|
---|
1877 |
|
---|
1878 | If not all characters are used (e.g. if only part of a multi-byte
|
---|
1879 | encoding is at the end of the characters), the decoder remembers
|
---|
1880 | enough state to continue with the next call to this function.
|
---|
1881 | */
|
---|
1882 | QString QTextDecoder::toUnicode(const char *chars, int len)
|
---|
1883 | {
|
---|
1884 | return c->toUnicode(chars, len, &state);
|
---|
1885 | }
|
---|
1886 |
|
---|
1887 |
|
---|
1888 | /*! \overload
|
---|
1889 |
|
---|
1890 | The converted string is returned in \a target.
|
---|
1891 | */
|
---|
1892 | void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
|
---|
1893 | {
|
---|
1894 | Q_ASSERT(target);
|
---|
1895 | switch (c->mibEnum()) {
|
---|
1896 | case 106: // utf8
|
---|
1897 | static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
|
---|
1898 | break;
|
---|
1899 | case 4: { // latin1
|
---|
1900 | target->resize(len);
|
---|
1901 | ushort *data = (ushort*)target->data();
|
---|
1902 | for (int i = len; i >=0; --i)
|
---|
1903 | data[i] = (uchar) chars[i];
|
---|
1904 | } break;
|
---|
1905 | default:
|
---|
1906 | *target = c->toUnicode(chars, len, &state);
|
---|
1907 | }
|
---|
1908 | }
|
---|
1909 |
|
---|
1910 |
|
---|
1911 | /*!
|
---|
1912 | \overload
|
---|
1913 |
|
---|
1914 | Converts the bytes in the byte array specified by \a ba to Unicode
|
---|
1915 | and returns the result.
|
---|
1916 | */
|
---|
1917 | QString QTextDecoder::toUnicode(const QByteArray &ba)
|
---|
1918 | {
|
---|
1919 | return c->toUnicode(ba.constData(), ba.length(), &state);
|
---|
1920 | }
|
---|
1921 |
|
---|
1922 |
|
---|
1923 | /*!
|
---|
1924 | \fn QTextCodec* QTextCodec::codecForTr()
|
---|
1925 |
|
---|
1926 | Returns the codec used by QObject::tr() on its argument. If this
|
---|
1927 | function returns 0 (the default), tr() assumes Latin-1.
|
---|
1928 |
|
---|
1929 | \sa setCodecForTr()
|
---|
1930 | */
|
---|
1931 |
|
---|
1932 | /*!
|
---|
1933 | \fn void QTextCodec::setCodecForTr(QTextCodec *c)
|
---|
1934 | \nonreentrant
|
---|
1935 |
|
---|
1936 | Sets the codec used by QObject::tr() on its argument to \a c. If
|
---|
1937 | \a c is 0 (the default), tr() assumes Latin-1.
|
---|
1938 |
|
---|
1939 | If the literal quoted text in the program is not in the Latin-1
|
---|
1940 | encoding, this function can be used to set the appropriate
|
---|
1941 | encoding. For example, software developed by Korean programmers
|
---|
1942 | might use eucKR for all the text in the program, in which case the
|
---|
1943 | main() function might look like this:
|
---|
1944 |
|
---|
1945 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
|
---|
1946 |
|
---|
1947 | Note that this is not the way to select the encoding that the \e
|
---|
1948 | user has chosen. For example, to convert an application containing
|
---|
1949 | literal English strings to Korean, all that is needed is for the
|
---|
1950 | English strings to be passed through tr() and for translation
|
---|
1951 | files to be loaded. For details of internationalization, see
|
---|
1952 | \l{Internationalization with Qt}.
|
---|
1953 |
|
---|
1954 | \sa codecForTr(), setCodecForCStrings()
|
---|
1955 | */
|
---|
1956 |
|
---|
1957 |
|
---|
1958 | /*!
|
---|
1959 | \fn QTextCodec* QTextCodec::codecForCStrings()
|
---|
1960 |
|
---|
1961 | Returns the codec used by QString to convert to and from \c{const
|
---|
1962 | char *} and QByteArrays. If this function returns 0 (the default),
|
---|
1963 | QString assumes Latin-1.
|
---|
1964 |
|
---|
1965 | \sa setCodecForCStrings()
|
---|
1966 | */
|
---|
1967 |
|
---|
1968 | /*!
|
---|
1969 | \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
|
---|
1970 | \nonreentrant
|
---|
1971 |
|
---|
1972 | Sets the codec used by QString to convert to and from \c{const
|
---|
1973 | char *} and QByteArrays. If the \a codec is 0 (the default),
|
---|
1974 | QString assumes Latin-1.
|
---|
1975 |
|
---|
1976 | \warning Some codecs do not preserve the characters in the ASCII
|
---|
1977 | range (0x00 to 0x7F). For example, the Japanese Shift-JIS
|
---|
1978 | encoding maps the backslash character (0x5C) to the Yen
|
---|
1979 | character. To avoid undesirable side-effects, we recommend
|
---|
1980 | avoiding such codecs with setCodecForCStrings().
|
---|
1981 |
|
---|
1982 | \sa codecForCStrings(), setCodecForTr()
|
---|
1983 | */
|
---|
1984 |
|
---|
1985 | /*!
|
---|
1986 | \since 4.4
|
---|
1987 |
|
---|
1988 | Tries to detect the encoding of the provided snippet of HTML in
|
---|
1989 | the given byte array, \a ba, by checking the BOM (Byte Order Mark)
|
---|
1990 | and the content-type meta header and returns a QTextCodec instance
|
---|
1991 | that is capable of decoding the html to unicode. If the codec
|
---|
1992 | cannot be detected from the content provided, \a defaultCodec is
|
---|
1993 | returned.
|
---|
1994 |
|
---|
1995 | \sa codecForUtfText()
|
---|
1996 | */
|
---|
1997 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
|
---|
1998 | {
|
---|
1999 | // determine charset
|
---|
2000 | int pos;
|
---|
2001 | QTextCodec *c = 0;
|
---|
2002 |
|
---|
2003 | c = QTextCodec::codecForUtfText(ba, c);
|
---|
2004 | if (!c) {
|
---|
2005 | QByteArray header = ba.left(512).toLower();
|
---|
2006 | if ((pos = header.indexOf("http-equiv=")) != -1) {
|
---|
2007 | if ((pos = header.lastIndexOf("meta ", pos)) != -1) {
|
---|
2008 | pos = header.indexOf("charset=", pos) + int(strlen("charset="));
|
---|
2009 | if (pos != -1) {
|
---|
2010 | int pos2 = header.indexOf('\"', pos+1);
|
---|
2011 | QByteArray cs = header.mid(pos, pos2-pos);
|
---|
2012 | // qDebug("found charset: %s", cs.data());
|
---|
2013 | c = QTextCodec::codecForName(cs);
|
---|
2014 | }
|
---|
2015 | }
|
---|
2016 | }
|
---|
2017 | }
|
---|
2018 | if (!c)
|
---|
2019 | c = defaultCodec;
|
---|
2020 |
|
---|
2021 | return c;
|
---|
2022 | }
|
---|
2023 |
|
---|
2024 | /*!
|
---|
2025 | \overload
|
---|
2026 |
|
---|
2027 | Tries to detect the encoding of the provided snippet of HTML in
|
---|
2028 | the given byte array, \a ba, by checking the BOM (Byte Order Mark)
|
---|
2029 | and the content-type meta header and returns a QTextCodec instance
|
---|
2030 | that is capable of decoding the html to unicode. If the codec cannot
|
---|
2031 | be detected, this overload returns a Latin-1 QTextCodec.
|
---|
2032 | */
|
---|
2033 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
|
---|
2034 | {
|
---|
2035 | return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
|
---|
2036 | }
|
---|
2037 |
|
---|
2038 | /*!
|
---|
2039 | \since 4.6
|
---|
2040 |
|
---|
2041 | Tries to detect the encoding of the provided snippet \a ba by
|
---|
2042 | using the BOM (Byte Order Mark) and returns a QTextCodec instance
|
---|
2043 | that is capable of decoding the text to unicode. If the codec
|
---|
2044 | cannot be detected from the content provided, \a defaultCodec is
|
---|
2045 | returned.
|
---|
2046 |
|
---|
2047 | \sa codecForHtml()
|
---|
2048 | */
|
---|
2049 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
|
---|
2050 | {
|
---|
2051 | const int arraySize = ba.size();
|
---|
2052 |
|
---|
2053 | if (arraySize > 3) {
|
---|
2054 | if ((uchar)ba[0] == 0x00
|
---|
2055 | && (uchar)ba[1] == 0x00
|
---|
2056 | && (uchar)ba[2] == 0xFE
|
---|
2057 | && (uchar)ba[3] == 0xFF)
|
---|
2058 | return QTextCodec::codecForMib(1018); // utf-32 be
|
---|
2059 | else if ((uchar)ba[0] == 0xFF
|
---|
2060 | && (uchar)ba[1] == 0xFE
|
---|
2061 | && (uchar)ba[2] == 0x00
|
---|
2062 | && (uchar)ba[3] == 0x00)
|
---|
2063 | return QTextCodec::codecForMib(1019); // utf-32 le
|
---|
2064 | }
|
---|
2065 |
|
---|
2066 | if (arraySize < 2)
|
---|
2067 | return defaultCodec;
|
---|
2068 | if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
|
---|
2069 | return QTextCodec::codecForMib(1013); // utf16 be
|
---|
2070 | else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
|
---|
2071 | return QTextCodec::codecForMib(1014); // utf16 le
|
---|
2072 |
|
---|
2073 | if (arraySize < 3)
|
---|
2074 | return defaultCodec;
|
---|
2075 | if ((uchar)ba[0] == 0xef
|
---|
2076 | && (uchar)ba[1] == 0xbb
|
---|
2077 | && (uchar)ba[2] == 0xbf)
|
---|
2078 | return QTextCodec::codecForMib(106); // utf-8
|
---|
2079 |
|
---|
2080 | return defaultCodec;
|
---|
2081 | }
|
---|
2082 |
|
---|
2083 | /*!
|
---|
2084 | \overload
|
---|
2085 |
|
---|
2086 | Tries to detect the encoding of the provided snippet \a ba by
|
---|
2087 | using the BOM (Byte Order Mark) and returns a QTextCodec instance
|
---|
2088 | that is capable of decoding the text to unicode. If the codec
|
---|
2089 | cannot be detected, this overload returns a Latin-1 QTextCodec.
|
---|
2090 |
|
---|
2091 | \sa codecForHtml()
|
---|
2092 | */
|
---|
2093 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
|
---|
2094 | {
|
---|
2095 | return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
|
---|
2096 | }
|
---|
2097 |
|
---|
2098 |
|
---|
2099 | /*! \internal
|
---|
2100 | \since 4.3
|
---|
2101 | Determines whether the decoder encountered a failure while decoding the input. If
|
---|
2102 | an error was encountered, the produced result is undefined, and gets converted as according
|
---|
2103 | to the conversion flags.
|
---|
2104 | */
|
---|
2105 | bool QTextDecoder::hasFailure() const
|
---|
2106 | {
|
---|
2107 | return state.invalidChars != 0;
|
---|
2108 | }
|
---|
2109 |
|
---|
2110 | /*!
|
---|
2111 | \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
|
---|
2112 |
|
---|
2113 | This functionality is no longer provided by Qt. This
|
---|
2114 | compatibility function always returns a null pointer.
|
---|
2115 | */
|
---|
2116 |
|
---|
2117 | /*!
|
---|
2118 | \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
|
---|
2119 |
|
---|
2120 | Use the codecForName(const QByteArray &) overload instead.
|
---|
2121 | */
|
---|
2122 |
|
---|
2123 | /*!
|
---|
2124 | \fn QTextCodec *QTextCodec::codecForIndex(int i)
|
---|
2125 |
|
---|
2126 | Use availableCodecs() or availableMibs() instead and iterate
|
---|
2127 | through the resulting list.
|
---|
2128 | */
|
---|
2129 |
|
---|
2130 |
|
---|
2131 | /*!
|
---|
2132 | \fn QByteArray QTextCodec::mimeName() const
|
---|
2133 |
|
---|
2134 | Use name() instead.
|
---|
2135 | */
|
---|
2136 |
|
---|
2137 | QT_END_NAMESPACE
|
---|
2138 |
|
---|
2139 | #endif // QT_NO_TEXTCODEC
|
---|