source: trunk/src/corelib/codecs/qtextcodec.cpp@ 83

Last change on this file since 83 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 46.4 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information ([email protected])
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qplatformdefs.h"
43#include "qtextcodec.h"
44#include "qtextcodec_p.h"
45
46#ifndef QT_NO_TEXTCODEC
47
48#include "qlist.h"
49#include "qfile.h"
50#ifndef QT_NO_LIBRARY
51# include "qcoreapplication.h"
52# include "qtextcodecplugin.h"
53# include "private/qfactoryloader_p.h"
54#endif
55#include "qstringlist.h"
56
57#ifdef Q_OS_UNIX
58# include "qiconvcodec_p.h"
59#endif
60
61#include "qutfcodec_p.h"
62#include "qsimplecodec_p.h"
63#include "qlatincodec_p.h"
64#ifndef QT_NO_CODECS
65# include "qtsciicodec_p.h"
66# include "qisciicodec_p.h"
67# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
68// no iconv(3) support, must build all codecs into the library
69# include "../../plugins/codecs/cn/qgb18030codec.h"
70# include "../../plugins/codecs/jp/qeucjpcodec.h"
71# include "../../plugins/codecs/jp/qjiscodec.h"
72# include "../../plugins/codecs/jp/qsjiscodec.h"
73# include "../../plugins/codecs/kr/qeuckrcodec.h"
74# include "../../plugins/codecs/tw/qbig5codec.h"
75# endif // QT_NO_ICONV
76# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
77# include "qfontlaocodec_p.h"
78# include "../../plugins/codecs/jp/qfontjpcodec.h"
79# endif
80#endif // QT_NO_CODECS
81#include "qlocale.h"
82#include "private/qmutexpool_p.h"
83
84#include <stdlib.h>
85#include <ctype.h>
86#include <locale.h>
87#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
88#include <langinfo.h>
89#endif
90
91#if defined(Q_OS_WINCE)
92# define QT_NO_SETLOCALE
93#endif
94
95QT_BEGIN_NAMESPACE
96
97#ifndef QT_NO_TEXTCODECPLUGIN
98Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
99 (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
100#endif
101
102
103static bool nameMatch(const QByteArray &name, const QByteArray &test)
104{
105 // if they're the same, return a perfect score
106 if (qstricmp(name, test) == 0)
107 return true;
108
109 const char *n = name.constData();
110 const char *h = test.constData();
111
112 // if the letters and numbers are the same, we have a match
113 while (*n != '\0') {
114 if (isalnum((uchar)*n)) {
115 for (;;) {
116 if (*h == '\0')
117 return false;
118 if (isalnum((uchar)*h))
119 break;
120 ++h;
121 }
122 if (tolower((uchar)*n) != tolower((uchar)*h))
123 return false;
124 ++h;
125 }
126 ++n;
127 }
128 while (*h && !isalnum((uchar)*h))
129 ++h;
130 return (*h == '\0');
131}
132
133
134static QTextCodec *createForName(const QByteArray &name)
135{
136#ifndef QT_NO_TEXTCODECPLUGIN
137 QFactoryLoader *l = loader();
138 QStringList keys = l->keys();
139 for (int i = 0; i < keys.size(); ++i) {
140 if (nameMatch(name, keys.at(i).toLatin1())) {
141 QString realName = keys.at(i);
142 if (QTextCodecFactoryInterface *factory
143 = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
144 return factory->create(realName);
145 }
146 }
147 }
148#else
149 Q_UNUSED(name);
150#endif
151 return 0;
152}
153
154static QTextCodec *createForMib(int mib)
155{
156#ifndef QT_NO_TEXTCODECPLUGIN
157 QString name = QLatin1String("MIB: ") + QString::number(mib);
158 if (QTextCodecFactoryInterface *factory
159 = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
160 return factory->create(name);
161#else
162 Q_UNUSED(mib);
163#endif
164 return 0;
165}
166
167static QList<QTextCodec*> *all = 0;
168static bool destroying_is_ok = false;
169
170static QTextCodec *localeMapper = 0;
171QTextCodec *QTextCodec::cftr = 0;
172
173
174class QTextCodecCleanup
175{
176public:
177 ~QTextCodecCleanup();
178};
179
180/*
181 Deletes all the created codecs. This destructor is called just
182 before exiting to delete any QTextCodec objects that may be lying
183 around.
184*/
185QTextCodecCleanup::~QTextCodecCleanup()
186{
187 if (!all)
188 return;
189
190 destroying_is_ok = true;
191
192 while (all->size())
193 delete all->takeFirst();
194 delete all;
195 all = 0;
196 localeMapper = 0;
197
198 destroying_is_ok = false;
199}
200
201Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
202
203#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
204class QWindowsLocalCodec: public QTextCodec
205{
206public:
207 QWindowsLocalCodec();
208 ~QWindowsLocalCodec();
209
210 QString convertToUnicode(const char *, int, ConverterState *) const;
211 QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
212 QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
213
214 QByteArray name() const;
215 int mibEnum() const;
216
217};
218
219QWindowsLocalCodec::QWindowsLocalCodec()
220{
221}
222
223QWindowsLocalCodec::~QWindowsLocalCodec()
224{
225}
226
227QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
228{
229 const char *mb = chars;
230 int mblen = length;
231
232 if (!mb || !mblen)
233 return QString();
234
235 const int wclen_auto = 4096;
236 WCHAR wc_auto[wclen_auto];
237 int wclen = wclen_auto;
238 WCHAR *wc = wc_auto;
239 int len;
240 QString sp;
241 bool prepend = false;
242 char state_data = 0;
243 int remainingChars = 0;
244
245 //save the current state information
246 if (state) {
247 state_data = (char)state->state_data[0];
248 remainingChars = state->remainingChars;
249 }
250
251 //convert the pending charcter (if available)
252 if (state && remainingChars) {
253 char prev[3] = {0};
254 prev[0] = state_data;
255 prev[1] = mb[0];
256 remainingChars = 0;
257 len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
258 prev, 2, wc, wclen);
259 if (len) {
260 prepend = true;
261 sp.append(QChar(wc[0]));
262 mb++;
263 mblen--;
264 wc[0] = 0;
265 }
266 }
267
268 while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
269 mb, mblen, wc, wclen))) {
270 int r = GetLastError();
271 if (r == ERROR_INSUFFICIENT_BUFFER) {
272 if (wc != wc_auto) {
273 qWarning("MultiByteToWideChar: Size changed");
274 break;
275 } else {
276 wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
277 mb, mblen, 0, 0);
278 wc = new WCHAR[wclen];
279 // and try again...
280 }
281 } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
282 //find the last non NULL character
283 while (mblen > 1 && !(mb[mblen-1]))
284 mblen--;
285 //check whether, we hit an invalid character in the middle
286 if ((mblen <= 1) || (remainingChars && state_data))
287 return convertToUnicodeCharByChar(chars, length, state);
288 //Remove the last character and try again...
289 state_data = mb[mblen-1];
290 remainingChars = 1;
291 mblen--;
292 } else {
293 // Fail.
294 qWarning("MultiByteToWideChar: Cannot convert multibyte text");
295 break;
296 }
297 }
298 if (len <= 0)
299 return QString();
300 if (wc[len-1] == 0) // len - 1: we don't want terminator
301 --len;
302
303 //save the new state information
304 if (state) {
305 state->state_data[0] = (char)state_data;
306 state->remainingChars = remainingChars;
307 }
308 QString s((QChar*)wc, len);
309 if (wc != wc_auto)
310 delete [] wc;
311 if (prepend) {
312 return sp+s;
313 }
314 return s;
315}
316
317QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
318{
319 if (!chars || !length)
320 return QString();
321
322 int copyLocation = 0;
323 int extra = 2;
324 if (state && state->remainingChars) {
325 copyLocation = state->remainingChars;
326 extra += copyLocation;
327 }
328 int newLength = length + extra;
329 char *mbcs = new char[newLength];
330 //ensure that we have a NULL terminated string
331 mbcs[newLength-1] = 0;
332 mbcs[newLength-2] = 0;
333 memcpy(&(mbcs[copyLocation]), chars, length);
334 if (copyLocation) {
335 //copy the last character from the state
336 mbcs[0] = (char)state->state_data[0];
337 state->remainingChars = 0;
338 }
339 const char *mb = mbcs;
340#ifndef Q_OS_WINCE
341 const char *next = 0;
342 QString s;
343 while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
344 WCHAR wc[2] ={0};
345 int charlength = next - mb;
346 int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
347 if (len>0) {
348 s.append(QChar(wc[0]));
349 } else {
350 int r = GetLastError();
351 //check if the character being dropped is the last character
352 if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
353 state->remainingChars = 1;
354 state->state_data[0] = (char)*mb;
355 }
356 }
357 mb = next;
358 }
359#else
360 QString s;
361 int size = mbstowcs(NULL, mb, length);
362 if (size < 0) {
363 Q_ASSERT("Error in CE TextCodec");
364 return QString();
365 }
366 wchar_t* ws = new wchar_t[size + 2];
367 ws[size +1] = 0;
368 ws[size] = 0;
369 size = mbstowcs(ws, mb, length);
370 for (int i=0; i< size; i++)
371 s.append(QChar(ws[i]));
372 delete [] ws;
373#endif
374 delete mbcs;
375 return s;
376}
377
378QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *) const
379{
380 return qt_winQString2MB(uc, len);
381}
382
383
384QByteArray QWindowsLocalCodec::name() const
385{
386 return "System";
387}
388
389int QWindowsLocalCodec::mibEnum() const
390{
391 return 0;
392}
393
394#else
395
/*
    Locale names (mostly copied from XFree86), grouped by the encoding that
    setupLocaleMapper() guesses for them. Every table ends with a 0 entry.
*/
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ", "cz", "cz_CZ",
    "czech", "hr", "hr_HR", "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE",
    "de_LU", "en_IE", "es", "es_ES", "eu_ES", "fi",
    "fi_FI", "finnish", "fr", "fr_FR", "fr_BE", "fr_LU",
    "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI",
    "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};

// Not used at present:
// static const char * const tcvnlocales[] = {
//     "vi", "vi_VN", 0 };
450
/*
    Returns true if \a lang is one of the names in the 0-terminated table
    \a locale (exact, case-sensitive comparison).

    The scan also stops at an empty-string entry, in which case true is
    returned as well.
*/
static bool try_locale_list(const char * const locale[], const char * lang)
{
    const char * const *entry = locale;
    while (*entry && **entry && strcmp(*entry, lang) != 0)
        ++entry;
    return *entry != 0;
}
458
// The probably_koi8_rlocales need a closer look. The standard says these
// locales use ISO 8859-5, but almost all Russian users use KOI8-R and
// incorrectly set $LANG to ru_RU. ru_RU_hack() therefore checks tolower()
// to see what the C library thinks ru_RU means.
//
// If you read the history, it seems that many Russians blame ISO and
// Perestroika for the confusion.
//
// The real bug is that some programs break if the user specifies
// ru_RU.KOI8-R.

static const char * const probably_koi8_rlocales[] = {
    "ru", "ru_SU", "ru_RU", "russian",
    0
};
472
473static QTextCodec * ru_RU_hack(const char * i) {
474 QTextCodec * ru_RU_codec = 0;
475
476#if !defined(QT_NO_SETLOCALE)
477 QByteArray origlocale(setlocale(LC_CTYPE, i));
478#else
479 QByteArray origlocale(i);
480#endif
481 // unicode koi8r latin5 name
482 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
483 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
484 int latin5 = tolower(0xCE);
485 int koi8r = tolower(0xE0);
486 if (koi8r == 0xC0 && latin5 != 0xEE) {
487 ru_RU_codec = QTextCodec::codecForName("KOI8-R");
488 } else if (koi8r != 0xC0 && latin5 == 0xEE) {
489 ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
490 } else {
491 // something else again... let's assume... *throws dice*
492 ru_RU_codec = QTextCodec::codecForName("KOI8-R");
493 qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
494 koi8r, latin5, i);
495 }
496#if !defined(QT_NO_SETLOCALE)
497 setlocale(LC_CTYPE, origlocale);
498#endif
499
500 return ru_RU_codec;
501}
502
503#endif
504
505#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
506static QTextCodec *checkForCodec(const char *name) {
507 QTextCodec *c = QTextCodec::codecForName(name);
508 if (!c) {
509 const char *at = strchr(name, '@');
510 if (at) {
511 QByteArray n(name, at - name);
512 c = QTextCodec::codecForName(n.data());
513 }
514 }
515 return c;
516}
517#endif
518
519/* the next two functions are implicitely thread safe,
520 as they are only called by setup() which uses a mutex.
521*/
522static void setupLocaleMapper()
523{
524#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
525 localeMapper = QTextCodec::codecForName("System");
526#else
527
528#ifndef QT_NO_ICONV
529 localeMapper = QTextCodec::codecForName("System");
530#endif
531
532#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
533 if (!localeMapper) {
534 char *charset = nl_langinfo (CODESET);
535 if (charset)
536 localeMapper = QTextCodec::codecForName(charset);
537 }
538#endif
539
540 if (!localeMapper) {
541 // Very poorly defined and followed standards causes lots of
542 // code to try to get all the cases... This logic is
543 // duplicated in QIconvCodec, so if you change it here, change
544 // it there too.
545
546 // Try to determine locale codeset from locale name assigned to
547 // LC_CTYPE category.
548
549 // First part is getting that locale name. First try setlocale() which
550 // definitely knows it, but since we cannot fully trust it, get ready
551 // to fall back to environment variables.
552#if !defined(QT_NO_SETLOCALE)
553 char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
554#else
555 char * ctype = qstrdup("");
556#endif
557
558 // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
559 // environment variables.
560 char * lang = qstrdup(qgetenv("LC_ALL").constData());
561 if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
562 if (lang) delete [] lang;
563 lang = qstrdup(qgetenv("LC_CTYPE").constData());
564 }
565 if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
566 if (lang) delete [] lang;
567 lang = qstrdup(qgetenv("LANG").constData());
568 }
569
570 // Now try these in order:
571 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
572 // 2. CODESET from lang if it contains a .CODESET part
573 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
574 // 4. locale (ditto)
575 // 5. check for "@euro"
576 // 6. guess locale from ctype unless ctype is "C"
577 // 7. guess locale from lang
578
579 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
580 char * codeset = ctype ? strchr(ctype, '.') : 0;
581 if (codeset && *codeset == '.')
582 localeMapper = checkForCodec(codeset + 1);
583
584 // 2. CODESET from lang if it contains a .CODESET part
585 codeset = lang ? strchr(lang, '.') : 0;
586 if (!localeMapper && codeset && *codeset == '.')
587 localeMapper = checkForCodec(codeset + 1);
588
589 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
590 if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
591 localeMapper = checkForCodec(ctype);
592
593 // 4. locale (ditto)
594 if (!localeMapper && lang && *lang != 0)
595 localeMapper = checkForCodec(lang);
596
597 // 5. "@euro"
598 if ((!localeMapper && ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro")))
599 localeMapper = checkForCodec("ISO 8859-15");
600
601 // 6. guess locale from ctype unless ctype is "C"
602 // 7. guess locale from lang
603 char * try_by_name = ctype;
604 if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
605 try_by_name = lang;
606
607 // Now do the guessing.
608 if (lang && *lang && !localeMapper && try_by_name && *try_by_name) {
609 if (try_locale_list(iso8859_15locales, lang))
610 localeMapper = QTextCodec::codecForName("ISO 8859-15");
611 else if (try_locale_list(iso8859_2locales, lang))
612 localeMapper = QTextCodec::codecForName("ISO 8859-2");
613 else if (try_locale_list(iso8859_3locales, lang))
614 localeMapper = QTextCodec::codecForName("ISO 8859-3");
615 else if (try_locale_list(iso8859_4locales, lang))
616 localeMapper = QTextCodec::codecForName("ISO 8859-4");
617 else if (try_locale_list(iso8859_5locales, lang))
618 localeMapper = QTextCodec::codecForName("ISO 8859-5");
619 else if (try_locale_list(iso8859_6locales, lang))
620 localeMapper = QTextCodec::codecForName("ISO 8859-6");
621 else if (try_locale_list(iso8859_7locales, lang))
622 localeMapper = QTextCodec::codecForName("ISO 8859-7");
623 else if (try_locale_list(iso8859_8locales, lang))
624 localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
625 else if (try_locale_list(iso8859_9locales, lang))
626 localeMapper = QTextCodec::codecForName("ISO 8859-9");
627 else if (try_locale_list(iso8859_13locales, lang))
628 localeMapper = QTextCodec::codecForName("ISO 8859-13");
629 else if (try_locale_list(tis_620locales, lang))
630 localeMapper = QTextCodec::codecForName("ISO 8859-11");
631 else if (try_locale_list(koi8_ulocales, lang))
632 localeMapper = QTextCodec::codecForName("KOI8-U");
633 else if (try_locale_list(cp_1251locales, lang))
634 localeMapper = QTextCodec::codecForName("CP 1251");
635 else if (try_locale_list(pt_154locales, lang))
636 localeMapper = QTextCodec::codecForName("PT 154");
637 else if (try_locale_list(probably_koi8_rlocales, lang))
638 localeMapper = ru_RU_hack(lang);
639 }
640
641 delete [] ctype;
642 delete [] lang;
643 }
644
645 // If everything failed, we default to 8859-1
646 // We could perhaps default to 8859-15.
647 if (!localeMapper)
648 localeMapper = QTextCodec::codecForName("ISO 8859-1");
649#endif
650}
651
652
653static void setup()
654{
655#ifndef QT_NO_THREAD
656 QMutexLocker locker(QMutexPool::globalInstanceGet(&all));
657#endif
658
659 if (all)
660 return;
661
662 if (destroying_is_ok)
663 qWarning("QTextCodec: Creating new codec during codec cleanup");
664 all = new QList<QTextCodec*>;
665 // create the cleanup object to cleanup all codecs on exit
666 (void) createQTextCodecCleanup();
667
668#ifndef QT_NO_CODECS
669# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
670 // no font codecs when bootstrapping
671 (void)new QFontLaoCodec;
672# if defined(QT_NO_ICONV)
673 // no iconv(3) support, must build all codecs into the library
674 (void)new QFontGb2312Codec;
675 (void)new QFontGbkCodec;
676 (void)new QFontGb18030_0Codec;
677 (void)new QFontJis0208Codec;
678 (void)new QFontJis0201Codec;
679 (void)new QFontKsc5601Codec;
680 (void)new QFontBig5hkscsCodec;
681 (void)new QFontBig5Codec;
682# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
683# endif // Q_WS_X11
684
685 (void)new QTsciiCodec;
686
687 for (int i = 0; i < 9; ++i)
688 (void)new QIsciiCodec(i);
689
690
691# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
692 // no asian codecs when bootstrapping, sorry
693 (void)new QGb18030Codec;
694 (void)new QGbkCodec;
695 (void)new QGb2312Codec;
696 (void)new QEucJpCodec;
697 (void)new QJisCodec;
698 (void)new QSjisCodec;
699 (void)new QEucKrCodec;
700 (void)new QBig5Codec;
701 (void)new QBig5hkscsCodec;
702# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
703#endif // QT_NO_CODECS
704
705#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
706 (void) new QWindowsLocalCodec;
707#endif // Q_OS_WIN32
708
709 (void)new QUtf16Codec;
710 (void)new QUtf16BECodec;
711 (void)new QUtf16LECodec;
712 (void)new QUtf32Codec;
713 (void)new QUtf32BECodec;
714 (void)new QUtf32LECodec;
715 (void)new QLatin15Codec;
716 (void)new QLatin1Codec;
717 (void)new QUtf8Codec;
718
719 for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
720 (void)new QSimpleTextCodec(i);
721
722#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
723 // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
724 (void) new QIconvCodec();
725#endif
726
727 if (!localeMapper)
728 setupLocaleMapper();
729}
730
731QTextCodec::ConverterState::~ConverterState()
732{
733 if (flags & FreeFunction)
734 (QTextCodecUnalignedPointer::decode(state_data))(this);
735 else if (d)
736 qFree(d);
737}
738
739/*!
740 \class QTextCodec
741 \brief The QTextCodec class provides conversions between text encodings.
742 \reentrant
743 \ingroup i18n
744
745 Qt uses Unicode to store, draw and manipulate strings. In many
746 situations you may wish to deal with data that uses a different
747 encoding. For example, most Japanese documents are still stored
748 in Shift-JIS or ISO 2022-JP, while Russian users often have their
749 documents in KOI8-R or Windows-1251.
750
751 Qt provides a set of QTextCodec classes to help with converting
752 non-Unicode formats to and from Unicode. You can also create your
753 own codec classes.
754
755 The supported encodings are:
756
757 \list
758 \o Apple Roman
759 \o \l{Big5 Text Codec}{Big5}
760 \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
761 \o CP949
762 \o \l{EUC-JP Text Codec}{EUC-JP}
763 \o \l{EUC-KR Text Codec}{EUC-KR}
764 \o \l{GBK Text Codec}{GB18030-0}
765 \o IBM 850
766 \o IBM 866
767 \o IBM 874
768 \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
769 \o ISO 8859-1 to 10
770 \o ISO 8859-13 to 16
771 \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
772 \o JIS X 0201
773 \o JIS X 0208
774 \o KOI8-R
775 \o KOI8-U
776 \o MuleLao-1
777 \o ROMAN8
778 \o \l{Shift-JIS Text Codec}{Shift-JIS}
779 \o TIS-620
780 \o \l{TSCII Text Codec}{TSCII}
781 \o UTF-8
782 \o UTF-16
783 \o UTF-16BE
784 \o UTF-16LE
785 \o UTF-32
786 \o UTF-32BE
787 \o UTF-32LE
788 \o Windows-1250 to 1258
789 \o WINSAMI2
790 \endlist
791
792 QTextCodecs can be used as follows to convert some locally encoded
793 string to Unicode. Suppose you have some string encoded in Russian
794 KOI8-R encoding, and want to convert it to Unicode. The simple way
795 to do it is like this:
796
797 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
798
799 After this, \c string holds the text converted to Unicode.
800 Converting a string from Unicode to the local encoding is just as
801 easy:
802
803 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
804
805 To read or write files in various encodings, use QTextStream and
806 its \l{QTextStream::setCodec()}{setCodec()} function. See the
807 \l{tools/codecs}{Codecs} example for an application of QTextCodec
808 to file I/O.
809
810 Some care must be taken when trying to convert the data in chunks,
811 for example, when receiving it over a network. In such cases it is
812 possible that a multi-byte character will be split over two
813 chunks. At best this might result in the loss of a character and
814 at worst cause the entire conversion to fail.
815
816 The approach to use in these situations is to create a QTextDecoder
817 object for the codec and use this QTextDecoder for the whole
818 decoding process, as shown below:
819
820 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
821
822 The QTextDecoder object maintains state between chunks and therefore
823 works correctly even if a multi-byte character is split between
824 chunks.
825
826 \section1 Creating Your Own Codec Class
827
828 Support for new text encodings can be added to Qt by creating
829 QTextCodec subclasses.
830
831 The pure virtual functions describe the encoder to the system and
832 the coder is used as required in the different text file formats
833 supported by QTextStream, and under X11, for the locale-specific
834 character input and output.
835
836 To add support for another encoding to Qt, make a subclass of
837 QTextCodec and implement the functions listed in the table below.
838
839 \table
840 \header \o Function \o Description
841
842 \row \o name()
843 \o Returns the official name for the encoding. If the
844 encoding is listed in the
845 \l{IANA character-sets encoding file}, the name
846 should be the preferred MIME name for the encoding.
847
848 \row \o aliases()
849 \o Returns a list of alternative names for the encoding.
850 QTextCodec provides a default implementation that returns
851 an empty list. For example, "ISO-8859-1" has "latin1",
852 "CP819", "IBM819", and "iso-ir-100" as aliases.
853
854 \row \o mibEnum()
855 \o Returns the MIB enum for the encoding if it is listed in
856 the \l{IANA character-sets encoding file}.
857
858 \row \o convertToUnicode()
859 \o Converts an 8-bit character string to Unicode.
860
861 \row \o convertFromUnicode()
862 \o Converts a Unicode string to an 8-bit character string.
863 \endtable
864
865 You may find it more convenient to make your codec class
866 available as a plugin; see \l{How to Create Qt Plugins} for
867 details.
868
869 \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
870*/
871
872/*!
873 \enum QTextCodec::ConversionFlag
874
875 \value DefaultConversion No flag is set.
876 \value ConvertInvalidToNull If this flag is set, each invalid input
877 character is output as a null character.
878 \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
879
880 \omitvalue FreeFunction
881*/
882
883/*!
884 \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
885
886 Constructs a ConverterState object initialized with the given \a flags.
887*/
888
889/*!
890 \fn QTextCodec::ConverterState::~ConverterState()
891
892 Destroys the ConverterState object.
893*/
894
895/*!
896 \nonreentrant
897
898 Constructs a QTextCodec, and gives it the highest precedence. The
899 QTextCodec should always be constructed on the heap (i.e. with \c
900 new). Qt takes ownership and will delete it when the application
901 terminates.
902*/
903QTextCodec::QTextCodec()
904{
905 setup();
906 all->prepend(this);
907}
908
909
910/*!
911 \nonreentrant
912
913 Destroys the QTextCodec. Note that you should not delete codecs
914 yourself: once created they become Qt's responsibility.
915*/
916QTextCodec::~QTextCodec()
917{
918 if (!destroying_is_ok)
919 qWarning("QTextCodec::~QTextCodec: Called by application");
920 if (all)
921 all->removeAll(this);
922}
923
924/*!
925 \fn QTextCodec *QTextCodec::codecForName(const char *name)
926
927 Searches all installed QTextCodec objects and returns the one
928 which best matches \a name; the match is case-insensitive. Returns
929 0 if no codec matching the name \a name could be found.
930*/
931
932/*!
933 Searches all installed QTextCodec objects and returns the one
934 which best matches \a name; the match is case-insensitive. Returns
935 0 if no codec matching the name \a name could be found.
936*/
937QTextCodec *QTextCodec::codecForName(const QByteArray &name)
938{
939 if (name.isEmpty())
940 return 0;
941
942 setup();
943
944 for (int i = 0; i < all->size(); ++i) {
945 QTextCodec *cursor = all->at(i);
946 if (nameMatch(cursor->name(), name))
947 return cursor;
948 QList<QByteArray> aliases = cursor->aliases();
949 for (int i = 0; i < aliases.size(); ++i)
950 if (nameMatch(aliases.at(i), name))
951 return cursor;
952 }
953
954 return createForName(name);
955}
956
957
958/*!
959 Returns the QTextCodec which matches the \link
960 QTextCodec::mibEnum() MIBenum\endlink \a mib.
961*/
962QTextCodec* QTextCodec::codecForMib(int mib)
963{
964 setup();
965
966 // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
967 // this correctly for compatibility.
968 if (mib == 1000)
969 mib = 1015;
970
971 QList<QTextCodec*>::ConstIterator i;
972 for (int i = 0; i < all->size(); ++i) {
973 QTextCodec *cursor = all->at(i);
974 if (cursor->mibEnum() == mib)
975 return cursor;
976 }
977
978 return createForMib(mib);
979}
980
981/*!
982 Returns the list of all available codecs, by name. Call
983 QTextCodec::codecForName() to obtain the QTextCodec for the name.
984
985 The list may contain many mentions of the same codec
986 if the codec has aliases.
987
988 \sa availableMibs(), name(), aliases()
989*/
990QList<QByteArray> QTextCodec::availableCodecs()
991{
992 setup();
993
994 QList<QByteArray> codecs;
995 for (int i = 0; i < all->size(); ++i) {
996 codecs += all->at(i)->name();
997 codecs += all->at(i)->aliases();
998 }
999#ifndef QT_NO_TEXTCODECPLUGIN
1000 QFactoryLoader *l = loader();
1001 QStringList keys = l->keys();
1002 for (int i = 0; i < keys.size(); ++i) {
1003 if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
1004 QByteArray name = keys.at(i).toLatin1();
1005 if (!codecs.contains(name))
1006 codecs += name;
1007 }
1008 }
1009#endif
1010
1011 return codecs;
1012}
1013
1014/*!
1015 Returns the list of MIBs for all available codecs. Call
1016 QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
1017
1018 \sa availableCodecs(), mibEnum()
1019*/
1020QList<int> QTextCodec::availableMibs()
1021{
1022 setup();
1023
1024 QList<int> codecs;
1025 for (int i = 0; i < all->size(); ++i)
1026 codecs += all->at(i)->mibEnum();
1027#ifndef QT_NO_TEXTCODECPLUGIN
1028 QFactoryLoader *l = loader();
1029 QStringList keys = l->keys();
1030 for (int i = 0; i < keys.size(); ++i) {
1031 if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
1032 int mib = keys.at(i).mid(5).toInt();
1033 if (!codecs.contains(mib))
1034 codecs += mib;
1035 }
1036 }
1037#endif
1038
1039 return codecs;
1040}
1041
1042/*!
1043 Set the codec to \a c; this will be returned by
1044 codecForLocale(). If \a c is a null pointer, the codec is reset to
1045 the default.
1046
1047 This might be needed for some applications that want to use their
1048 own mechanism for setting the locale.
1049
1050 Setting this codec is not supported on DOS based Windows.
1051
1052 \sa codecForLocale()
1053*/
1054void QTextCodec::setCodecForLocale(QTextCodec *c)
1055{
1056#ifdef Q_WS_WIN
1057 if (QSysInfo::WindowsVersion& QSysInfo::WV_DOS_based)
1058 return;
1059#endif
1060 localeMapper = c;
1061 if (!localeMapper)
1062 setupLocaleMapper();
1063}
1064
1065/*!
1066 Returns a pointer to the codec most suitable for this locale.
1067
1068 On Windows, the codec will be based on a system locale. On Unix
1069 systems, starting with Qt 4.2, the codec will be using the \e
1070 iconv library. Note that in both cases the codec's name will be
1071 "System".
1072*/
1073
1074QTextCodec* QTextCodec::codecForLocale()
1075{
1076 if (localeMapper)
1077 return localeMapper;
1078
1079 setup();
1080
1081 return localeMapper;
1082}
1083
1084
1085/*!
1086 \fn QByteArray QTextCodec::name() const
1087
1088 QTextCodec subclasses must reimplement this function. It returns
1089 the name of the encoding supported by the subclass.
1090
1091 If the codec is registered as a character set in the
1092 \l{IANA character-sets encoding file} this method should
1093 return the preferred mime name for the codec if defined,
1094 otherwise its name.
1095*/
1096
1097/*!
1098 \fn int QTextCodec::mibEnum() const
1099
1100 Subclasses of QTextCodec must reimplement this function. It
1101 returns the MIBenum (see \l{IANA character-sets encoding file}
1102 for more information). It is important that each QTextCodec
1103 subclass returns the correct unique value for this function.
1104*/
1105
1106/*!
1107 Subclasses can return a number of aliases for the codec in question.
1108
1109 Standard aliases for codecs can be found in the
1110 \l{IANA character-sets encoding file}.
1111*/
1112QList<QByteArray> QTextCodec::aliases() const
1113{
1114 return QList<QByteArray>();
1115}
1116
1117/*!
1118 \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
1119 ConverterState *state) const
1120
1121 QTextCodec subclasses must reimplement this function.
1122
1123 Converts the first \a len characters of \a chars from the
1124 encoding of the subclass to Unicode, and returns the result in a
1125 QString.
1126
1127 \a state can be 0, in which case the conversion is stateless and
1128 default conversion rules should be used. If state is not 0, the
1129 codec should save the state after the conversion in \a state, and
1130 adjust the remainingChars and invalidChars members of the struct.
1131*/
1132
1133/*!
1134 \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
1135 ConverterState *state) const
1136
1137 QTextCodec subclasses must reimplement this function.
1138
1139 Converts the first \a number of characters from the \a input array
1140 from Unicode to the encoding of the subclass, and returns the result
1141 in a QByteArray.
1142
1143 \a state can be 0 in which case the conversion is stateless and
1144 default conversion rules should be used. If state is not 0, the
1145 codec should save the state after the conversion in \a state, and
1146 adjust the remainingChars and invalidChars members of the struct.
1147*/
1148
1149/*!
1150 Creates a QTextDecoder which stores enough state to decode chunks
1151 of \c{char *} data to create chunks of Unicode data.
1152
1153 The caller is responsible for deleting the returned object.
1154*/
1155QTextDecoder* QTextCodec::makeDecoder() const
1156{
1157 return new QTextDecoder(this);
1158}
1159
1160
1161/*!
1162 Creates a QTextEncoder which stores enough state to encode chunks
1163 of Unicode data as \c{char *} data.
1164
1165 The caller is responsible for deleting the returned object.
1166*/
1167QTextEncoder* QTextCodec::makeEncoder() const
1168{
1169 return new QTextEncoder(this);
1170}
1171
1172/*!
1173 \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
1174 ConverterState *state) const
1175
1176 Converts the first \a number of characters from the \a input array
1177 from Unicode to the encoding of this codec, and returns the result
1178 in a QByteArray.
1179
1180 The \a state of the convertor used is updated.
1181*/
1182
1183/*!
1184 Converts \a str from Unicode to the encoding of this codec, and
1185 returns the result in a QByteArray.
1186*/
1187QByteArray QTextCodec::fromUnicode(const QString& str) const
1188{
1189 return convertFromUnicode(str.constData(), str.length(), 0);
1190}
1191
1192/*!
1193 \fn QString QTextCodec::toUnicode(const char *input, int size,
1194 ConverterState *state) const
1195
1196 Converts the first \a size characters from the \a input from the
1197 encoding of this codec to Unicode, and returns the result in a
1198 QString.
1199
1200 The \a state of the convertor used is updated.
1201*/
1202
1203/*!
1204 Converts \a a from the encoding of this codec to Unicode, and
1205 returns the result in a QString.
1206*/
1207QString QTextCodec::toUnicode(const QByteArray& a) const
1208{
1209 return convertToUnicode(a.constData(), a.length(), 0);
1210}
1211
1212/*!
1213 Returns true if the Unicode character \a ch can be fully encoded
1214 with this codec; otherwise returns false.
1215*/
1216bool QTextCodec::canEncode(QChar ch) const
1217{
1218 ConverterState state;
1219 state.flags = ConvertInvalidToNull;
1220 convertFromUnicode(&ch, 1, &state);
1221 return (state.invalidChars == 0);
1222}
1223
1224/*!
1225 \overload
1226
1227 \a s contains the string being tested for encode-ability.
1228*/
1229bool QTextCodec::canEncode(const QString& s) const
1230{
1231 ConverterState state;
1232 state.flags = ConvertInvalidToNull;
1233 convertFromUnicode(s.constData(), s.length(), &state);
1234 return (state.invalidChars == 0);
1235}
1236
1237#ifdef QT3_SUPPORT
1238/*!
1239 Returns a string representing the current language and
1240 sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1241
1242 \sa QLocale
1243*/
1244const char *QTextCodec::locale()
1245{
1246 static char locale[6];
1247 QByteArray l = QLocale::system().name().toLatin1();
1248 int len = qMin(l.length(), 5);
1249 memcpy(locale, l.constData(), len);
1250 locale[len] = '\0';
1251
1252 return locale;
1253}
1254
1255/*!
1256 \overload
1257*/
1258
1259QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1260{
1261 QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
1262 lenInOut = result.length();
1263 return result;
1264}
1265
1266/*!
1267 \overload
1268
1269 \a a contains the source characters; \a len contains the number of
1270 characters in \a a to use.
1271*/
1272QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1273{
1274 len = qMin(a.size(), len);
1275 return convertToUnicode(a.constData(), len, 0);
1276}
1277#endif
1278
1279/*!
1280 \overload
1281
1282 \a chars contains the source characters.
1283*/
1284QString QTextCodec::toUnicode(const char *chars) const
1285{
1286 int len = qstrlen(chars);
1287 return convertToUnicode(chars, len, 0);
1288}
1289
1290
1291/*!
1292 \class QTextEncoder
1293 \brief The QTextEncoder class provides a state-based encoder.
1294 \reentrant
1295 \ingroup i18n
1296
1297 A text encoder converts text from Unicode into an encoded text format
1298 using a specific codec.
1299
1300 The encoder converts Unicode into another format, remembering any
1301 state that is required between calls.
1302
1303 \sa QTextCodec::makeEncoder(), QTextDecoder
1304*/
1305
1306/*!
1307 \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
1308
1309 Constructs a text encoder for the given \a codec.
1310*/
1311
1312/*!
1313 Destroys the encoder.
1314*/
1315QTextEncoder::~QTextEncoder()
1316{
1317}
1318
1319/*! \internal
1320 \since 4.5
1321 Determines whether the eecoder encountered a failure while decoding the input. If
1322 an error was encountered, the produced result is undefined, and gets converted as according
1323 to the conversion flags.
1324 */
1325bool QTextEncoder::hasFailure() const
1326{
1327 return state.invalidChars != 0;
1328}
1329
1330/*!
1331 Converts the Unicode string \a str into an encoded QByteArray.
1332*/
1333QByteArray QTextEncoder::fromUnicode(const QString& str)
1334{
1335 QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
1336 return result;
1337}
1338
1339/*!
1340 \overload
1341
1342 Converts \a len characters (not bytes) from \a uc, and returns the
1343 result in a QByteArray.
1344*/
1345QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
1346{
1347 QByteArray result = c->fromUnicode(uc, len, &state);
1348 return result;
1349}
1350
1351#ifdef QT3_SUPPORT
1352/*!
1353 \overload
1354
1355 Converts \a lenInOut characters (not bytes) from \a uc, and returns the
1356 result in a QByteArray. The number of characters read is returned in
1357 the \a lenInOut parameter.
1358*/
1359QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1360{
1361 QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
1362 lenInOut = result.length();
1363 return result;
1364}
1365#endif
1366
1367/*!
1368 \class QTextDecoder
1369 \brief The QTextDecoder class provides a state-based decoder.
1370 \reentrant
1371 \ingroup i18n
1372
1373 A text decoder converts text from an encoded text format into Unicode
1374 using a specific codec.
1375
1376 The decoder converts text in this format into Unicode, remembering any
1377 state that is required between calls.
1378
1379 \sa QTextCodec::makeDecoder(), QTextEncoder
1380*/
1381
1382/*!
1383 \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
1384
1385 Constructs a text decoder for the given \a codec.
1386*/
1387
1388/*!
1389 Destroys the decoder.
1390*/
1391QTextDecoder::~QTextDecoder()
1392{
1393}
1394
1395/*!
1396 \fn QString QTextDecoder::toUnicode(const char *chars, int len)
1397
1398 Converts the first \a len bytes in \a chars to Unicode, returning
1399 the result.
1400
1401 If not all characters are used (e.g. if only part of a multi-byte
1402 encoding is at the end of the characters), the decoder remembers
1403 enough state to continue with the next call to this function.
1404*/
1405QString QTextDecoder::toUnicode(const char *chars, int len)
1406{
1407 return c->toUnicode(chars, len, &state);
1408}
1409
1410
1411/*! \overload
1412
1413 The converted string is returned in \a target.
1414 */
1415void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
1416{
1417 Q_ASSERT(target);
1418 switch (c->mibEnum()) {
1419 case 106: // utf8
1420 static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
1421 break;
1422 case 4: { // latin1
1423 target->resize(len);
1424 ushort *data = (ushort*)target->data();
1425 for (int i = len; i >=0; --i)
1426 data[i] = (uchar) chars[i];
1427 } break;
1428 default:
1429 *target = c->toUnicode(chars, len, &state);
1430 }
1431}
1432
1433
1434/*!
1435 \overload
1436
1437 Converts the bytes in the byte array specified by \a ba to Unicode
1438 and returns the result.
1439*/
1440QString QTextDecoder::toUnicode(const QByteArray &ba)
1441{
1442 return c->toUnicode(ba.constData(), ba.length(), &state);
1443}
1444
1445
1446/*!
1447 \fn QTextCodec* QTextCodec::codecForTr()
1448
1449 Returns the codec used by QObject::tr() on its argument. If this
1450 function returns 0 (the default), tr() assumes Latin-1.
1451
1452 \sa setCodecForTr()
1453*/
1454
1455/*!
1456 \fn void QTextCodec::setCodecForTr(QTextCodec *c)
1457 \nonreentrant
1458
1459 Sets the codec used by QObject::tr() on its argument to \a c. If
1460 \a c is 0 (the default), tr() assumes Latin-1.
1461
1462 If the literal quoted text in the program is not in the Latin-1
1463 encoding, this function can be used to set the appropriate
1464 encoding. For example, software developed by Korean programmers
1465 might use eucKR for all the text in the program, in which case the
1466 main() function might look like this:
1467
1468 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
1469
1470 Note that this is not the way to select the encoding that the \e
1471 user has chosen. For example, to convert an application containing
1472 literal English strings to Korean, all that is needed is for the
1473 English strings to be passed through tr() and for translation
1474 files to be loaded. For details of internationalization, see
1475 \l{Internationalization with Qt}.
1476
1477 \sa codecForTr(), setCodecForCStrings()
1478*/
1479
1480
1481/*!
1482 \fn QTextCodec* QTextCodec::codecForCStrings()
1483
1484 Returns the codec used by QString to convert to and from \c{const
1485 char *} and QByteArrays. If this function returns 0 (the default),
1486 QString assumes Latin-1.
1487
1488 \sa setCodecForCStrings()
1489*/
1490
1491/*!
1492 \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
1493 \nonreentrant
1494
1495 Sets the codec used by QString to convert to and from \c{const
1496 char *} and QByteArrays. If the \a codec is 0 (the default),
1497 QString assumes Latin-1.
1498
1499 \warning Some codecs do not preserve the characters in the ASCII
1500 range (0x00 to 0x7F). For example, the Japanese Shift-JIS
1501 encoding maps the backslash character (0x5C) to the Yen
1502 character. To avoid undesirable side-effects, we recommend
1503 avoiding such codecs with setCodecForCStrings().
1504
1505 \sa codecForCStrings(), setCodecForTr()
1506*/
1507
1508/*!
1509 \since 4.4
1510
1511 Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba,
1512 and returns a QTextCodec instance that is capable of decoding the html to unicode.
1513 If the codec cannot be detected from the content provided, \a defaultCodec is returned.
1514*/
1515QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
1516{
1517 // determine charset
1518 int pos;
1519 QTextCodec *c = 0;
1520
1521 if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1522 || ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) {
1523 c = QTextCodec::codecForMib(1015); // utf16
1524 } else if (ba.size() > 2
1525 && (uchar)ba[0] == 0xef
1526 && (uchar)ba[1] == 0xbb
1527 && (uchar)ba[2] == 0xbf) {
1528 c = QTextCodec::codecForMib(106); // utf-8
1529 } else {
1530 QByteArray header = ba.left(512).toLower();
1531 if ((pos = header.indexOf("http-equiv=")) != -1) {
1532 pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1533 if (pos != -1) {
1534 int pos2 = header.indexOf('\"', pos+1);
1535 QByteArray cs = header.mid(pos, pos2-pos);
1536 // qDebug("found charset: %s", cs.data());
1537 c = QTextCodec::codecForName(cs);
1538 }
1539 }
1540 }
1541 if (!c)
1542 c = defaultCodec;
1543
1544 return c;
1545}
1546
1547/*!
1548 \overload
1549
1550 If the codec cannot be detected, this overload returns a Latin-1 QTextCodec.
1551*/
1552QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
1553{
1554 return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
1555}
1556
1557
1558/*! \internal
1559 \since 4.3
1560 Determines whether the decoder encountered a failure while decoding the input. If
1561 an error was encountered, the produced result is undefined, and gets converted as according
1562 to the conversion flags.
1563 */
1564bool QTextDecoder::hasFailure() const
1565{
1566 return state.invalidChars != 0;
1567}
1568
1569/*!
1570 \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
1571
1572 This functionality is no longer provided by Qt. This
1573 compatibility function always returns a null pointer.
1574*/
1575
1576/*!
1577 \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
1578
1579 Use the codecForName(const QByteArray &) overload instead.
1580*/
1581
1582/*!
1583 \fn QTextCodec *QTextCodec::codecForIndex(int i)
1584
1585 Use availableCodecs() or availableMibs() instead and iterate
1586 through the resulting list.
1587*/
1588
1589
1590/*!
1591 \fn QByteArray QTextCodec::mimeName() const
1592
1593 Use name() instead.
1594*/
1595
1596QT_END_NAMESPACE
1597
1598#endif // QT_NO_TEXTCODEC
Note: See TracBrowser for help on using the repository browser.