Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

qtextcodec.cpp@ 83

Last change on this file since 83 was 2, checked in by Dmitry A. Kuminov, 16 years ago
Initially imported qt-all-opensource-src-4.5.1 from Trolltech.
File size: 46.4 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information ([email protected])
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial Usage
10	** Licensees holding valid Qt Commercial licenses may use this file in
11	** accordance with the Qt Commercial License Agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Nokia.
14	**
15	** GNU Lesser General Public License Usage
16	** Alternatively, this file may be used under the terms of the GNU Lesser
17	** General Public License version 2.1 as published by the Free Software
18	** Foundation and appearing in the file LICENSE.LGPL included in the
19	** packaging of this file. Please review the following information to
20	** ensure the GNU Lesser General Public License version 2.1 requirements
21	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22	**
23	** In addition, as a special exception, Nokia gives you certain
24	** additional rights. These rights are described in the Nokia Qt LGPL
25	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26	** package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you are unsure which license is appropriate for your use, please
37	** contact the sales department at [email protected].
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qplatformdefs.h"
43	#include "qtextcodec.h"
44	#include "qtextcodec_p.h"
45
46	#ifndef QT_NO_TEXTCODEC
47
48	#include "qlist.h"
49	#include "qfile.h"
50	#ifndef QT_NO_LIBRARY
51	# include "qcoreapplication.h"
52	# include "qtextcodecplugin.h"
53	# include "private/qfactoryloader_p.h"
54	#endif
55	#include "qstringlist.h"
56
57	#ifdef Q_OS_UNIX
58	# include "qiconvcodec_p.h"
59	#endif
60
61	#include "qutfcodec_p.h"
62	#include "qsimplecodec_p.h"
63	#include "qlatincodec_p.h"
64	#ifndef QT_NO_CODECS
65	# include "qtsciicodec_p.h"
66	# include "qisciicodec_p.h"
67	# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
68	// no iconv(3) support, must build all codecs into the library
69	# include "../../plugins/codecs/cn/qgb18030codec.h"
70	# include "../../plugins/codecs/jp/qeucjpcodec.h"
71	# include "../../plugins/codecs/jp/qjiscodec.h"
72	# include "../../plugins/codecs/jp/qsjiscodec.h"
73	# include "../../plugins/codecs/kr/qeuckrcodec.h"
74	# include "../../plugins/codecs/tw/qbig5codec.h"
75	# endif // QT_NO_ICONV
76	# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
77	# include "qfontlaocodec_p.h"
78	# include "../../plugins/codecs/jp/qfontjpcodec.h"
79	# endif
80	#endif // QT_NO_CODECS
81	#include "qlocale.h"
82	#include "private/qmutexpool_p.h"
83
84	#include <stdlib.h>
85	#include <ctype.h>
86	#include <locale.h>
87	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
88	#include <langinfo.h>
89	#endif
90
91	#if defined(Q_OS_WINCE)
92	# define QT_NO_SETLOCALE
93	#endif
94
95	QT_BEGIN_NAMESPACE
96
97	#ifndef QT_NO_TEXTCODECPLUGIN
// Defines the accessor loader(), which hands out the QFactoryLoader used by
// createForName(), createForMib(), availableCodecs() and availableMibs() to
// find codec plugins. The loader is built from the codec factory interface id
// and the "/codecs" suffix.
Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
    (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
100	#endif
101
102
103	static bool nameMatch(const QByteArray &name, const QByteArray &test)
104	{
105	// if they're the same, return a perfect score
106	if (qstricmp(name, test) == 0)
107	return true;
108
109	const char *n = name.constData();
110	const char *h = test.constData();
111
112	// if the letters and numbers are the same, we have a match
113	while (*n != '\0') {
114	if (isalnum((uchar)*n)) {
115	for (;;) {
116	if (*h == '\0')
117	return false;
118	if (isalnum((uchar)*h))
119	break;
120	++h;
121	}
122	if (tolower((uchar)n) != tolower((uchar)h))
123	return false;
124	++h;
125	}
126	++n;
127	}
128	while (h && !isalnum((uchar)h))
129	++h;
130	return (*h == '\0');
131	}
132
133
134	static QTextCodec *createForName(const QByteArray &name)
135	{
136	#ifndef QT_NO_TEXTCODECPLUGIN
137	QFactoryLoader *l = loader();
138	QStringList keys = l->keys();
139	for (int i = 0; i < keys.size(); ++i) {
140	if (nameMatch(name, keys.at(i).toLatin1())) {
141	QString realName = keys.at(i);
142	if (QTextCodecFactoryInterface *factory
143	= qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
144	return factory->create(realName);
145	}
146	}
147	}
148	#else
149	Q_UNUSED(name);
150	#endif
151	return 0;
152	}
153
154	static QTextCodec *createForMib(int mib)
155	{
156	#ifndef QT_NO_TEXTCODECPLUGIN
157	QString name = QLatin1String("MIB: ") + QString::number(mib);
158	if (QTextCodecFactoryInterface *factory
159	= qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
160	return factory->create(name);
161	#else
162	Q_UNUSED(mib);
163	#endif
164	return 0;
165	}
166
167	static QList<QTextCodec> all = 0;
168	static bool destroying_is_ok = false;
169
170	static QTextCodec *localeMapper = 0;
171	QTextCodec *QTextCodec::cftr = 0;
172
173
174	class QTextCodecCleanup
175	{
176	public:
177	~QTextCodecCleanup();
178	};
179
180	/*
181	Deletes all the created codecs. This destructor is called just
182	before exiting to delete any QTextCodec objects that may be lying
183	around.
184	*/
185	QTextCodecCleanup::~QTextCodecCleanup()
186	{
187	if (!all)
188	return;
189
190	destroying_is_ok = true;
191
192	while (all->size())
193	delete all->takeFirst();
194	delete all;
195	all = 0;
196	localeMapper = 0;
197
198	destroying_is_ok = false;
199	}
200
201	Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
202
203	#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
204	class QWindowsLocalCodec: public QTextCodec
205	{
206	public:
207	QWindowsLocalCodec();
208	~QWindowsLocalCodec();
209
210	QString convertToUnicode(const char , int, ConverterState ) const;
211	QByteArray convertFromUnicode(const QChar , int, ConverterState ) const;
212	QString convertToUnicodeCharByChar(const char chars, int length, ConverterState state) const;
213
214	QByteArray name() const;
215	int mibEnum() const;
216
217	};
218
219	QWindowsLocalCodec::QWindowsLocalCodec()
220	{
221	}
222
223	QWindowsLocalCodec::~QWindowsLocalCodec()
224	{
225	}
226
227	QString QWindowsLocalCodec::convertToUnicode(const char chars, int length, ConverterState state) const
228	{
229	const char *mb = chars;
230	int mblen = length;
231
232	if (!mb \|\| !mblen)
233	return QString();
234
235	const int wclen_auto = 4096;
236	WCHAR wc_auto[wclen_auto];
237	int wclen = wclen_auto;
238	WCHAR *wc = wc_auto;
239	int len;
240	QString sp;
241	bool prepend = false;
242	char state_data = 0;
243	int remainingChars = 0;
244
245	//save the current state information
246	if (state) {
247	state_data = (char)state->state_data[0];
248	remainingChars = state->remainingChars;
249	}
250
251	//convert the pending charcter (if available)
252	if (state && remainingChars) {
253	char prev[3] = {0};
254	prev[0] = state_data;
255	prev[1] = mb[0];
256	remainingChars = 0;
257	len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
258	prev, 2, wc, wclen);
259	if (len) {
260	prepend = true;
261	sp.append(QChar(wc[0]));
262	mb++;
263	mblen--;
264	wc[0] = 0;
265	}
266	}
267
268	while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS,
269	mb, mblen, wc, wclen))) {
270	int r = GetLastError();
271	if (r == ERROR_INSUFFICIENT_BUFFER) {
272	if (wc != wc_auto) {
273	qWarning("MultiByteToWideChar: Size changed");
274	break;
275	} else {
276	wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
277	mb, mblen, 0, 0);
278	wc = new WCHAR[wclen];
279	// and try again...
280	}
281	} else if (r == ERROR_NO_UNICODE_TRANSLATION) {
282	//find the last non NULL character
283	while (mblen > 1 && !(mb[mblen-1]))
284	mblen--;
285	//check whether, we hit an invalid character in the middle
286	if ((mblen <= 1) \|\| (remainingChars && state_data))
287	return convertToUnicodeCharByChar(chars, length, state);
288	//Remove the last character and try again...
289	state_data = mb[mblen-1];
290	remainingChars = 1;
291	mblen--;
292	} else {
293	// Fail.
294	qWarning("MultiByteToWideChar: Cannot convert multibyte text");
295	break;
296	}
297	}
298	if (len <= 0)
299	return QString();
300	if (wc[len-1] == 0) // len - 1: we don't want terminator
301	--len;
302
303	//save the new state information
304	if (state) {
305	state->state_data[0] = (char)state_data;
306	state->remainingChars = remainingChars;
307	}
308	QString s((QChar*)wc, len);
309	if (wc != wc_auto)
310	delete [] wc;
311	if (prepend) {
312	return sp+s;
313	}
314	return s;
315	}
316
317	QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char chars, int length, ConverterState state) const
318	{
319	if (!chars \|\| !length)
320	return QString();
321
322	int copyLocation = 0;
323	int extra = 2;
324	if (state && state->remainingChars) {
325	copyLocation = state->remainingChars;
326	extra += copyLocation;
327	}
328	int newLength = length + extra;
329	char *mbcs = new char[newLength];
330	//ensure that we have a NULL terminated string
331	mbcs[newLength-1] = 0;
332	mbcs[newLength-2] = 0;
333	memcpy(&(mbcs[copyLocation]), chars, length);
334	if (copyLocation) {
335	//copy the last character from the state
336	mbcs[0] = (char)state->state_data[0];
337	state->remainingChars = 0;
338	}
339	const char *mb = mbcs;
340	#ifndef Q_OS_WINCE
341	const char *next = 0;
342	QString s;
343	while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
344	WCHAR wc[2] ={0};
345	int charlength = next - mb;
346	int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
347	if (len>0) {
348	s.append(QChar(wc[0]));
349	} else {
350	int r = GetLastError();
351	//check if the character being dropped is the last character
352	if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
353	state->remainingChars = 1;
354	state->state_data[0] = (char)*mb;
355	}
356	}
357	mb = next;
358	}
359	#else
360	QString s;
361	int size = mbstowcs(NULL, mb, length);
362	if (size < 0) {
363	Q_ASSERT("Error in CE TextCodec");
364	return QString();
365	}
366	wchar_t* ws = new wchar_t[size + 2];
367	ws[size +1] = 0;
368	ws[size] = 0;
369	size = mbstowcs(ws, mb, length);
370	for (int i=0; i< size; i++)
371	s.append(QChar(ws[i]));
372	delete [] ws;
373	#endif
374	delete mbcs;
375	return s;
376	}
377
378	QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar uc, int len, ConverterState ) const
379	{
380	return qt_winQString2MB(uc, len);
381	}
382
383
384	QByteArray QWindowsLocalCodec::name() const
385	{
386	return "System";
387	}
388
389	int QWindowsLocalCodec::mibEnum() const
390	{
391	return 0;
392	}
393
394	#else
395
/*
    Locale-name tables, mostly copied from XFree86.

    Each table is a 0-terminated list of locale names. setupLocaleMapper()
    scans them with try_locale_list() to guess a codec when the locale name
    carries no usable codeset.
*/

// -> "ISO 8859-2"
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI",
    "slovak", "slovene", "sr_SP",
    0
};

// -> "ISO 8859-3"
static const char * const iso8859_3locales[] = {
    "eo",
    0
};

// -> "ISO 8859-4"
static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

// -> "ISO 8859-5"
static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU",
    0
};

// -> "CP 1251"
static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian",
    0
};

// -> "PT 154"
static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ",
    0
};

// -> "ISO 8859-6"
static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

// -> "ISO 8859-7"
static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

// -> "ISO 8859-8-I"
static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

// -> "ISO 8859-9"
static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

// -> "ISO 8859-13"
static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV",
    0
};

// -> "ISO 8859-15"
static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE",
    "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES",
    "it", "it_IT",
    "oc_FR",
    "nl", "nl_BE", "nl_NL",
    "pt", "pt_PT",
    "sv_FI", "wa_BE",
    0
};

// -> "KOI8-U"
static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

// -> looked up as "ISO 8859-11" by setupLocaleMapper()
static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};
447
448	// static const char * const tcvnlocales[] = {
449	// "vi", "vi_VN", 0 };
450
// Returns true if \a lang is one of the names in the 0-terminated table
// \a locale. The comparison is exact and case-sensitive.
//
// An empty-string entry ends the scan like the terminating 0 does, but it is
// never reported as a match. A null \a lang matches nothing.
static bool try_locale_list(const char * const locale[], const char * lang)
{
    if (!lang)
        return false;
    for (int i = 0; locale[i] && *locale[i]; ++i) {
        if (strcmp(locale[i], lang) == 0)
            return true;
    }
    return false;
}
458
459	// For the probably_koi8_locales we have to look. the standard says
460	// these are 8859-5, but almost all Russian users use KOI8-R and
461	// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
462	// it thinks ru_RU means.
463
464	// If you read the history, it seems that many Russians blame ISO and
465	// Perestroika for the confusion.
466	//
467	// The real bug is that some programs break if the user specifies
468	// ru_RU.KOI8-R.
469
470	static const char * const probably_koi8_rlocales[] = {
471	"ru", "ru_SU", "ru_RU", "russian", 0 };
472
473	static QTextCodec * ru_RU_hack(const char * i) {
474	QTextCodec * ru_RU_codec = 0;
475
476	#if !defined(QT_NO_SETLOCALE)
477	QByteArray origlocale(setlocale(LC_CTYPE, i));
478	#else
479	QByteArray origlocale(i);
480	#endif
481	// unicode koi8r latin5 name
482	// 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
483	// 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
484	int latin5 = tolower(0xCE);
485	int koi8r = tolower(0xE0);
486	if (koi8r == 0xC0 && latin5 != 0xEE) {
487	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
488	} else if (koi8r != 0xC0 && latin5 == 0xEE) {
489	ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
490	} else {
491	// something else again... let's assume... throws dice
492	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
493	qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
494	koi8r, latin5, i);
495	}
496	#if !defined(QT_NO_SETLOCALE)
497	setlocale(LC_CTYPE, origlocale);
498	#endif
499
500	return ru_RU_codec;
501	}
502
503	#endif
504
505	#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
506	static QTextCodec checkForCodec(const char name) {
507	QTextCodec *c = QTextCodec::codecForName(name);
508	if (!c) {
509	const char *at = strchr(name, '@');
510	if (at) {
511	QByteArray n(name, at - name);
512	c = QTextCodec::codecForName(n.data());
513	}
514	}
515	return c;
516	}
517	#endif
518
519	/* the next two functions are implicitly thread safe,
520	as they are only called by setup() which uses a mutex.
521	*/
522	static void setupLocaleMapper()
523	{
524	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
525	localeMapper = QTextCodec::codecForName("System");
526	#else
527
528	#ifndef QT_NO_ICONV
529	localeMapper = QTextCodec::codecForName("System");
530	#endif
531
532	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
533	if (!localeMapper) {
534	char *charset = nl_langinfo (CODESET);
535	if (charset)
536	localeMapper = QTextCodec::codecForName(charset);
537	}
538	#endif
539
540	if (!localeMapper) {
541	// Very poorly defined and followed standards causes lots of
542	// code to try to get all the cases... This logic is
543	// duplicated in QIconvCodec, so if you change it here, change
544	// it there too.
545
546	// Try to determine locale codeset from locale name assigned to
547	// LC_CTYPE category.
548
549	// First part is getting that locale name. First try setlocale() which
550	// definitely knows it, but since we cannot fully trust it, get ready
551	// to fall back to environment variables.
552	#if !defined(QT_NO_SETLOCALE)
553	char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
554	#else
555	char * ctype = qstrdup("");
556	#endif
557
558	// Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
559	// environment variables.
560	char * lang = qstrdup(qgetenv("LC_ALL").constData());
561	if (!lang \|\| lang[0] == 0 \|\| strcmp(lang, "C") == 0) {
562	if (lang) delete [] lang;
563	lang = qstrdup(qgetenv("LC_CTYPE").constData());
564	}
565	if (!lang \|\| lang[0] == 0 \|\| strcmp(lang, "C") == 0) {
566	if (lang) delete [] lang;
567	lang = qstrdup(qgetenv("LANG").constData());
568	}
569
570	// Now try these in order:
571	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
572	// 2. CODESET from lang if it contains a .CODESET part
573	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
574	// 4. locale (ditto)
575	// 5. check for "@euro"
576	// 6. guess locale from ctype unless ctype is "C"
577	// 7. guess locale from lang
578
579	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
580	char * codeset = ctype ? strchr(ctype, '.') : 0;
581	if (codeset && *codeset == '.')
582	localeMapper = checkForCodec(codeset + 1);
583
584	// 2. CODESET from lang if it contains a .CODESET part
585	codeset = lang ? strchr(lang, '.') : 0;
586	if (!localeMapper && codeset && *codeset == '.')
587	localeMapper = checkForCodec(codeset + 1);
588
589	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
590	if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
591	localeMapper = checkForCodec(ctype);
592
593	// 4. locale (ditto)
594	if (!localeMapper && lang && *lang != 0)
595	localeMapper = checkForCodec(lang);
596
597	// 5. "@euro"
598	if ((!localeMapper && ctype && strstr(ctype, "@euro")) \|\| (lang && strstr(lang, "@euro")))
599	localeMapper = checkForCodec("ISO 8859-15");
600
601	// 6. guess locale from ctype unless ctype is "C"
602	// 7. guess locale from lang
603	char * try_by_name = ctype;
604	if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
605	try_by_name = lang;
606
607	// Now do the guessing.
608	if (lang && lang && !localeMapper && try_by_name && try_by_name) {
609	if (try_locale_list(iso8859_15locales, lang))
610	localeMapper = QTextCodec::codecForName("ISO 8859-15");
611	else if (try_locale_list(iso8859_2locales, lang))
612	localeMapper = QTextCodec::codecForName("ISO 8859-2");
613	else if (try_locale_list(iso8859_3locales, lang))
614	localeMapper = QTextCodec::codecForName("ISO 8859-3");
615	else if (try_locale_list(iso8859_4locales, lang))
616	localeMapper = QTextCodec::codecForName("ISO 8859-4");
617	else if (try_locale_list(iso8859_5locales, lang))
618	localeMapper = QTextCodec::codecForName("ISO 8859-5");
619	else if (try_locale_list(iso8859_6locales, lang))
620	localeMapper = QTextCodec::codecForName("ISO 8859-6");
621	else if (try_locale_list(iso8859_7locales, lang))
622	localeMapper = QTextCodec::codecForName("ISO 8859-7");
623	else if (try_locale_list(iso8859_8locales, lang))
624	localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
625	else if (try_locale_list(iso8859_9locales, lang))
626	localeMapper = QTextCodec::codecForName("ISO 8859-9");
627	else if (try_locale_list(iso8859_13locales, lang))
628	localeMapper = QTextCodec::codecForName("ISO 8859-13");
629	else if (try_locale_list(tis_620locales, lang))
630	localeMapper = QTextCodec::codecForName("ISO 8859-11");
631	else if (try_locale_list(koi8_ulocales, lang))
632	localeMapper = QTextCodec::codecForName("KOI8-U");
633	else if (try_locale_list(cp_1251locales, lang))
634	localeMapper = QTextCodec::codecForName("CP 1251");
635	else if (try_locale_list(pt_154locales, lang))
636	localeMapper = QTextCodec::codecForName("PT 154");
637	else if (try_locale_list(probably_koi8_rlocales, lang))
638	localeMapper = ru_RU_hack(lang);
639	}
640
641	delete [] ctype;
642	delete [] lang;
643	}
644
645	// If everything failed, we default to 8859-1
646	// We could perhaps default to 8859-15.
647	if (!localeMapper)
648	localeMapper = QTextCodec::codecForName("ISO 8859-1");
649	#endif
650	}
651
652
653	static void setup()
654	{
655	#ifndef QT_NO_THREAD
656	QMutexLocker locker(QMutexPool::globalInstanceGet(&all));
657	#endif
658
659	if (all)
660	return;
661
662	if (destroying_is_ok)
663	qWarning("QTextCodec: Creating new codec during codec cleanup");
664	all = new QList<QTextCodec*>;
665	// create the cleanup object to cleanup all codecs on exit
666	(void) createQTextCodecCleanup();
667
668	#ifndef QT_NO_CODECS
669	# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
670	// no font codecs when bootstrapping
671	(void)new QFontLaoCodec;
672	# if defined(QT_NO_ICONV)
673	// no iconv(3) support, must build all codecs into the library
674	(void)new QFontGb2312Codec;
675	(void)new QFontGbkCodec;
676	(void)new QFontGb18030_0Codec;
677	(void)new QFontJis0208Codec;
678	(void)new QFontJis0201Codec;
679	(void)new QFontKsc5601Codec;
680	(void)new QFontBig5hkscsCodec;
681	(void)new QFontBig5Codec;
682	# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
683	# endif // Q_WS_X11
684
685	(void)new QTsciiCodec;
686
687	for (int i = 0; i < 9; ++i)
688	(void)new QIsciiCodec(i);
689
690
691	# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
692	// no asian codecs when bootstrapping, sorry
693	(void)new QGb18030Codec;
694	(void)new QGbkCodec;
695	(void)new QGb2312Codec;
696	(void)new QEucJpCodec;
697	(void)new QJisCodec;
698	(void)new QSjisCodec;
699	(void)new QEucKrCodec;
700	(void)new QBig5Codec;
701	(void)new QBig5hkscsCodec;
702	# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
703	#endif // QT_NO_CODECS
704
705	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
706	(void) new QWindowsLocalCodec;
707	#endif // Q_OS_WIN32
708
709	(void)new QUtf16Codec;
710	(void)new QUtf16BECodec;
711	(void)new QUtf16LECodec;
712	(void)new QUtf32Codec;
713	(void)new QUtf32BECodec;
714	(void)new QUtf32LECodec;
715	(void)new QLatin15Codec;
716	(void)new QLatin1Codec;
717	(void)new QUtf8Codec;
718
719	for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
720	(void)new QSimpleTextCodec(i);
721
722	#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
723	// QIconvCodec depends on the UTF-16 codec, so it needs to be created last
724	(void) new QIconvCodec();
725	#endif
726
727	if (!localeMapper)
728	setupLocaleMapper();
729	}
730
731	QTextCodec::ConverterState::~ConverterState()
732	{
733	if (flags & FreeFunction)
734	(QTextCodecUnalignedPointer::decode(state_data))(this);
735	else if (d)
736	qFree(d);
737	}
738
739	/*!
740	\class QTextCodec
741	\brief The QTextCodec class provides conversions between text encodings.
742	\reentrant
743	\ingroup i18n
744
745	Qt uses Unicode to store, draw and manipulate strings. In many
746	situations you may wish to deal with data that uses a different
747	encoding. For example, most Japanese documents are still stored
748	in Shift-JIS or ISO 2022-JP, while Russian users often have their
749	documents in KOI8-R or Windows-1251.
750
751	Qt provides a set of QTextCodec classes to help with converting
752	non-Unicode formats to and from Unicode. You can also create your
753	own codec classes.
754
755	The supported encodings are:
756
757	\list
758	\o Apple Roman
759	\o \l{Big5 Text Codec}{Big5}
760	\o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
761	\o CP949
762	\o \l{EUC-JP Text Codec}{EUC-JP}
763	\o \l{EUC-KR Text Codec}{EUC-KR}
764	\o \l{GBK Text Codec}{GB18030-0}
765	\o IBM 850
766	\o IBM 866
767	\o IBM 874
768	\o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
769	\o ISO 8859-1 to 10
770	\o ISO 8859-13 to 16
771	\o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
772	\o JIS X 0201
773	\o JIS X 0208
774	\o KOI8-R
775	\o KOI8-U
776	\o MuleLao-1
777	\o ROMAN8
778	\o \l{Shift-JIS Text Codec}{Shift-JIS}
779	\o TIS-620
780	\o \l{TSCII Text Codec}{TSCII}
781	\o UTF-8
782	\o UTF-16
783	\o UTF-16BE
784	\o UTF-16LE
785	\o UTF-32
786	\o UTF-32BE
787	\o UTF-32LE
788	\o Windows-1250 to 1258
789	\o WINSAMI2
790	\endlist
791
792	QTextCodecs can be used as follows to convert some locally encoded
793	string to Unicode. Suppose you have some string encoded in Russian
794	KOI8-R encoding, and want to convert it to Unicode. The simple way
795	to do it is like this:
796
797	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
798
799	After this, \c string holds the text converted to Unicode.
800	Converting a string from Unicode to the local encoding is just as
801	easy:
802
803	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
804
805	To read or write files in various encodings, use QTextStream and
806	its \l{QTextStream::setCodec()}{setCodec()} function. See the
807	\l{tools/codecs}{Codecs} example for an application of QTextCodec
808	to file I/O.
809
810	Some care must be taken when trying to convert the data in chunks,
811	for example, when receiving it over a network. In such cases it is
812	possible that a multi-byte character will be split over two
813	chunks. At best this might result in the loss of a character and
814	at worst cause the entire conversion to fail.
815
816	The approach to use in these situations is to create a QTextDecoder
817	object for the codec and use this QTextDecoder for the whole
818	decoding process, as shown below:
819
820	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
821
822	The QTextDecoder object maintains state between chunks and therefore
823	works correctly even if a multi-byte character is split between
824	chunks.
825
826	\section1 Creating Your Own Codec Class
827
828	Support for new text encodings can be added to Qt by creating
829	QTextCodec subclasses.
830
831	The pure virtual functions describe the encoder to the system and
832	the coder is used as required in the different text file formats
833	supported by QTextStream, and under X11, for the locale-specific
834	character input and output.
835
836	To add support for another encoding to Qt, make a subclass of
837	QTextCodec and implement the functions listed in the table below.
838
839	\table
840	\header \o Function \o Description
841
842	\row \o name()
843	\o Returns the official name for the encoding. If the
844	encoding is listed in the
845	\l{IANA character-sets encoding file}, the name
846	should be the preferred MIME name for the encoding.
847
848	\row \o aliases()
849	\o Returns a list of alternative names for the encoding.
850	QTextCodec provides a default implementation that returns
851	an empty list. For example, "ISO-8859-1" has "latin1",
852	"CP819", "IBM819", and "iso-ir-100" as aliases.
853
854	\row \o mibEnum()
855	\o Return the MIB enum for the encoding if it is listed in
856	the \l{IANA character-sets encoding file}.
857
858	\row \o convertToUnicode()
859	\o Converts an 8-bit character string to Unicode.
860
861	\row \o convertFromUnicode()
862	\o Converts a Unicode string to an 8-bit character string.
863	\endtable
864
865	You may find it more convenient to make your codec class
866	available as a plugin; see \l{How to Create Qt Plugins} for
867	details.
868
869	\sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
870	*/
871
872	/*!
873	\enum QTextCodec::ConversionFlag
874
875	\value DefaultConversion No flag is set.
876	\value ConvertInvalidToNull If this flag is set, each invalid input
877	character is output as a null character.
878	\value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
879
880	\omitvalue FreeFunction
881	*/
882
883	/*!
884	\fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
885
886	Constructs a ConverterState object initialized with the given \a flags.
887	*/
888
889	/*!
890	\fn QTextCodec::ConverterState::~ConverterState()
891
892	Destroys the ConverterState object.
893	*/
894
895	/*!
896	\nonreentrant
897
898	Constructs a QTextCodec, and gives it the highest precedence. The
899	QTextCodec should always be constructed on the heap (i.e. with \c
900	new). Qt takes ownership and will delete it when the application
901	terminates.
902	*/
QTextCodec::QTextCodec()
{
    // make sure the registry exists before touching it
    setup();
    // Register at the FRONT of the list: lookups scan from index 0, so the
    // most recently constructed codec is found first.
    all->prepend(this);
}
908
909
910	/*!
911	\nonreentrant
912
913	Destroys the QTextCodec. Note that you should not delete codecs
914	yourself: once created they become Qt's responsibility.
915	*/
QTextCodec::~QTextCodec()
{
    // destroying_is_ok is raised only while QTextCodecCleanup deletes the
    // registered codecs; any other deletion is reported
    if (!destroying_is_ok)
        qWarning("QTextCodec::~QTextCodec: Called by application");
    // unregister, provided the registry itself still exists
    if (all)
        all->removeAll(this);
}
923
924	/*!
925	\fn QTextCodec *QTextCodec::codecForName(const char *name)
926
927	Searches all installed QTextCodec objects and returns the one
928	which best matches \a name; the match is case-insensitive. Returns
929	0 if no codec matching the name \a name could be found.
930	*/
931
932	/*!
933	Searches all installed QTextCodec objects and returns the one
934	which best matches \a name; the match is case-insensitive. Returns
935	0 if no codec matching the name \a name could be found.
936	*/
937	QTextCodec *QTextCodec::codecForName(const QByteArray &name)
938	{
939	if (name.isEmpty())
940	return 0;
941
942	setup();
943
944	for (int i = 0; i < all->size(); ++i) {
945	QTextCodec *cursor = all->at(i);
946	if (nameMatch(cursor->name(), name))
947	return cursor;
948	QList<QByteArray> aliases = cursor->aliases();
949	for (int i = 0; i < aliases.size(); ++i)
950	if (nameMatch(aliases.at(i), name))
951	return cursor;
952	}
953
954	return createForName(name);
955	}
956
957
958	/*!
959	Returns the QTextCodec which matches the \link
960	QTextCodec::mibEnum() MIBenum\endlink \a mib.
961	*/
962	QTextCodec* QTextCodec::codecForMib(int mib)
963	{
964	setup();
965
966	// Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
967	// this correctly for compatibility.
968	if (mib == 1000)
969	mib = 1015;
970
971	QList<QTextCodec*>::ConstIterator i;
972	for (int i = 0; i < all->size(); ++i) {
973	QTextCodec *cursor = all->at(i);
974	if (cursor->mibEnum() == mib)
975	return cursor;
976	}
977
978	return createForMib(mib);
979	}
980
981	/*!
982	Returns the list of all available codecs, by name. Call
983	QTextCodec::codecForName() to obtain the QTextCodec for the name.
984
985	The list may contain many mentions of the same codec
986	if the codec has aliases.
987
988	\sa availableMibs(), name(), aliases()
989	*/
990	QList<QByteArray> QTextCodec::availableCodecs()
991	{
992	setup();
993
994	QList<QByteArray> codecs;
995	for (int i = 0; i < all->size(); ++i) {
996	codecs += all->at(i)->name();
997	codecs += all->at(i)->aliases();
998	}
999	#ifndef QT_NO_TEXTCODECPLUGIN
1000	QFactoryLoader *l = loader();
1001	QStringList keys = l->keys();
1002	for (int i = 0; i < keys.size(); ++i) {
1003	if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
1004	QByteArray name = keys.at(i).toLatin1();
1005	if (!codecs.contains(name))
1006	codecs += name;
1007	}
1008	}
1009	#endif
1010
1011	return codecs;
1012	}
1013
1014	/*!
1015	Returns the list of MIBs for all available codecs. Call
1016	QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
1017
1018	\sa availableCodecs(), mibEnum()
1019	*/
1020	QList<int> QTextCodec::availableMibs()
1021	{
1022	setup();
1023
1024	QList<int> codecs;
1025	for (int i = 0; i < all->size(); ++i)
1026	codecs += all->at(i)->mibEnum();
1027	#ifndef QT_NO_TEXTCODECPLUGIN
1028	QFactoryLoader *l = loader();
1029	QStringList keys = l->keys();
1030	for (int i = 0; i < keys.size(); ++i) {
1031	if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
1032	int mib = keys.at(i).mid(5).toInt();
1033	if (!codecs.contains(mib))
1034	codecs += mib;
1035	}
1036	}
1037	#endif
1038
1039	return codecs;
1040	}
1041
1042	/*!
1043	Set the codec to \a c; this will be returned by
1044	codecForLocale(). If \a c is a null pointer, the codec is reset to
1045	the default.
1046
1047	This might be needed for some applications that want to use their
1048	own mechanism for setting the locale.
1049
1050	Setting this codec is not supported on DOS based Windows.
1051
1052	\sa codecForLocale()
1053	*/
1054	void QTextCodec::setCodecForLocale(QTextCodec *c)
1055	{
1056	#ifdef Q_WS_WIN
1057	if (QSysInfo::WindowsVersion& QSysInfo::WV_DOS_based)
1058	return;
1059	#endif
1060	localeMapper = c;
1061	if (!localeMapper)
1062	setupLocaleMapper();
1063	}
1064
1065	/*!
1066	Returns a pointer to the codec most suitable for this locale.
1067
1068	On Windows, the codec will be based on a system locale. On Unix
1069	systems, starting with Qt 4.2, the codec will be using the \e
1070	iconv library. Note that in both cases the codec's name will be
1071	"System".
1072	*/
1073
1074	QTextCodec* QTextCodec::codecForLocale()
1075	{
1076	if (localeMapper)
1077	return localeMapper;
1078
1079	setup();
1080
1081	return localeMapper;
1082	}
1083
1084
1085	/*!
1086	\fn QByteArray QTextCodec::name() const
1087
1088	QTextCodec subclasses must reimplement this function. It returns
1089	the name of the encoding supported by the subclass.
1090
1091	If the codec is registered as a character set in the
1092	\l{IANA character-sets encoding file} this method should
1093	return the preferred mime name for the codec if defined,
1094	otherwise its name.
1095	*/
1096
1097	/*!
1098	\fn int QTextCodec::mibEnum() const
1099
1100	Subclasses of QTextCodec must reimplement this function. It
1101	returns the MIBenum (see \l{IANA character-sets encoding file}
1102	for more information). It is important that each QTextCodec
1103	subclass returns the correct unique value for this function.
1104	*/
1105
1106	/*!
1107	Subclasses can return a number of aliases for the codec in question.
1108
1109	Standard aliases for codecs can be found in the
1110	\l{IANA character-sets encoding file}.
1111	*/
1112	QList<QByteArray> QTextCodec::aliases() const
1113	{
1114	return QList<QByteArray>();
1115	}
1116
1117	/*!
1118	\fn QString QTextCodec::convertToUnicode(const char *chars, int len,
1119	ConverterState *state) const
1120
1121	QTextCodec subclasses must reimplement this function.
1122
1123	Converts the first \a len characters of \a chars from the
1124	encoding of the subclass to Unicode, and returns the result in a
1125	QString.
1126
1127	\a state can be 0, in which case the conversion is stateless and
1128	default conversion rules should be used. If state is not 0, the
1129	codec should save the state after the conversion in \a state, and
1130	adjust the remainingChars and invalidChars members of the struct.
1131	*/
1132
1133	/*!
1134	\fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
1135	ConverterState *state) const
1136
1137	QTextCodec subclasses must reimplement this function.
1138
1139	Converts the first \a number of characters from the \a input array
1140	from Unicode to the encoding of the subclass, and returns the result
1141	in a QByteArray.
1142
1143	\a state can be 0 in which case the conversion is stateless and
1144	default conversion rules should be used. If state is not 0, the
1145	codec should save the state after the conversion in \a state, and
1146	adjust the remainingChars and invalidChars members of the struct.
1147	*/
1148
1149	/*!
1150	Creates a QTextDecoder which stores enough state to decode chunks
1151	of \c{char *} data to create chunks of Unicode data.
1152
1153	The caller is responsible for deleting the returned object.
1154	*/
1155	QTextDecoder* QTextCodec::makeDecoder() const
1156	{
1157	return new QTextDecoder(this);
1158	}
1159
1160
1161	/*!
1162	Creates a QTextEncoder which stores enough state to encode chunks
1163	of Unicode data as \c{char *} data.
1164
1165	The caller is responsible for deleting the returned object.
1166	*/
1167	QTextEncoder* QTextCodec::makeEncoder() const
1168	{
1169	return new QTextEncoder(this);
1170	}
1171
1172	/*!
1173	\fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
1174	ConverterState *state) const
1175
1176	Converts the first \a number of characters from the \a input array
1177	from Unicode to the encoding of this codec, and returns the result
1178	in a QByteArray.
1179
1180	The \a state of the convertor used is updated.
1181	*/
1182
1183	/*!
1184	Converts \a str from Unicode to the encoding of this codec, and
1185	returns the result in a QByteArray.
1186	*/
1187	QByteArray QTextCodec::fromUnicode(const QString& str) const
1188	{
1189	return convertFromUnicode(str.constData(), str.length(), 0);
1190	}
1191
1192	/*!
1193	\fn QString QTextCodec::toUnicode(const char *input, int size,
1194	ConverterState *state) const
1195
1196	Converts the first \a size characters from the \a input from the
1197	encoding of this codec to Unicode, and returns the result in a
1198	QString.
1199
1200	The \a state of the convertor used is updated.
1201	*/
1202
1203	/*!
1204	Converts \a a from the encoding of this codec to Unicode, and
1205	returns the result in a QString.
1206	*/
1207	QString QTextCodec::toUnicode(const QByteArray& a) const
1208	{
1209	return convertToUnicode(a.constData(), a.length(), 0);
1210	}
1211
1212	/*!
1213	Returns true if the Unicode character \a ch can be fully encoded
1214	with this codec; otherwise returns false.
1215	*/
1216	bool QTextCodec::canEncode(QChar ch) const
1217	{
1218	ConverterState state;
1219	state.flags = ConvertInvalidToNull;
1220	convertFromUnicode(&ch, 1, &state);
1221	return (state.invalidChars == 0);
1222	}
1223
1224	/*!
1225	\overload
1226
1227	\a s contains the string being tested for encode-ability.
1228	*/
1229	bool QTextCodec::canEncode(const QString& s) const
1230	{
1231	ConverterState state;
1232	state.flags = ConvertInvalidToNull;
1233	convertFromUnicode(s.constData(), s.length(), &state);
1234	return (state.invalidChars == 0);
1235	}
1236
1237	#ifdef QT3_SUPPORT
1238	/*!
1239	Returns a string representing the current language and
1240	sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1241
1242	\sa QLocale
1243	*/
1244	const char *QTextCodec::locale()
1245	{
1246	static char locale[6];
1247	QByteArray l = QLocale::system().name().toLatin1();
1248	int len = qMin(l.length(), 5);
1249	memcpy(locale, l.constData(), len);
1250	locale[len] = '\0';
1251
1252	return locale;
1253	}
1254
1255	/*!
1256	\overload
1257	*/
1258
1259	QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1260	{
1261	QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
1262	lenInOut = result.length();
1263	return result;
1264	}
1265
1266	/*!
1267	\overload
1268
1269	\a a contains the source characters; \a len contains the number of
1270	characters in \a a to use.
1271	*/
1272	QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1273	{
1274	len = qMin(a.size(), len);
1275	return convertToUnicode(a.constData(), len, 0);
1276	}
1277	#endif
1278
1279	/*!
1280	\overload
1281
1282	\a chars contains the source characters.
1283	*/
1284	QString QTextCodec::toUnicode(const char *chars) const
1285	{
1286	int len = qstrlen(chars);
1287	return convertToUnicode(chars, len, 0);
1288	}
1289
1290
1291	/*!
1292	\class QTextEncoder
1293	\brief The QTextEncoder class provides a state-based encoder.
1294	\reentrant
1295	\ingroup i18n
1296
1297	A text encoder converts text from Unicode into an encoded text format
1298	using a specific codec.
1299
1300	The encoder converts Unicode into another format, remembering any
1301	state that is required between calls.
1302
1303	\sa QTextCodec::makeEncoder(), QTextDecoder
1304	*/
1305
1306	/*!
1307	\fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
1308
1309	Constructs a text encoder for the given \a codec.
1310	*/
1311
1312	/*!
1313	Destroys the encoder.
1314	*/
1315	QTextEncoder::~QTextEncoder()
1316	{
1317	}
1318
1319	/*! \internal
1320	\since 4.5
1321	Determines whether the eecoder encountered a failure while decoding the input. If
1322	an error was encountered, the produced result is undefined, and gets converted as according
1323	to the conversion flags.
1324	*/
1325	bool QTextEncoder::hasFailure() const
1326	{
1327	return state.invalidChars != 0;
1328	}
1329
1330	/*!
1331	Converts the Unicode string \a str into an encoded QByteArray.
1332	*/
1333	QByteArray QTextEncoder::fromUnicode(const QString& str)
1334	{
1335	QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
1336	return result;
1337	}
1338
1339	/*!
1340	\overload
1341
1342	Converts \a len characters (not bytes) from \a uc, and returns the
1343	result in a QByteArray.
1344	*/
1345	QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
1346	{
1347	QByteArray result = c->fromUnicode(uc, len, &state);
1348	return result;
1349	}
1350
1351	#ifdef QT3_SUPPORT
1352	/*!
1353	\overload
1354
1355	Converts \a lenInOut characters (not bytes) from \a uc, and returns the
1356	result in a QByteArray. The number of characters read is returned in
1357	the \a lenInOut parameter.
1358	*/
1359	QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1360	{
1361	QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
1362	lenInOut = result.length();
1363	return result;
1364	}
1365	#endif
1366
1367	/*!
1368	\class QTextDecoder
1369	\brief The QTextDecoder class provides a state-based decoder.
1370	\reentrant
1371	\ingroup i18n
1372
1373	A text decoder converts text from an encoded text format into Unicode
1374	using a specific codec.
1375
1376	The decoder converts text in this format into Unicode, remembering any
1377	state that is required between calls.
1378
1379	\sa QTextCodec::makeDecoder(), QTextEncoder
1380	*/
1381
1382	/*!
1383	\fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
1384
1385	Constructs a text decoder for the given \a codec.
1386	*/
1387
1388	/*!
1389	Destroys the decoder.
1390	*/
1391	QTextDecoder::~QTextDecoder()
1392	{
1393	}
1394
1395	/*!
1396	\fn QString QTextDecoder::toUnicode(const char *chars, int len)
1397
1398	Converts the first \a len bytes in \a chars to Unicode, returning
1399	the result.
1400
1401	If not all characters are used (e.g. if only part of a multi-byte
1402	encoding is at the end of the characters), the decoder remembers
1403	enough state to continue with the next call to this function.
1404	*/
1405	QString QTextDecoder::toUnicode(const char *chars, int len)
1406	{
1407	return c->toUnicode(chars, len, &state);
1408	}
1409
1410
1411	/*! \overload
1412
1413	The converted string is returned in \a target.
1414	*/
1415	void QTextDecoder::toUnicode(QString target, const char chars, int len)
1416	{
1417	Q_ASSERT(target);
1418	switch (c->mibEnum()) {
1419	case 106: // utf8
1420	static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
1421	break;
1422	case 4: { // latin1
1423	target->resize(len);
1424	ushort data = (ushort)target->data();
1425	for (int i = len; i >=0; --i)
1426	data[i] = (uchar) chars[i];
1427	} break;
1428	default:
1429	*target = c->toUnicode(chars, len, &state);
1430	}
1431	}
1432
1433
1434	/*!
1435	\overload
1436
1437	Converts the bytes in the byte array specified by \a ba to Unicode
1438	and returns the result.
1439	*/
1440	QString QTextDecoder::toUnicode(const QByteArray &ba)
1441	{
1442	return c->toUnicode(ba.constData(), ba.length(), &state);
1443	}
1444
1445
1446	/*!
1447	\fn QTextCodec* QTextCodec::codecForTr()
1448
1449	Returns the codec used by QObject::tr() on its argument. If this
1450	function returns 0 (the default), tr() assumes Latin-1.
1451
1452	\sa setCodecForTr()
1453	*/
1454
1455	/*!
1456	\fn void QTextCodec::setCodecForTr(QTextCodec *c)
1457	\nonreentrant
1458
1459	Sets the codec used by QObject::tr() on its argument to \a c. If
1460	\a c is 0 (the default), tr() assumes Latin-1.
1461
1462	If the literal quoted text in the program is not in the Latin-1
1463	encoding, this function can be used to set the appropriate
1464	encoding. For example, software developed by Korean programmers
1465	might use eucKR for all the text in the program, in which case the
1466	main() function might look like this:
1467
1468	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
1469
1470	Note that this is not the way to select the encoding that the \e
1471	user has chosen. For example, to convert an application containing
1472	literal English strings to Korean, all that is needed is for the
1473	English strings to be passed through tr() and for translation
1474	files to be loaded. For details of internationalization, see
1475	\l{Internationalization with Qt}.
1476
1477	\sa codecForTr(), setCodecForCStrings()
1478	*/
1479
1480
1481	/*!
1482	\fn QTextCodec* QTextCodec::codecForCStrings()
1483
1484	Returns the codec used by QString to convert to and from \c{const
1485	char *} and QByteArrays. If this function returns 0 (the default),
1486	QString assumes Latin-1.
1487
1488	\sa setCodecForCStrings()
1489	*/
1490
1491	/*!
1492	\fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
1493	\nonreentrant
1494
1495	Sets the codec used by QString to convert to and from \c{const
1496	char *} and QByteArrays. If the \a codec is 0 (the default),
1497	QString assumes Latin-1.
1498
1499	\warning Some codecs do not preserve the characters in the ASCII
1500	range (0x00 to 0x7F). For example, the Japanese Shift-JIS
1501	encoding maps the backslash character (0x5C) to the Yen
1502	character. To avoid undesirable side-effects, we recommend
1503	avoiding such codecs with setCodecForCStrings().
1504
1505	\sa codecForCStrings(), setCodecForTr()
1506	*/
1507
1508	/*!
1509	\since 4.4
1510
1511	Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba,
1512	and returns a QTextCodec instance that is capable of decoding the html to unicode.
1513	If the codec cannot be detected from the content provided, \a defaultCodec is returned.
1514	*/
1515	QTextCodec QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec defaultCodec)
1516	{
1517	// determine charset
1518	int pos;
1519	QTextCodec *c = 0;
1520
1521	if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1522	\|\| ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) {
1523	c = QTextCodec::codecForMib(1015); // utf16
1524	} else if (ba.size() > 2
1525	&& (uchar)ba[0] == 0xef
1526	&& (uchar)ba[1] == 0xbb
1527	&& (uchar)ba[2] == 0xbf) {
1528	c = QTextCodec::codecForMib(106); // utf-8
1529	} else {
1530	QByteArray header = ba.left(512).toLower();
1531	if ((pos = header.indexOf("http-equiv=")) != -1) {
1532	pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1533	if (pos != -1) {
1534	int pos2 = header.indexOf('\"', pos+1);
1535	QByteArray cs = header.mid(pos, pos2-pos);
1536	// qDebug("found charset: %s", cs.data());
1537	c = QTextCodec::codecForName(cs);
1538	}
1539	}
1540	}
1541	if (!c)
1542	c = defaultCodec;
1543
1544	return c;
1545	}
1546
1547	/*!
1548	\overload
1549
1550	If the codec cannot be detected, this overload returns a Latin-1 QTextCodec.
1551	*/
1552	QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
1553	{
1554	return codecForHtml(ba, QTextCodec::codecForMib(/Latin 1/ 4));
1555	}
1556
1557
1558	/*! \internal
1559	\since 4.3
1560	Determines whether the decoder encountered a failure while decoding the input. If
1561	an error was encountered, the produced result is undefined, and gets converted as according
1562	to the conversion flags.
1563	*/
1564	bool QTextDecoder::hasFailure() const
1565	{
1566	return state.invalidChars != 0;
1567	}
1568
1569	/*!
1570	\fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
1571
1572	This functionality is no longer provided by Qt. This
1573	compatibility function always returns a null pointer.
1574	*/
1575
1576	/*!
1577	\fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
1578
1579	Use the codecForName(const QByteArray &) overload instead.
1580	*/
1581
1582	/*!
1583	\fn QTextCodec *QTextCodec::codecForIndex(int i)
1584
1585	Use availableCodecs() or availableMibs() instead and iterate
1586	through the resulting list.
1587	*/
1588
1589
1590	/*!
1591	\fn QByteArray QTextCodec::mimeName() const
1592
1593	Use name() instead.
1594	*/
1595
1596	QT_END_NAMESPACE
1597
1598	#endif // QT_NO_TEXTCODEC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/corelib/codecs/qtextcodec.cpp@ 83

Download in other formats: