Context Navigation

source: trunk/src/corelib/codecs/qtextcodec.cpp@ 168

Last change on this file since 168 was 135, checked in by Dmitry A. Kuminov, 16 years ago
core: Added a hack to use cp866 on OS/2 if LANG is ru_RU.
File size: 46.6 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information ([email protected])
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial Usage
10	** Licensees holding valid Qt Commercial licenses may use this file in
11	** accordance with the Qt Commercial License Agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Nokia.
14	**
15	** GNU Lesser General Public License Usage
16	** Alternatively, this file may be used under the terms of the GNU Lesser
17	** General Public License version 2.1 as published by the Free Software
18	** Foundation and appearing in the file LICENSE.LGPL included in the
19	** packaging of this file. Please review the following information to
20	** ensure the GNU Lesser General Public License version 2.1 requirements
21	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22	**
23	** In addition, as a special exception, Nokia gives you certain
24	** additional rights. These rights are described in the Nokia Qt LGPL
25	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26	** package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you are unsure which license is appropriate for your use, please
37	** contact the sales department at [email protected].
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qplatformdefs.h"
43	#include "qtextcodec.h"
44	#include "qtextcodec_p.h"
45
46	#ifndef QT_NO_TEXTCODEC
47
48	#include "qlist.h"
49	#include "qfile.h"
50	#ifndef QT_NO_LIBRARY
51	# include "qcoreapplication.h"
52	# include "qtextcodecplugin.h"
53	# include "private/qfactoryloader_p.h"
54	#endif
55	#include "qstringlist.h"
56
57	#ifdef Q_OS_UNIX
58	# include "qiconvcodec_p.h"
59	#endif
60
61	#include "qutfcodec_p.h"
62	#include "qsimplecodec_p.h"
63	#include "qlatincodec_p.h"
64	#ifndef QT_NO_CODECS
65	# include "qtsciicodec_p.h"
66	# include "qisciicodec_p.h"
67	# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
68	// no iconv(3) support, must build all codecs into the library
69	# include "../../plugins/codecs/cn/qgb18030codec.h"
70	# include "../../plugins/codecs/jp/qeucjpcodec.h"
71	# include "../../plugins/codecs/jp/qjiscodec.h"
72	# include "../../plugins/codecs/jp/qsjiscodec.h"
73	# include "../../plugins/codecs/kr/qeuckrcodec.h"
74	# include "../../plugins/codecs/tw/qbig5codec.h"
75	# endif // QT_NO_ICONV
76	# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
77	# include "qfontlaocodec_p.h"
78	# include "../../plugins/codecs/jp/qfontjpcodec.h"
79	# endif
80	#endif // QT_NO_CODECS
81	#include "qlocale.h"
82	#include "private/qmutexpool_p.h"
83
84	#include <stdlib.h>
85	#include <ctype.h>
86	#include <locale.h>
87	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
88	#include <langinfo.h>
89	#endif
90
91	#if defined(Q_OS_WINCE)
92	# define QT_NO_SETLOCALE
93	#endif
94
95	QT_BEGIN_NAMESPACE
96
97	#ifndef QT_NO_TEXTCODECPLUGIN
98	Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
99	(QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
100	#endif
101
102
103	static bool nameMatch(const QByteArray &name, const QByteArray &test)
104	{
105	// if they're the same, return a perfect score
106	if (qstricmp(name, test) == 0)
107	return true;
108
109	const char *n = name.constData();
110	const char *h = test.constData();
111
112	// if the letters and numbers are the same, we have a match
113	while (*n != '\0') {
114	if (isalnum((uchar)*n)) {
115	for (;;) {
116	if (*h == '\0')
117	return false;
118	if (isalnum((uchar)*h))
119	break;
120	++h;
121	}
122	if (tolower((uchar)n) != tolower((uchar)h))
123	return false;
124	++h;
125	}
126	++n;
127	}
128	while (h && !isalnum((uchar)h))
129	++h;
130	return (*h == '\0');
131	}
132
133
134	static QTextCodec *createForName(const QByteArray &name)
135	{
136	#ifndef QT_NO_TEXTCODECPLUGIN
137	QFactoryLoader *l = loader();
138	QStringList keys = l->keys();
139	for (int i = 0; i < keys.size(); ++i) {
140	if (nameMatch(name, keys.at(i).toLatin1())) {
141	QString realName = keys.at(i);
142	if (QTextCodecFactoryInterface *factory
143	= qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
144	return factory->create(realName);
145	}
146	}
147	}
148	#else
149	Q_UNUSED(name);
150	#endif
151	return 0;
152	}
153
154	static QTextCodec *createForMib(int mib)
155	{
156	#ifndef QT_NO_TEXTCODECPLUGIN
157	QString name = QLatin1String("MIB: ") + QString::number(mib);
158	if (QTextCodecFactoryInterface *factory
159	= qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
160	return factory->create(name);
161	#else
162	Q_UNUSED(mib);
163	#endif
164	return 0;
165	}
166
167	static QList<QTextCodec> all = 0;
168	static bool destroying_is_ok = false;
169
170	static QTextCodec *localeMapper = 0;
171	QTextCodec *QTextCodec::cftr = 0;
172
173
174	class QTextCodecCleanup
175	{
176	public:
177	~QTextCodecCleanup();
178	};
179
180	/*
181	Deletes all the created codecs. This destructor is called just
182	before exiting to delete any QTextCodec objects that may be lying
183	around.
184	*/
185	QTextCodecCleanup::~QTextCodecCleanup()
186	{
187	if (!all)
188	return;
189
190	destroying_is_ok = true;
191
192	while (all->size())
193	delete all->takeFirst();
194	delete all;
195	all = 0;
196	localeMapper = 0;
197
198	destroying_is_ok = false;
199	}
200
201	Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
202
203	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
204	class QWindowsLocalCodec: public QTextCodec
205	{
206	public:
207	QWindowsLocalCodec();
208	~QWindowsLocalCodec();
209
210	QString convertToUnicode(const char , int, ConverterState ) const;
211	QByteArray convertFromUnicode(const QChar , int, ConverterState ) const;
212	QString convertToUnicodeCharByChar(const char chars, int length, ConverterState state) const;
213
214	QByteArray name() const;
215	int mibEnum() const;
216
217	};
218
219	QWindowsLocalCodec::QWindowsLocalCodec()
220	{
221	}
222
223	QWindowsLocalCodec::~QWindowsLocalCodec()
224	{
225	}
226
227	QString QWindowsLocalCodec::convertToUnicode(const char chars, int length, ConverterState state) const
228	{
229	const char *mb = chars;
230	int mblen = length;
231
232	if (!mb \|\| !mblen)
233	return QString();
234
235	const int wclen_auto = 4096;
236	WCHAR wc_auto[wclen_auto];
237	int wclen = wclen_auto;
238	WCHAR *wc = wc_auto;
239	int len;
240	QString sp;
241	bool prepend = false;
242	char state_data = 0;
243	int remainingChars = 0;
244
245	//save the current state information
246	if (state) {
247	state_data = (char)state->state_data[0];
248	remainingChars = state->remainingChars;
249	}
250
251	//convert the pending charcter (if available)
252	if (state && remainingChars) {
253	char prev[3] = {0};
254	prev[0] = state_data;
255	prev[1] = mb[0];
256	remainingChars = 0;
257	len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
258	prev, 2, wc, wclen);
259	if (len) {
260	prepend = true;
261	sp.append(QChar(wc[0]));
262	mb++;
263	mblen--;
264	wc[0] = 0;
265	}
266	}
267
268	while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS,
269	mb, mblen, wc, wclen))) {
270	int r = GetLastError();
271	if (r == ERROR_INSUFFICIENT_BUFFER) {
272	if (wc != wc_auto) {
273	qWarning("MultiByteToWideChar: Size changed");
274	break;
275	} else {
276	wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
277	mb, mblen, 0, 0);
278	wc = new WCHAR[wclen];
279	// and try again...
280	}
281	} else if (r == ERROR_NO_UNICODE_TRANSLATION) {
282	//find the last non NULL character
283	while (mblen > 1 && !(mb[mblen-1]))
284	mblen--;
285	//check whether, we hit an invalid character in the middle
286	if ((mblen <= 1) \|\| (remainingChars && state_data))
287	return convertToUnicodeCharByChar(chars, length, state);
288	//Remove the last character and try again...
289	state_data = mb[mblen-1];
290	remainingChars = 1;
291	mblen--;
292	} else {
293	// Fail.
294	qWarning("MultiByteToWideChar: Cannot convert multibyte text");
295	break;
296	}
297	}
298	if (len <= 0)
299	return QString();
300	if (wc[len-1] == 0) // len - 1: we don't want terminator
301	--len;
302
303	//save the new state information
304	if (state) {
305	state->state_data[0] = (char)state_data;
306	state->remainingChars = remainingChars;
307	}
308	QString s((QChar*)wc, len);
309	if (wc != wc_auto)
310	delete [] wc;
311	if (prepend) {
312	return sp+s;
313	}
314	return s;
315	}
316
317	QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char chars, int length, ConverterState state) const
318	{
319	if (!chars \|\| !length)
320	return QString();
321
322	int copyLocation = 0;
323	int extra = 2;
324	if (state && state->remainingChars) {
325	copyLocation = state->remainingChars;
326	extra += copyLocation;
327	}
328	int newLength = length + extra;
329	char *mbcs = new char[newLength];
330	//ensure that we have a NULL terminated string
331	mbcs[newLength-1] = 0;
332	mbcs[newLength-2] = 0;
333	memcpy(&(mbcs[copyLocation]), chars, length);
334	if (copyLocation) {
335	//copy the last character from the state
336	mbcs[0] = (char)state->state_data[0];
337	state->remainingChars = 0;
338	}
339	const char *mb = mbcs;
340	#ifndef Q_OS_WINCE
341	const char *next = 0;
342	QString s;
343	while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
344	WCHAR wc[2] ={0};
345	int charlength = next - mb;
346	int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
347	if (len>0) {
348	s.append(QChar(wc[0]));
349	} else {
350	int r = GetLastError();
351	//check if the character being dropped is the last character
352	if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
353	state->remainingChars = 1;
354	state->state_data[0] = (char)*mb;
355	}
356	}
357	mb = next;
358	}
359	#else
360	QString s;
361	int size = mbstowcs(NULL, mb, length);
362	if (size < 0) {
363	Q_ASSERT("Error in CE TextCodec");
364	return QString();
365	}
366	wchar_t* ws = new wchar_t[size + 2];
367	ws[size +1] = 0;
368	ws[size] = 0;
369	size = mbstowcs(ws, mb, length);
370	for (int i=0; i< size; i++)
371	s.append(QChar(ws[i]));
372	delete [] ws;
373	#endif
374	delete mbcs;
375	return s;
376	}
377
378	QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar uc, int len, ConverterState ) const
379	{
380	return qt_winQString2MB(uc, len);
381	}
382
383
384	QByteArray QWindowsLocalCodec::name() const
385	{
386	return "System";
387	}
388
389	int QWindowsLocalCodec::mibEnum() const
390	{
391	return 0;
392	}
393
394	#else
395
/* locale names mostly copied from XFree86 */
// Each table is a 0-terminated list of locale names. setupLocaleMapper()
// searches them with try_locale_list() to guess a codec from a locale name.

static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ", "cz", "cz_CZ", "czech",
    "hr", "hr_HR", "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP",
    0
};

static const char * const iso8859_3locales[] = { "eo", 0 };

static const char * const iso8859_4locales[] = { "ee", "ee_EE", 0 };

static const char * const iso8859_5locales[] = { "mk", "mk_MK", "sp", "sp_YU", 0 };

static const char * const cp_1251locales[] = { "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };

static const char * const pt_154locales[] = { "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };

static const char * const iso8859_6locales[] = { "ar_AA", "ar_SA", "arabic", 0 };

static const char * const iso8859_7locales[] = { "el", "el_GR", "greek", 0 };

static const char * const iso8859_8locales[] = { "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };

static const char * const iso8859_9locales[] = { "tr", "tr_TR", "turkish", 0 };

static const char * const iso8859_13locales[] = { "lt", "lt_LT", "lv", "lv_LV", 0 };

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE", "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
    0
};

static const char * const koi8_ulocales[] = { "uk", "uk_UA", "ru_UA", "ukrainian", 0 };

static const char * const tis_620locales[] = { "th", "th_TH", "thai", 0 };

// static const char * const tcvnlocales[] = {
//     "vi", "vi_VN", 0 };
450
/*
    Returns true if \a lang is exactly equal (case-sensitive) to one of the
    entries of the 0-terminated list \a locale.

    An empty-string entry is compared like any other entry instead of ending
    the search and being reported as a match, and a null \a lang matches
    nothing.
*/
static bool try_locale_list(const char * const locale[], const char * lang)
{
    if (!lang)
        return false;
    for (int i = 0; locale[i]; ++i) {
        if (strcmp(locale[i], lang) == 0)
            return true;
    }
    return false;
}
458
459	// For the probably_koi8_locales we have to look. the standard says
460	// these are 8859-5, but almost all Russian users use KOI8-R and
461	// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
462	// it thinks ru_RU means.
463
464	// If you read the history, it seems that many Russians blame ISO and
465	// Perestroika for the confusion.
466	//
467	// The real bug is that some programs break if the user specifies
468	// ru_RU.KOI8-R.
469
470	static const char * const probably_koi8_rlocales[] = {
471	"ru", "ru_SU", "ru_RU", "russian", 0 };
472
473	static QTextCodec * ru_RU_hack(const char * i) {
474	#if defined(Q_OS_OS2)
475	// @todo temporary hack. the proper one is to use the current process'
476	// code page if LANG or its codepage part is missing
477	return QTextCodec::codecForName("cp866");
478	#else
479	QTextCodec * ru_RU_codec = 0;
480
481	#if !defined(QT_NO_SETLOCALE)
482	QByteArray origlocale(setlocale(LC_CTYPE, i));
483	#else
484	QByteArray origlocale(i);
485	#endif
486	// unicode koi8r latin5 name
487	// 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
488	// 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
489	int latin5 = tolower(0xCE);
490	int koi8r = tolower(0xE0);
491	if (koi8r == 0xC0 && latin5 != 0xEE) {
492	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
493	} else if (koi8r != 0xC0 && latin5 == 0xEE) {
494	ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
495	} else {
496	// something else again... let's assume... throws dice
497	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
498	qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
499	koi8r, latin5, i);
500	}
501	#if !defined(QT_NO_SETLOCALE)
502	setlocale(LC_CTYPE, origlocale);
503	#endif
504
505	return ru_RU_codec;
506	#endif // defined(Q_OS_OS2)
507	}
508
509	#endif
510
511	#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
512	static QTextCodec checkForCodec(const char name) {
513	QTextCodec *c = QTextCodec::codecForName(name);
514	if (!c) {
515	const char *at = strchr(name, '@');
516	if (at) {
517	QByteArray n(name, at - name);
518	c = QTextCodec::codecForName(n.data());
519	}
520	}
521	return c;
522	}
523	#endif
524
525	/* the next two functions are implicitely thread safe,
526	as they are only called by setup() which uses a mutex.
527	*/
528	static void setupLocaleMapper()
529	{
530	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
531	localeMapper = QTextCodec::codecForName("System");
532	#else
533
534	#ifndef QT_NO_ICONV
535	localeMapper = QTextCodec::codecForName("System");
536	#endif
537
538	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
539	if (!localeMapper) {
540	char *charset = nl_langinfo (CODESET);
541	if (charset)
542	localeMapper = QTextCodec::codecForName(charset);
543	}
544	#endif
545
546	if (!localeMapper) {
547	// Very poorly defined and followed standards causes lots of
548	// code to try to get all the cases... This logic is
549	// duplicated in QIconvCodec, so if you change it here, change
550	// it there too.
551
552	// Try to determine locale codeset from locale name assigned to
553	// LC_CTYPE category.
554
555	// First part is getting that locale name. First try setlocale() which
556	// definitely knows it, but since we cannot fully trust it, get ready
557	// to fall back to environment variables.
558	#if !defined(QT_NO_SETLOCALE)
559	char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
560	#else
561	char * ctype = qstrdup("");
562	#endif
563
564	// Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
565	// environment variables.
566	char * lang = qstrdup(qgetenv("LC_ALL").constData());
567	if (!lang \|\| lang[0] == 0 \|\| strcmp(lang, "C") == 0) {
568	if (lang) delete [] lang;
569	lang = qstrdup(qgetenv("LC_CTYPE").constData());
570	}
571	if (!lang \|\| lang[0] == 0 \|\| strcmp(lang, "C") == 0) {
572	if (lang) delete [] lang;
573	lang = qstrdup(qgetenv("LANG").constData());
574	}
575
576	// Now try these in order:
577	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
578	// 2. CODESET from lang if it contains a .CODESET part
579	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
580	// 4. locale (ditto)
581	// 5. check for "@euro"
582	// 6. guess locale from ctype unless ctype is "C"
583	// 7. guess locale from lang
584
585	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
586	char * codeset = ctype ? strchr(ctype, '.') : 0;
587	if (codeset && *codeset == '.')
588	localeMapper = checkForCodec(codeset + 1);
589
590	// 2. CODESET from lang if it contains a .CODESET part
591	codeset = lang ? strchr(lang, '.') : 0;
592	if (!localeMapper && codeset && *codeset == '.')
593	localeMapper = checkForCodec(codeset + 1);
594
595	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
596	if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
597	localeMapper = checkForCodec(ctype);
598
599	// 4. locale (ditto)
600	if (!localeMapper && lang && *lang != 0)
601	localeMapper = checkForCodec(lang);
602
603	// 5. "@euro"
604	if ((!localeMapper && ctype && strstr(ctype, "@euro")) \|\| (lang && strstr(lang, "@euro")))
605	localeMapper = checkForCodec("ISO 8859-15");
606
607	// 6. guess locale from ctype unless ctype is "C"
608	// 7. guess locale from lang
609	char * try_by_name = ctype;
610	if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
611	try_by_name = lang;
612
613	// Now do the guessing.
614	if (lang && lang && !localeMapper && try_by_name && try_by_name) {
615	if (try_locale_list(iso8859_15locales, lang))
616	localeMapper = QTextCodec::codecForName("ISO 8859-15");
617	else if (try_locale_list(iso8859_2locales, lang))
618	localeMapper = QTextCodec::codecForName("ISO 8859-2");
619	else if (try_locale_list(iso8859_3locales, lang))
620	localeMapper = QTextCodec::codecForName("ISO 8859-3");
621	else if (try_locale_list(iso8859_4locales, lang))
622	localeMapper = QTextCodec::codecForName("ISO 8859-4");
623	else if (try_locale_list(iso8859_5locales, lang))
624	localeMapper = QTextCodec::codecForName("ISO 8859-5");
625	else if (try_locale_list(iso8859_6locales, lang))
626	localeMapper = QTextCodec::codecForName("ISO 8859-6");
627	else if (try_locale_list(iso8859_7locales, lang))
628	localeMapper = QTextCodec::codecForName("ISO 8859-7");
629	else if (try_locale_list(iso8859_8locales, lang))
630	localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
631	else if (try_locale_list(iso8859_9locales, lang))
632	localeMapper = QTextCodec::codecForName("ISO 8859-9");
633	else if (try_locale_list(iso8859_13locales, lang))
634	localeMapper = QTextCodec::codecForName("ISO 8859-13");
635	else if (try_locale_list(tis_620locales, lang))
636	localeMapper = QTextCodec::codecForName("ISO 8859-11");
637	else if (try_locale_list(koi8_ulocales, lang))
638	localeMapper = QTextCodec::codecForName("KOI8-U");
639	else if (try_locale_list(cp_1251locales, lang))
640	localeMapper = QTextCodec::codecForName("CP 1251");
641	else if (try_locale_list(pt_154locales, lang))
642	localeMapper = QTextCodec::codecForName("PT 154");
643	else if (try_locale_list(probably_koi8_rlocales, lang))
644	localeMapper = ru_RU_hack(lang);
645	}
646
647	delete [] ctype;
648	delete [] lang;
649	}
650
651	// If everything failed, we default to 8859-1
652	// We could perhaps default to 8859-15.
653	if (!localeMapper)
654	localeMapper = QTextCodec::codecForName("ISO 8859-1");
655	#endif
656	}
657
658
659	static void setup()
660	{
661	#ifndef QT_NO_THREAD
662	QMutexLocker locker(QMutexPool::globalInstanceGet(&all));
663	#endif
664
665	if (all)
666	return;
667
668	if (destroying_is_ok)
669	qWarning("QTextCodec: Creating new codec during codec cleanup");
670	all = new QList<QTextCodec*>;
671	// create the cleanup object to cleanup all codecs on exit
672	(void) createQTextCodecCleanup();
673
674	#ifndef QT_NO_CODECS
675	# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
676	// no font codecs when bootstrapping
677	(void)new QFontLaoCodec;
678	# if defined(QT_NO_ICONV)
679	// no iconv(3) support, must build all codecs into the library
680	(void)new QFontGb2312Codec;
681	(void)new QFontGbkCodec;
682	(void)new QFontGb18030_0Codec;
683	(void)new QFontJis0208Codec;
684	(void)new QFontJis0201Codec;
685	(void)new QFontKsc5601Codec;
686	(void)new QFontBig5hkscsCodec;
687	(void)new QFontBig5Codec;
688	# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
689	# endif // Q_WS_X11
690
691	(void)new QTsciiCodec;
692
693	for (int i = 0; i < 9; ++i)
694	(void)new QIsciiCodec(i);
695
696
697	# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
698	// no asian codecs when bootstrapping, sorry
699	(void)new QGb18030Codec;
700	(void)new QGbkCodec;
701	(void)new QGb2312Codec;
702	(void)new QEucJpCodec;
703	(void)new QJisCodec;
704	(void)new QSjisCodec;
705	(void)new QEucKrCodec;
706	(void)new QBig5Codec;
707	(void)new QBig5hkscsCodec;
708	# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
709	#endif // QT_NO_CODECS
710
711	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
712	(void) new QWindowsLocalCodec;
713	#endif // Q_OS_WIN32
714
715	(void)new QUtf16Codec;
716	(void)new QUtf16BECodec;
717	(void)new QUtf16LECodec;
718	(void)new QUtf32Codec;
719	(void)new QUtf32BECodec;
720	(void)new QUtf32LECodec;
721	(void)new QLatin15Codec;
722	(void)new QLatin1Codec;
723	(void)new QUtf8Codec;
724
725	for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
726	(void)new QSimpleTextCodec(i);
727
728	#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
729	// QIconvCodec depends on the UTF-16 codec, so it needs to be created last
730	(void) new QIconvCodec();
731	#endif
732
733	if (!localeMapper)
734	setupLocaleMapper();
735	}
736
737	QTextCodec::ConverterState::~ConverterState()
738	{
739	if (flags & FreeFunction)
740	(QTextCodecUnalignedPointer::decode(state_data))(this);
741	else if (d)
742	qFree(d);
743	}
744
745	/*!
746	\class QTextCodec
747	\brief The QTextCodec class provides conversions between text encodings.
748	\reentrant
749	\ingroup i18n
750
751	Qt uses Unicode to store, draw and manipulate strings. In many
752	situations you may wish to deal with data that uses a different
753	encoding. For example, most Japanese documents are still stored
754	in Shift-JIS or ISO 2022-JP, while Russian users often have their
755	documents in KOI8-R or Windows-1251.
756
757	Qt provides a set of QTextCodec classes to help with converting
758	non-Unicode formats to and from Unicode. You can also create your
759	own codec classes.
760
761	The supported encodings are:
762
763	\list
764	\o Apple Roman
765	\o \l{Big5 Text Codec}{Big5}
766	\o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
767	\o CP949
768	\o \l{EUC-JP Text Codec}{EUC-JP}
769	\o \l{EUC-KR Text Codec}{EUC-KR}
770	\o \l{GBK Text Codec}{GB18030-0}
771	\o IBM 850
772	\o IBM 866
773	\o IBM 874
774	\o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
775	\o ISO 8859-1 to 10
776	\o ISO 8859-13 to 16
777	\o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
778	\o JIS X 0201
779	\o JIS X 0208
780	\o KOI8-R
781	\o KOI8-U
782	\o MuleLao-1
783	\o ROMAN8
784	\o \l{Shift-JIS Text Codec}{Shift-JIS}
785	\o TIS-620
786	\o \l{TSCII Text Codec}{TSCII}
787	\o UTF-8
788	\o UTF-16
789	\o UTF-16BE
790	\o UTF-16LE
791	\o UTF-32
792	\o UTF-32BE
793	\o UTF-32LE
794	\o Windows-1250 to 1258
795	\o WINSAMI2
796	\endlist
797
798	QTextCodecs can be used as follows to convert some locally encoded
799	string to Unicode. Suppose you have some string encoded in Russian
800	KOI8-R encoding, and want to convert it to Unicode. The simple way
801	to do it is like this:
802
803	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
804
805	After this, \c string holds the text converted to Unicode.
806	Converting a string from Unicode to the local encoding is just as
807	easy:
808
809	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
810
811	To read or write files in various encodings, use QTextStream and
812	its \l{QTextStream::setCodec()}{setCodec()} function. See the
813	\l{tools/codecs}{Codecs} example for an application of QTextCodec
814	to file I/O.
815
816	Some care must be taken when trying to convert the data in chunks,
817	for example, when receiving it over a network. In such cases it is
818	possible that a multi-byte character will be split over two
819	chunks. At best this might result in the loss of a character and
820	at worst cause the entire conversion to fail.
821
822	The approach to use in these situations is to create a QTextDecoder
823	object for the codec and use this QTextDecoder for the whole
824	decoding process, as shown below:
825
826	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
827
828	The QTextDecoder object maintains state between chunks and therefore
829	works correctly even if a multi-byte character is split between
830	chunks.
831
832	\section1 Creating Your Own Codec Class
833
834	Support for new text encodings can be added to Qt by creating
835	QTextCodec subclasses.
836
837	The pure virtual functions describe the encoder to the system and
838	the coder is used as required in the different text file formats
839	supported by QTextStream, and under X11, for the locale-specific
840	character input and output.
841
842	To add support for another encoding to Qt, make a subclass of
843	QTextCodec and implement the functions listed in the table below.
844
845	\table
846	\header \o Function \o Description
847
848	\row \o name()
849	\o Returns the official name for the encoding. If the
850	encoding is listed in the
851	\l{IANA character-sets encoding file}, the name
852	should be the preferred MIME name for the encoding.
853
854	\row \o aliases()
855	\o Returns a list of alternative names for the encoding.
856	QTextCodec provides a default implementation that returns
857	an empty list. For example, "ISO-8859-1" has "latin1",
858	"CP819", "IBM819", and "iso-ir-100" as aliases.
859
860	\row \o mibEnum()
861	\o Return the MIB enum for the encoding if it is listed in
862	the \l{IANA character-sets encoding file}.
863
864	\row \o convertToUnicode()
865	\o Converts an 8-bit character string to Unicode.
866
867	\row \o convertFromUnicode()
868	\o Converts a Unicode string to an 8-bit character string.
869	\endtable
870
871	You may find it more convenient to make your codec class
872	available as a plugin; see \l{How to Create Qt Plugins} for
873	details.
874
875	\sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
876	*/
877
878	/*!
879	\enum QTextCodec::ConversionFlag
880
881	\value DefaultConversion No flag is set.
882	\value ConvertInvalidToNull If this flag is set, each invalid input
883	character is output as a null character.
884	\value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
885
886	\omitvalue FreeFunction
887	*/
888
889	/*!
890	\fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
891
892	Constructs a ConverterState object initialized with the given \a flags.
893	*/
894
895	/*!
896	\fn QTextCodec::ConverterState::~ConverterState()
897
898	Destroys the ConverterState object.
899	*/
900
901	/*!
902	\nonreentrant
903
904	Constructs a QTextCodec, and gives it the highest precedence. The
905	QTextCodec should always be constructed on the heap (i.e. with \c
906	new). Qt takes ownership and will delete it when the application
907	terminates.
908	*/
909	QTextCodec::QTextCodec()
910	{
911	setup();
912	all->prepend(this);
913	}
914
915
916	/*!
917	\nonreentrant
918
919	Destroys the QTextCodec. Note that you should not delete codecs
920	yourself: once created they become Qt's responsibility.
921	*/
922	QTextCodec::~QTextCodec()
923	{
924	if (!destroying_is_ok)
925	qWarning("QTextCodec::~QTextCodec: Called by application");
926	if (all)
927	all->removeAll(this);
928	}
929
930	/*!
931	\fn QTextCodec *QTextCodec::codecForName(const char *name)
932
933	Searches all installed QTextCodec objects and returns the one
934	which best matches \a name; the match is case-insensitive. Returns
935	0 if no codec matching the name \a name could be found.
936	*/
937
938	/*!
939	Searches all installed QTextCodec objects and returns the one
940	which best matches \a name; the match is case-insensitive. Returns
941	0 if no codec matching the name \a name could be found.
942	*/
943	QTextCodec *QTextCodec::codecForName(const QByteArray &name)
944	{
945	if (name.isEmpty())
946	return 0;
947
948	setup();
949
950	for (int i = 0; i < all->size(); ++i) {
951	QTextCodec *cursor = all->at(i);
952	if (nameMatch(cursor->name(), name))
953	return cursor;
954	QList<QByteArray> aliases = cursor->aliases();
955	for (int i = 0; i < aliases.size(); ++i)
956	if (nameMatch(aliases.at(i), name))
957	return cursor;
958	}
959
960	return createForName(name);
961	}
962
963
964	/*!
965	Returns the QTextCodec which matches the \link
966	QTextCodec::mibEnum() MIBenum\endlink \a mib.
967	*/
968	QTextCodec* QTextCodec::codecForMib(int mib)
969	{
970	setup();
971
972	// Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
973	// this correctly for compatibility.
974	if (mib == 1000)
975	mib = 1015;
976
977	QList<QTextCodec*>::ConstIterator i;
978	for (int i = 0; i < all->size(); ++i) {
979	QTextCodec *cursor = all->at(i);
980	if (cursor->mibEnum() == mib)
981	return cursor;
982	}
983
984	return createForMib(mib);
985	}
986
987	/*!
988	Returns the list of all available codecs, by name. Call
989	QTextCodec::codecForName() to obtain the QTextCodec for the name.
990
991	The list may contain many mentions of the same codec
992	if the codec has aliases.
993
994	\sa availableMibs(), name(), aliases()
995	*/
996	QList<QByteArray> QTextCodec::availableCodecs()
997	{
998	setup();
999
1000	QList<QByteArray> codecs;
1001	for (int i = 0; i < all->size(); ++i) {
1002	codecs += all->at(i)->name();
1003	codecs += all->at(i)->aliases();
1004	}
1005	#ifndef QT_NO_TEXTCODECPLUGIN
1006	QFactoryLoader *l = loader();
1007	QStringList keys = l->keys();
1008	for (int i = 0; i < keys.size(); ++i) {
1009	if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
1010	QByteArray name = keys.at(i).toLatin1();
1011	if (!codecs.contains(name))
1012	codecs += name;
1013	}
1014	}
1015	#endif
1016
1017	return codecs;
1018	}
1019
1020	/*!
1021	Returns the list of MIBs for all available codecs. Call
1022	QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
1023
1024	\sa availableCodecs(), mibEnum()
1025	*/
1026	QList<int> QTextCodec::availableMibs()
1027	{
1028	setup();
1029
1030	QList<int> codecs;
1031	for (int i = 0; i < all->size(); ++i)
1032	codecs += all->at(i)->mibEnum();
1033	#ifndef QT_NO_TEXTCODECPLUGIN
1034	QFactoryLoader *l = loader();
1035	QStringList keys = l->keys();
1036	for (int i = 0; i < keys.size(); ++i) {
1037	if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
1038	int mib = keys.at(i).mid(5).toInt();
1039	if (!codecs.contains(mib))
1040	codecs += mib;
1041	}
1042	}
1043	#endif
1044
1045	return codecs;
1046	}
1047
1048	/*!
1049	Set the codec to \a c; this will be returned by
1050	codecForLocale(). If \a c is a null pointer, the codec is reset to
1051	the default.
1052
1053	This might be needed for some applications that want to use their
1054	own mechanism for setting the locale.
1055
1056	Setting this codec is not supported on DOS based Windows.
1057
1058	\sa codecForLocale()
1059	*/
1060	void QTextCodec::setCodecForLocale(QTextCodec *c)
1061	{
1062	#ifdef Q_WS_WIN
1063	if (QSysInfo::WindowsVersion& QSysInfo::WV_DOS_based)
1064	return;
1065	#endif
1066	localeMapper = c;
1067	if (!localeMapper)
1068	setupLocaleMapper();
1069	}
1070
1071	/*!
1072	Returns a pointer to the codec most suitable for this locale.
1073
1074	On Windows, the codec will be based on a system locale. On Unix
1075	systems, starting with Qt 4.2, the codec will be using the \e
1076	iconv library. Note that in both cases the codec's name will be
1077	"System".
1078	*/
1079
1080	QTextCodec* QTextCodec::codecForLocale()
1081	{
1082	if (localeMapper)
1083	return localeMapper;
1084
1085	setup();
1086
1087	return localeMapper;
1088	}
1089
1090
1091	/*!
1092	\fn QByteArray QTextCodec::name() const
1093
1094	QTextCodec subclasses must reimplement this function. It returns
1095	the name of the encoding supported by the subclass.
1096
1097	If the codec is registered as a character set in the
1098	\l{IANA character-sets encoding file} this method should
1099	return the preferred mime name for the codec if defined,
1100	otherwise its name.
1101	*/
1102
1103	/*!
1104	\fn int QTextCodec::mibEnum() const
1105
1106	Subclasses of QTextCodec must reimplement this function. It
1107	returns the MIBenum (see \l{IANA character-sets encoding file}
1108	for more information). It is important that each QTextCodec
1109	subclass returns the correct unique value for this function.
1110	*/
1111
1112	/*!
1113	Subclasses can return a number of aliases for the codec in question.
1114
1115	Standard aliases for codecs can be found in the
1116	\l{IANA character-sets encoding file}.
1117	*/
1118	QList<QByteArray> QTextCodec::aliases() const
1119	{
1120	return QList<QByteArray>();
1121	}
1122
1123	/*!
1124	\fn QString QTextCodec::convertToUnicode(const char *chars, int len,
1125	ConverterState *state) const
1126
1127	QTextCodec subclasses must reimplement this function.
1128
1129	Converts the first \a len characters of \a chars from the
1130	encoding of the subclass to Unicode, and returns the result in a
1131	QString.
1132
1133	\a state can be 0, in which case the conversion is stateless and
1134	default conversion rules should be used. If state is not 0, the
1135	codec should save the state after the conversion in \a state, and
1136	adjust the remainingChars and invalidChars members of the struct.
1137	*/
1138
1139	/*!
1140	\fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
1141	ConverterState *state) const
1142
1143	QTextCodec subclasses must reimplement this function.
1144
1145	Converts the first \a number of characters from the \a input array
1146	from Unicode to the encoding of the subclass, and returns the result
1147	in a QByteArray.
1148
1149	\a state can be 0 in which case the conversion is stateless and
1150	default conversion rules should be used. If state is not 0, the
1151	codec should save the state after the conversion in \a state, and
1152	adjust the remainingChars and invalidChars members of the struct.
1153	*/
1154
1155	/*!
1156	Creates a QTextDecoder which stores enough state to decode chunks
1157	of \c{char *} data to create chunks of Unicode data.
1158
1159	The caller is responsible for deleting the returned object.
1160	*/
1161	QTextDecoder* QTextCodec::makeDecoder() const
1162	{
1163	return new QTextDecoder(this);
1164	}
1165
1166
1167	/*!
1168	Creates a QTextEncoder which stores enough state to encode chunks
1169	of Unicode data as \c{char *} data.
1170
1171	The caller is responsible for deleting the returned object.
1172	*/
1173	QTextEncoder* QTextCodec::makeEncoder() const
1174	{
1175	return new QTextEncoder(this);
1176	}
1177
1178	/*!
1179	\fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
1180	ConverterState *state) const
1181
1182	Converts the first \a number of characters from the \a input array
1183	from Unicode to the encoding of this codec, and returns the result
1184	in a QByteArray.
1185
1186	The \a state of the convertor used is updated.
1187	*/
1188
1189	/*!
1190	Converts \a str from Unicode to the encoding of this codec, and
1191	returns the result in a QByteArray.
1192	*/
1193	QByteArray QTextCodec::fromUnicode(const QString& str) const
1194	{
1195	return convertFromUnicode(str.constData(), str.length(), 0);
1196	}
1197
1198	/*!
1199	\fn QString QTextCodec::toUnicode(const char *input, int size,
1200	ConverterState *state) const
1201
1202	Converts the first \a size characters from the \a input from the
1203	encoding of this codec to Unicode, and returns the result in a
1204	QString.
1205
1206	The \a state of the convertor used is updated.
1207	*/
1208
1209	/*!
1210	Converts \a a from the encoding of this codec to Unicode, and
1211	returns the result in a QString.
1212	*/
1213	QString QTextCodec::toUnicode(const QByteArray& a) const
1214	{
1215	return convertToUnicode(a.constData(), a.length(), 0);
1216	}
1217
1218	/*!
1219	Returns true if the Unicode character \a ch can be fully encoded
1220	with this codec; otherwise returns false.
1221	*/
1222	bool QTextCodec::canEncode(QChar ch) const
1223	{
1224	ConverterState state;
1225	state.flags = ConvertInvalidToNull;
1226	convertFromUnicode(&ch, 1, &state);
1227	return (state.invalidChars == 0);
1228	}
1229
1230	/*!
1231	\overload
1232
1233	\a s contains the string being tested for encode-ability.
1234	*/
1235	bool QTextCodec::canEncode(const QString& s) const
1236	{
1237	ConverterState state;
1238	state.flags = ConvertInvalidToNull;
1239	convertFromUnicode(s.constData(), s.length(), &state);
1240	return (state.invalidChars == 0);
1241	}
1242
1243	#ifdef QT3_SUPPORT
1244	/*!
1245	Returns a string representing the current language and
1246	sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1247
1248	\sa QLocale
1249	*/
1250	const char *QTextCodec::locale()
1251	{
1252	static char locale[6];
1253	QByteArray l = QLocale::system().name().toLatin1();
1254	int len = qMin(l.length(), 5);
1255	memcpy(locale, l.constData(), len);
1256	locale[len] = '\0';
1257
1258	return locale;
1259	}
1260
1261	/*!
1262	\overload
1263	*/
1264
1265	QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1266	{
1267	QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
1268	lenInOut = result.length();
1269	return result;
1270	}
1271
1272	/*!
1273	\overload
1274
1275	\a a contains the source characters; \a len contains the number of
1276	characters in \a a to use.
1277	*/
1278	QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1279	{
1280	len = qMin(a.size(), len);
1281	return convertToUnicode(a.constData(), len, 0);
1282	}
1283	#endif
1284
1285	/*!
1286	\overload
1287
1288	\a chars contains the source characters.
1289	*/
1290	QString QTextCodec::toUnicode(const char *chars) const
1291	{
1292	int len = qstrlen(chars);
1293	return convertToUnicode(chars, len, 0);
1294	}
1295
1296
1297	/*!
1298	\class QTextEncoder
1299	\brief The QTextEncoder class provides a state-based encoder.
1300	\reentrant
1301	\ingroup i18n
1302
1303	A text encoder converts text from Unicode into an encoded text format
1304	using a specific codec.
1305
1306	The encoder converts Unicode into another format, remembering any
1307	state that is required between calls.
1308
1309	\sa QTextCodec::makeEncoder(), QTextDecoder
1310	*/
1311
1312	/*!
1313	\fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
1314
1315	Constructs a text encoder for the given \a codec.
1316	*/
1317
1318	/*!
1319	Destroys the encoder.
1320	*/
1321	QTextEncoder::~QTextEncoder()
1322	{
1323	}
1324
1325	/*! \internal
1326	\since 4.5
1327	Determines whether the eecoder encountered a failure while decoding the input. If
1328	an error was encountered, the produced result is undefined, and gets converted as according
1329	to the conversion flags.
1330	*/
1331	bool QTextEncoder::hasFailure() const
1332	{
1333	return state.invalidChars != 0;
1334	}
1335
1336	/*!
1337	Converts the Unicode string \a str into an encoded QByteArray.
1338	*/
1339	QByteArray QTextEncoder::fromUnicode(const QString& str)
1340	{
1341	QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
1342	return result;
1343	}
1344
1345	/*!
1346	\overload
1347
1348	Converts \a len characters (not bytes) from \a uc, and returns the
1349	result in a QByteArray.
1350	*/
1351	QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
1352	{
1353	QByteArray result = c->fromUnicode(uc, len, &state);
1354	return result;
1355	}
1356
1357	#ifdef QT3_SUPPORT
1358	/*!
1359	\overload
1360
1361	Converts \a lenInOut characters (not bytes) from \a uc, and returns the
1362	result in a QByteArray. The number of characters read is returned in
1363	the \a lenInOut parameter.
1364	*/
1365	QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1366	{
1367	QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
1368	lenInOut = result.length();
1369	return result;
1370	}
1371	#endif
1372
1373	/*!
1374	\class QTextDecoder
1375	\brief The QTextDecoder class provides a state-based decoder.
1376	\reentrant
1377	\ingroup i18n
1378
1379	A text decoder converts text from an encoded text format into Unicode
1380	using a specific codec.
1381
1382	The decoder converts text in this format into Unicode, remembering any
1383	state that is required between calls.
1384
1385	\sa QTextCodec::makeDecoder(), QTextEncoder
1386	*/
1387
1388	/*!
1389	\fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
1390
1391	Constructs a text decoder for the given \a codec.
1392	*/
1393
1394	/*!
1395	Destroys the decoder.
1396	*/
1397	QTextDecoder::~QTextDecoder()
1398	{
1399	}
1400
1401	/*!
1402	\fn QString QTextDecoder::toUnicode(const char *chars, int len)
1403
1404	Converts the first \a len bytes in \a chars to Unicode, returning
1405	the result.
1406
1407	If not all characters are used (e.g. if only part of a multi-byte
1408	encoding is at the end of the characters), the decoder remembers
1409	enough state to continue with the next call to this function.
1410	*/
1411	QString QTextDecoder::toUnicode(const char *chars, int len)
1412	{
1413	return c->toUnicode(chars, len, &state);
1414	}
1415
1416
1417	/*! \overload
1418
1419	The converted string is returned in \a target.
1420	*/
1421	void QTextDecoder::toUnicode(QString target, const char chars, int len)
1422	{
1423	Q_ASSERT(target);
1424	switch (c->mibEnum()) {
1425	case 106: // utf8
1426	static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
1427	break;
1428	case 4: { // latin1
1429	target->resize(len);
1430	ushort data = (ushort)target->data();
1431	for (int i = len; i >=0; --i)
1432	data[i] = (uchar) chars[i];
1433	} break;
1434	default:
1435	*target = c->toUnicode(chars, len, &state);
1436	}
1437	}
1438
1439
1440	/*!
1441	\overload
1442
1443	Converts the bytes in the byte array specified by \a ba to Unicode
1444	and returns the result.
1445	*/
1446	QString QTextDecoder::toUnicode(const QByteArray &ba)
1447	{
1448	return c->toUnicode(ba.constData(), ba.length(), &state);
1449	}
1450
1451
1452	/*!
1453	\fn QTextCodec* QTextCodec::codecForTr()
1454
1455	Returns the codec used by QObject::tr() on its argument. If this
1456	function returns 0 (the default), tr() assumes Latin-1.
1457
1458	\sa setCodecForTr()
1459	*/
1460
1461	/*!
1462	\fn void QTextCodec::setCodecForTr(QTextCodec *c)
1463	\nonreentrant
1464
1465	Sets the codec used by QObject::tr() on its argument to \a c. If
1466	\a c is 0 (the default), tr() assumes Latin-1.
1467
1468	If the literal quoted text in the program is not in the Latin-1
1469	encoding, this function can be used to set the appropriate
1470	encoding. For example, software developed by Korean programmers
1471	might use eucKR for all the text in the program, in which case the
1472	main() function might look like this:
1473
1474	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
1475
1476	Note that this is not the way to select the encoding that the \e
1477	user has chosen. For example, to convert an application containing
1478	literal English strings to Korean, all that is needed is for the
1479	English strings to be passed through tr() and for translation
1480	files to be loaded. For details of internationalization, see
1481	\l{Internationalization with Qt}.
1482
1483	\sa codecForTr(), setCodecForCStrings()
1484	*/
1485
1486
1487	/*!
1488	\fn QTextCodec* QTextCodec::codecForCStrings()
1489
1490	Returns the codec used by QString to convert to and from \c{const
1491	char *} and QByteArrays. If this function returns 0 (the default),
1492	QString assumes Latin-1.
1493
1494	\sa setCodecForCStrings()
1495	*/
1496
1497	/*!
1498	\fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
1499	\nonreentrant
1500
1501	Sets the codec used by QString to convert to and from \c{const
1502	char *} and QByteArrays. If the \a codec is 0 (the default),
1503	QString assumes Latin-1.
1504
1505	\warning Some codecs do not preserve the characters in the ASCII
1506	range (0x00 to 0x7F). For example, the Japanese Shift-JIS
1507	encoding maps the backslash character (0x5C) to the Yen
1508	character. To avoid undesirable side-effects, we recommend
1509	avoiding such codecs with setCodecForCStrings().
1510
1511	\sa codecForCStrings(), setCodecForTr()
1512	*/
1513
1514	/*!
1515	\since 4.4
1516
1517	Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba,
1518	and returns a QTextCodec instance that is capable of decoding the html to unicode.
1519	If the codec cannot be detected from the content provided, \a defaultCodec is returned.
1520	*/
1521	QTextCodec QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec defaultCodec)
1522	{
1523	// determine charset
1524	int pos;
1525	QTextCodec *c = 0;
1526
1527	if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1528	\|\| ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) {
1529	c = QTextCodec::codecForMib(1015); // utf16
1530	} else if (ba.size() > 2
1531	&& (uchar)ba[0] == 0xef
1532	&& (uchar)ba[1] == 0xbb
1533	&& (uchar)ba[2] == 0xbf) {
1534	c = QTextCodec::codecForMib(106); // utf-8
1535	} else {
1536	QByteArray header = ba.left(512).toLower();
1537	if ((pos = header.indexOf("http-equiv=")) != -1) {
1538	pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1539	if (pos != -1) {
1540	int pos2 = header.indexOf('\"', pos+1);
1541	QByteArray cs = header.mid(pos, pos2-pos);
1542	// qDebug("found charset: %s", cs.data());
1543	c = QTextCodec::codecForName(cs);
1544	}
1545	}
1546	}
1547	if (!c)
1548	c = defaultCodec;
1549
1550	return c;
1551	}
1552
1553	/*!
1554	\overload
1555
1556	If the codec cannot be detected, this overload returns a Latin-1 QTextCodec.
1557	*/
1558	QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
1559	{
1560	return codecForHtml(ba, QTextCodec::codecForMib(/Latin 1/ 4));
1561	}
1562
1563
1564	/*! \internal
1565	\since 4.3
1566	Determines whether the decoder encountered a failure while decoding the input. If
1567	an error was encountered, the produced result is undefined, and gets converted as according
1568	to the conversion flags.
1569	*/
1570	bool QTextDecoder::hasFailure() const
1571	{
1572	return state.invalidChars != 0;
1573	}
1574
1575	/*!
1576	\fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
1577
1578	This functionality is no longer provided by Qt. This
1579	compatibility function always returns a null pointer.
1580	*/
1581
1582	/*!
1583	\fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
1584
1585	Use the codecForName(const QByteArray &) overload instead.
1586	*/
1587
1588	/*!
1589	\fn QTextCodec *QTextCodec::codecForIndex(int i)
1590
1591	Use availableCodecs() or availableMibs() instead and iterate
1592	through the resulting list.
1593	*/
1594
1595
1596	/*!
1597	\fn QByteArray QTextCodec::mimeName() const
1598
1599	Use name() instead.
1600	*/
1601
1602	QT_END_NAMESPACE
1603
1604	#endif // QT_NO_TEXTCODEC

Note: See TracBrowser for help on using the repository browser.

Download in other formats: