Context Navigation

source: trunk/src/corelib/codecs/qutfcodec.cpp@ 807

Last change on this file since 807 was 769, checked in by Dmitry A. Kuminov, 15 years ago
trunk: Merged in qt 4.6.3 sources from branches/vendor/nokia/qt.
File size: 19.2 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4	** All rights reserved.
5	** Contact: Nokia Corporation (qt-info@nokia.com)
6	**
7	** This file is part of the QtCore module of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:LGPL$
10	** Commercial Usage
11	** Licensees holding valid Qt Commercial licenses may use this file in
12	** accordance with the Qt Commercial License Agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and Nokia.
15	**
16	** GNU Lesser General Public License Usage
17	** Alternatively, this file may be used under the terms of the GNU Lesser
18	** General Public License version 2.1 as published by the Free Software
19	** Foundation and appearing in the file LICENSE.LGPL included in the
20	** packaging of this file. Please review the following information to
21	** ensure the GNU Lesser General Public License version 2.1 requirements
22	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23	**
24	** In addition, as a special exception, Nokia gives you certain additional
25	** rights. These rights are described in the Nokia Qt LGPL Exception
26	** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you have questions regarding the use of this file, please contact
37	** Nokia at qt-info@nokia.com.
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qutfcodec_p.h"
43	#include "qlist.h"
44	#include "qendian.h"
45	#include "qchar.h"
46
47	QT_BEGIN_NAMESPACE
48
// Indices into QTextCodec::ConverterState::state_data used by the UTF-16 and
// UTF-32 decoders in this file.
enum {
    Endian = 0, // slot holding the byte order resolved so far
    Data = 1    // slot holding bytes of a not yet complete code unit
};
50
51	QByteArray QUtf8::convertFromUnicode(const QChar uc, int len, QTextCodec::ConverterState state)
52	{
53	uchar replacement = '?';
54	int rlen = 3*len;
55	int surrogate_high = -1;
56	if (state) {
57	if (state->flags & QTextCodec::ConvertInvalidToNull)
58	replacement = 0;
59	if (!(state->flags & QTextCodec::IgnoreHeader))
60	rlen += 3;
61	if (state->remainingChars)
62	surrogate_high = state->state_data[0];
63	}
64
65	QByteArray rstr;
66	rstr.resize(rlen);
67	uchar* cursor = (uchar*)rstr.data();
68	const QChar *ch = uc;
69	int invalid = 0;
70	if (state && !(state->flags & QTextCodec::IgnoreHeader)) {
71	*cursor++ = 0xef;
72	*cursor++ = 0xbb;
73	*cursor++ = 0xbf;
74	}
75
76	const QChar *end = ch + len;
77	while (ch < end) {
78	uint u = ch->unicode();
79	if (surrogate_high >= 0) {
80	if (u >= 0xdc00 && u < 0xe000) {
81	u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000;
82	surrogate_high = -1;
83	} else {
84	// high surrogate without low
85	*cursor = replacement;
86	++ch;
87	++invalid;
88	surrogate_high = -1;
89	continue;
90	}
91	} else if (u >= 0xdc00 && u < 0xe000) {
92	// low surrogate without high
93	*cursor = replacement;
94	++ch;
95	++invalid;
96	continue;
97	} else if (u >= 0xd800 && u < 0xdc00) {
98	surrogate_high = u;
99	++ch;
100	continue;
101	}
102
103	if (u < 0x80) {
104	*cursor++ = (uchar)u;
105	} else {
106	if (u < 0x0800) {
107	*cursor++ = 0xc0 \| ((uchar) (u >> 6));
108	} else {
109	if (u > 0xffff) {
110	// see QString::fromUtf8() and QString::utf8() for explanations
111	if (u > 0x10fe00 && u < 0x10ff00) {
112	*cursor++ = (u - 0x10fe00);
113	++ch;
114	continue;
115	} else {
116	*cursor++ = 0xf0 \| ((uchar) (u >> 18));
117	*cursor++ = 0x80 \| (((uchar) (u >> 12)) & 0x3f);
118	}
119	} else {
120	*cursor++ = 0xe0 \| (((uchar) (u >> 12)) & 0x3f);
121	}
122	*cursor++ = 0x80 \| (((uchar) (u >> 6)) & 0x3f);
123	}
124	*cursor++ = 0x80 \| ((uchar) (u&0x3f));
125	}
126	++ch;
127	}
128
129	rstr.resize(cursor - (const uchar*)rstr.constData());
130	if (state) {
131	state->invalidChars += invalid;
132	state->flags \|= QTextCodec::IgnoreHeader;
133	state->remainingChars = 0;
134	if (surrogate_high >= 0) {
135	state->remainingChars = 1;
136	state->state_data[0] = surrogate_high;
137	}
138	}
139	return rstr;
140	}
141
142	QString QUtf8::convertToUnicode(const char chars, int len, QTextCodec::ConverterState state)
143	{
144	bool headerdone = false;
145	ushort replacement = QChar::ReplacementCharacter;
146	int need = 0;
147	int error = -1;
148	uint uc = 0;
149	uint min_uc = 0;
150	if (state) {
151	if (state->flags & QTextCodec::IgnoreHeader)
152	headerdone = true;
153	if (state->flags & QTextCodec::ConvertInvalidToNull)
154	replacement = QChar::Null;
155	need = state->remainingChars;
156	if (need) {
157	uc = state->state_data[0];
158	min_uc = state->state_data[1];
159	}
160	}
161	if (!headerdone && len > 3
162	&& (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
163	// starts with a byte order mark
164	chars += 3;
165	len -= 3;
166	headerdone = true;
167	}
168
169	QString result(need + len + 1, Qt::Uninitialized); // worst case
170	ushort qch = (ushort )result.unicode();
171	uchar ch;
172	int invalid = 0;
173
174	for (int i = 0; i < len; ++i) {
175	ch = chars[i];
176	if (need) {
177	if ((ch&0xc0) == 0x80) {
178	uc = (uc << 6) \| (ch & 0x3f);
179	--need;
180	if (!need) {
181	// utf-8 bom composes into 0xfeff code point
182	if (!headerdone && uc == 0xfeff) {
183	// dont do anything, just skip the BOM
184	} else if (uc > 0xffff && uc < 0x110000) {
185	// surrogate pair
186	Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
187	*qch++ = QChar::highSurrogate(uc);
188	*qch++ = QChar::lowSurrogate(uc);
189	} else if ((uc < min_uc) \|\| (uc >= 0xd800 && uc <= 0xdfff) \|\| (uc >= 0xfffe)) {
190	// error: overlong sequence, UTF16 surrogate or BOM
191	*qch++ = replacement;
192	++invalid;
193	} else {
194	*qch++ = uc;
195	}
196	headerdone = true;
197	}
198	} else {
199	// error
200	i = error;
201	*qch++ = replacement;
202	++invalid;
203	need = 0;
204	headerdone = true;
205	}
206	} else {
207	if (ch < 128) {
208	*qch++ = ushort(ch);
209	headerdone = true;
210	} else if ((ch & 0xe0) == 0xc0) {
211	uc = ch & 0x1f;
212	need = 1;
213	error = i;
214	min_uc = 0x80;
215	headerdone = true;
216	} else if ((ch & 0xf0) == 0xe0) {
217	uc = ch & 0x0f;
218	need = 2;
219	error = i;
220	min_uc = 0x800;
221	} else if ((ch&0xf8) == 0xf0) {
222	uc = ch & 0x07;
223	need = 3;
224	error = i;
225	min_uc = 0x10000;
226	headerdone = true;
227	} else {
228	// error
229	*qch++ = replacement;
230	++invalid;
231	headerdone = true;
232	}
233	}
234	}
235	if (!state && need > 0) {
236	// unterminated UTF sequence
237	for (int i = error; i < len; ++i) {
238	*qch++ = replacement;
239	++invalid;
240	}
241	}
242	result.truncate(qch - (ushort *)result.unicode());
243	if (state) {
244	state->invalidChars += invalid;
245	state->remainingChars = need;
246	if (headerdone)
247	state->flags \|= QTextCodec::IgnoreHeader;
248	state->state_data[0] = need ? uc : 0;
249	state->state_data[1] = need ? min_uc : 0;
250	}
251	return result;
252	}
253
254	QByteArray QUtf16::convertFromUnicode(const QChar uc, int len, QTextCodec::ConverterState state, DataEndianness e)
255	{
256	DataEndianness endian = e;
257	int length = 2*len;
258	if (!state \|\| (!(state->flags & QTextCodec::IgnoreHeader))) {
259	length += 2;
260	}
261	if (e == DetectEndianness) {
262	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
263	}
264
265	QByteArray d;
266	d.resize(length);
267	char *data = d.data();
268	if (!state \|\| !(state->flags & QTextCodec::IgnoreHeader)) {
269	QChar bom(QChar::ByteOrderMark);
270	if (endian == BigEndianness) {
271	data[0] = bom.row();
272	data[1] = bom.cell();
273	} else {
274	data[0] = bom.cell();
275	data[1] = bom.row();
276	}
277	data += 2;
278	}
279	if (endian == BigEndianness) {
280	for (int i = 0; i < len; ++i) {
281	*(data++) = uc[i].row();
282	*(data++) = uc[i].cell();
283	}
284	} else {
285	for (int i = 0; i < len; ++i) {
286	*(data++) = uc[i].cell();
287	*(data++) = uc[i].row();
288	}
289	}
290
291	if (state) {
292	state->remainingChars = 0;
293	state->flags \|= QTextCodec::IgnoreHeader;
294	}
295	return d;
296	}
297
298	QString QUtf16::convertToUnicode(const char chars, int len, QTextCodec::ConverterState state, DataEndianness e)
299	{
300	DataEndianness endian = e;
301	bool half = false;
302	uchar buf = 0;
303	bool headerdone = false;
304	if (state) {
305	headerdone = state->flags & QTextCodec::IgnoreHeader;
306	if (endian == DetectEndianness)
307	endian = (DataEndianness)state->state_data[Endian];
308	if (state->remainingChars) {
309	half = true;
310	buf = state->state_data[Data];
311	}
312	}
313	if (headerdone && endian == DetectEndianness)
314	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
315
316	QString result(len, Qt::Uninitialized); // worst case
317	QChar qch = (QChar )result.unicode();
318	while (len--) {
319	if (half) {
320	QChar ch;
321	if (endian == LittleEndianness) {
322	ch.setRow(*chars++);
323	ch.setCell(buf);
324	} else {
325	ch.setRow(buf);
326	ch.setCell(*chars++);
327	}
328	if (!headerdone) {
329	headerdone = true;
330	if (endian == DetectEndianness) {
331	if (ch == QChar::ByteOrderSwapped) {
332	endian = LittleEndianness;
333	} else if (ch == QChar::ByteOrderMark) {
334	endian = BigEndianness;
335	} else {
336	if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
337	endian = BigEndianness;
338	} else {
339	endian = LittleEndianness;
340	ch = QChar((ch.unicode() >> 8) \| ((ch.unicode() & 0xff) << 8));
341	}
342	*qch++ = ch;
343	}
344	} else if (ch != QChar::ByteOrderMark) {
345	*qch++ = ch;
346	}
347	} else {
348	*qch++ = ch;
349	}
350	half = false;
351	} else {
352	buf = *chars++;
353	half = true;
354	}
355	}
356	result.truncate(qch - result.unicode());
357
358	if (state) {
359	if (headerdone)
360	state->flags \|= QTextCodec::IgnoreHeader;
361	state->state_data[Endian] = endian;
362	if (half) {
363	state->remainingChars = 1;
364	state->state_data[Data] = buf;
365	} else {
366	state->remainingChars = 0;
367	state->state_data[Data] = 0;
368	}
369	}
370	return result;
371	}
372
373	QByteArray QUtf32::convertFromUnicode(const QChar uc, int len, QTextCodec::ConverterState state, DataEndianness e)
374	{
375	DataEndianness endian = e;
376	int length = 4*len;
377	if (!state \|\| (!(state->flags & QTextCodec::IgnoreHeader))) {
378	length += 4;
379	}
380	if (e == DetectEndianness) {
381	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
382	}
383
384	QByteArray d(length, Qt::Uninitialized);
385	char *data = d.data();
386	if (!state \|\| !(state->flags & QTextCodec::IgnoreHeader)) {
387	if (endian == BigEndianness) {
388	data[0] = 0;
389	data[1] = 0;
390	data[2] = (char)0xfe;
391	data[3] = (char)0xff;
392	} else {
393	data[0] = (char)0xff;
394	data[1] = (char)0xfe;
395	data[2] = 0;
396	data[3] = 0;
397	}
398	data += 4;
399	}
400	if (endian == BigEndianness) {
401	for (int i = 0; i < len; ++i) {
402	uint cp = uc[i].unicode();
403	if (uc[i].isHighSurrogate() && i < len - 1)
404	cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
405	*(data++) = cp >> 24;
406	*(data++) = (cp >> 16) & 0xff;
407	*(data++) = (cp >> 8) & 0xff;
408	*(data++) = cp & 0xff;
409	}
410	} else {
411	for (int i = 0; i < len; ++i) {
412	uint cp = uc[i].unicode();
413	if (uc[i].isHighSurrogate() && i < len - 1)
414	cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
415	*(data++) = cp & 0xff;
416	*(data++) = (cp >> 8) & 0xff;
417	*(data++) = (cp >> 16) & 0xff;
418	*(data++) = cp >> 24;
419	}
420	}
421
422	if (state) {
423	state->remainingChars = 0;
424	state->flags \|= QTextCodec::IgnoreHeader;
425	}
426	return d;
427	}
428
429	QString QUtf32::convertToUnicode(const char chars, int len, QTextCodec::ConverterState state, DataEndianness e)
430	{
431	DataEndianness endian = e;
432	uchar tuple[4];
433	int num = 0;
434	bool headerdone = false;
435	if (state) {
436	headerdone = state->flags & QTextCodec::IgnoreHeader;
437	if (endian == DetectEndianness) {
438	endian = (DataEndianness)state->state_data[Endian];
439	}
440	num = state->remainingChars;
441	memcpy(tuple, &state->state_data[Data], 4);
442	}
443	if (headerdone && endian == DetectEndianness)
444	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
445
446	QString result;
447	result.resize((num + len) >> 2 << 1); // worst case
448	QChar qch = (QChar )result.unicode();
449
450	const char *end = chars + len;
451	while (chars < end) {
452	tuple[num++] = *chars++;
453	if (num == 4) {
454	if (!headerdone) {
455	if (endian == DetectEndianness) {
456	if (endian == DetectEndianness) {
457	if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) {
458	endian = LittleEndianness;
459	num = 0;
460	continue;
461	} else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
462	endian = BigEndianness;
463	num = 0;
464	continue;
465	} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
466	endian = BigEndianness;
467	} else {
468	endian = LittleEndianness;
469	}
470	}
471	} else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
472	num = 0;
473	continue;
474	}
475	}
476	uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
477	if (code >= 0x10000) {
478	*qch++ = QChar::highSurrogate(code);
479	*qch++ = QChar::lowSurrogate(code);
480	} else {
481	*qch++ = code;
482	}
483	num = 0;
484	}
485	}
486	result.truncate(qch - result.unicode());
487
488	if (state) {
489	if (headerdone)
490	state->flags \|= QTextCodec::IgnoreHeader;
491	state->state_data[Endian] = endian;
492	state->remainingChars = num;
493	memcpy(&state->state_data[Data], tuple, 4);
494	}
495	return result;
496	}
497
498
499	#ifndef QT_NO_TEXTCODEC
500
501	QUtf8Codec::~QUtf8Codec()
502	{
503	}
504
505	QByteArray QUtf8Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
506	{
507	return QUtf8::convertFromUnicode(uc, len, state);
508	}
509
510	void QUtf8Codec::convertToUnicode(QString target, const char chars, int len, ConverterState *state) const
511	{
512	*target += QUtf8::convertToUnicode(chars, len, state);
513	}
514
515	QString QUtf8Codec::convertToUnicode(const char chars, int len, ConverterState state) const
516	{
517	return QUtf8::convertToUnicode(chars, len, state);
518	}
519
520	QByteArray QUtf8Codec::name() const
521	{
522	return "UTF-8";
523	}
524
525	int QUtf8Codec::mibEnum() const
526	{
527	return 106;
528	}
529
530	QUtf16Codec::~QUtf16Codec()
531	{
532	}
533
534	QByteArray QUtf16Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
535	{
536	return QUtf16::convertFromUnicode(uc, len, state, e);
537	}
538
539	QString QUtf16Codec::convertToUnicode(const char chars, int len, ConverterState state) const
540	{
541	return QUtf16::convertToUnicode(chars, len, state, e);
542	}
543
544	int QUtf16Codec::mibEnum() const
545	{
546	return 1015;
547	}
548
549	QByteArray QUtf16Codec::name() const
550	{
551	return "UTF-16";
552	}
553
554	QList<QByteArray> QUtf16Codec::aliases() const
555	{
556	return QList<QByteArray>();
557	}
558
559	int QUtf16BECodec::mibEnum() const
560	{
561	return 1013;
562	}
563
564	QByteArray QUtf16BECodec::name() const
565	{
566	return "UTF-16BE";
567	}
568
569	QList<QByteArray> QUtf16BECodec::aliases() const
570	{
571	QList<QByteArray> list;
572	return list;
573	}
574
575	int QUtf16LECodec::mibEnum() const
576	{
577	return 1014;
578	}
579
580	QByteArray QUtf16LECodec::name() const
581	{
582	return "UTF-16LE";
583	}
584
585	QList<QByteArray> QUtf16LECodec::aliases() const
586	{
587	QList<QByteArray> list;
588	return list;
589	}
590
591	QUtf32Codec::~QUtf32Codec()
592	{
593	}
594
595	QByteArray QUtf32Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
596	{
597	return QUtf32::convertFromUnicode(uc, len, state, e);
598	}
599
600	QString QUtf32Codec::convertToUnicode(const char chars, int len, ConverterState state) const
601	{
602	return QUtf32::convertToUnicode(chars, len, state, e);
603	}
604
605	int QUtf32Codec::mibEnum() const
606	{
607	return 1017;
608	}
609
610	QByteArray QUtf32Codec::name() const
611	{
612	return "UTF-32";
613	}
614
615	QList<QByteArray> QUtf32Codec::aliases() const
616	{
617	QList<QByteArray> list;
618	return list;
619	}
620
621	int QUtf32BECodec::mibEnum() const
622	{
623	return 1018;
624	}
625
626	QByteArray QUtf32BECodec::name() const
627	{
628	return "UTF-32BE";
629	}
630
631	QList<QByteArray> QUtf32BECodec::aliases() const
632	{
633	QList<QByteArray> list;
634	return list;
635	}
636
637	int QUtf32LECodec::mibEnum() const
638	{
639	return 1019;
640	}
641
642	QByteArray QUtf32LECodec::name() const
643	{
644	return "UTF-32LE";
645	}
646
647	QList<QByteArray> QUtf32LECodec::aliases() const
648	{
649	QList<QByteArray> list;
650	return list;
651	}
652
653	#endif //QT_NO_TEXTCODEC
654
655	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Download in other formats: