Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

qutfcodec.cpp@ 467

Last change on this file since 467 was 2, checked in by Dmitry A. Kuminov, 16 years ago
Initially imported qt-all-opensource-src-4.5.1 from Trolltech.
File size: 17.7 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information (qt-info@nokia.com)
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial Usage
10	** Licensees holding valid Qt Commercial licenses may use this file in
11	** accordance with the Qt Commercial License Agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Nokia.
14	**
15	** GNU Lesser General Public License Usage
16	** Alternatively, this file may be used under the terms of the GNU Lesser
17	** General Public License version 2.1 as published by the Free Software
18	** Foundation and appearing in the file LICENSE.LGPL included in the
19	** packaging of this file. Please review the following information to
20	** ensure the GNU Lesser General Public License version 2.1 requirements
21	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22	**
23	** In addition, as a special exception, Nokia gives you certain
24	** additional rights. These rights are described in the Nokia Qt LGPL
25	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26	** package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you are unsure which license is appropriate for your use, please
37	** contact the sales department at qt-sales@nokia.com.
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qutfcodec_p.h"
43	#include "qlist.h"
44	#include "qendian.h"
45	#include "qchar.h"
46
47	#ifndef QT_NO_TEXTCODEC
48
49	QT_BEGIN_NAMESPACE
50
51	QUtf8Codec::~QUtf8Codec()
52	{
53	}
54
55	QByteArray QUtf8Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
56	{
57	uchar replacement = '?';
58	int rlen = 3*len;
59	int surrogate_high = -1;
60	if (state) {
61	if (state->flags & ConvertInvalidToNull)
62	replacement = 0;
63	if (!(state->flags & IgnoreHeader))
64	rlen += 3;
65	if (state->remainingChars)
66	surrogate_high = state->state_data[0];
67	}
68
69	QByteArray rstr;
70	rstr.resize(rlen);
71	uchar* cursor = (uchar*)rstr.data();
72	const QChar *ch = uc;
73	int invalid = 0;
74	if (state && !(state->flags & IgnoreHeader)) {
75	*cursor++ = 0xef;
76	*cursor++ = 0xbb;
77	*cursor++ = 0xbf;
78	}
79
80	const QChar *end = ch + len;
81	while (ch < end) {
82	uint u = ch->unicode();
83	if (surrogate_high >= 0) {
84	if (u >= 0xdc00 && u < 0xe000) {
85	u = (surrogate_high - 0xd800)*0x400 + (u - 0xdc00) + 0x10000;
86	surrogate_high = -1;
87	} else {
88	// high surrogate without low
89	*cursor = replacement;
90	++ch;
91	++invalid;
92	surrogate_high = -1;
93	continue;
94	}
95	} else if (u >= 0xdc00 && u < 0xe000) {
96	// low surrogate without high
97	*cursor = replacement;
98	++ch;
99	++invalid;
100	continue;
101	} else if (u >= 0xd800 && u < 0xdc00) {
102	surrogate_high = u;
103	++ch;
104	continue;
105	}
106
107	if (u < 0x80) {
108	*cursor++ = (uchar)u;
109	} else {
110	if (u < 0x0800) {
111	*cursor++ = 0xc0 \| ((uchar) (u >> 6));
112	} else {
113	if (u > 0xffff) {
114	// see QString::fromUtf8() and QString::utf8() for explanations
115	if (u > 0x10fe00 && u < 0x10ff00) {
116	*cursor++ = (u - 0x10fe00);
117	++ch;
118	continue;
119	} else {
120	*cursor++ = 0xf0 \| ((uchar) (u >> 18));
121	*cursor++ = 0x80 \| (((uchar) (u >> 12)) & 0x3f);
122	}
123	} else {
124	*cursor++ = 0xe0 \| (((uchar) (u >> 12)) & 0x3f);
125	}
126	*cursor++ = 0x80 \| (((uchar) (u >> 6)) & 0x3f);
127	}
128	*cursor++ = 0x80 \| ((uchar) (u&0x3f));
129	}
130	++ch;
131	}
132
133	rstr.resize(cursor - (const uchar*)rstr.constData());
134	if (state) {
135	state->invalidChars += invalid;
136	state->flags \|= IgnoreHeader;
137	state->remainingChars = 0;
138	if (surrogate_high >= 0) {
139	state->remainingChars = 1;
140	state->state_data[0] = surrogate_high;
141	}
142	}
143	return rstr;
144	}
145
146	void QUtf8Codec::convertToUnicode(QString target, const char chars, int len, ConverterState *state) const
147	{
148	bool headerdone = false;
149	QChar replacement = QChar::ReplacementCharacter;
150	int need = 0;
151	int error = -1;
152	uint uc = 0;
153	uint min_uc = 0;
154	if (state) {
155	if (state->flags & IgnoreHeader)
156	headerdone = true;
157	if (state->flags & ConvertInvalidToNull)
158	replacement = QChar::Null;
159	need = state->remainingChars;
160	if (need) {
161	uc = state->state_data[0];
162	min_uc = state->state_data[1];
163	}
164	}
165	if (!headerdone && len > 3
166	&& (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
167	// starts with a byte order mark
168	chars += 3;
169	len -= 3;
170	headerdone = true;
171	}
172
173	int originalLength = target->length();
174	QString &result = *target;
175	result.resize(originalLength + len + 1); // worst case
176	QChar *qch = result.data() + originalLength;
177	uchar ch;
178	int invalid = 0;
179
180	for (int i=0; i<len; i++) {
181	ch = chars[i];
182	if (need) {
183	if ((ch&0xc0) == 0x80) {
184	uc = (uc << 6) \| (ch & 0x3f);
185	need--;
186	if (!need) {
187	if (uc > 0xffff && uc < 0x110000) {
188	// surrogate pair
189	uc -= 0x10000;
190	unsigned short high = uc/0x400 + 0xd800;
191	unsigned short low = uc%0x400 + 0xdc00;
192
193	// resize if necessary
194	long where = qch - result.unicode();
195	if (where + 2 >= result.length()) {
196	result.resize(where + 2);
197	qch = result.data() + where;
198	}
199
200	*qch++ = QChar(high);
201	*qch++ = QChar(low);
202	} else if ((uc < min_uc) \|\| (uc >= 0xd800 && uc <= 0xdfff) \|\| (uc >= 0xfffe)) {
203	// error
204	*qch++ = replacement;
205	++invalid;
206	} else {
207	*qch++ = uc;
208	}
209	}
210	} else {
211	// error
212	i = error;
213	*qch++ = replacement;
214	++invalid;
215	need = 0;
216	}
217	} else {
218	if (ch < 128) {
219	*qch++ = QLatin1Char(ch);
220	} else if ((ch & 0xe0) == 0xc0) {
221	uc = ch & 0x1f;
222	need = 1;
223	error = i;
224	min_uc = 0x80;
225	} else if ((ch & 0xf0) == 0xe0) {
226	uc = ch & 0x0f;
227	need = 2;
228	error = i;
229	min_uc = 0x800;
230	} else if ((ch&0xf8) == 0xf0) {
231	uc = ch & 0x07;
232	need = 3;
233	error = i;
234	min_uc = 0x10000;
235	} else {
236	// error
237	*qch++ = replacement;
238	++invalid;
239	}
240	}
241	}
242	if (!state && need > 0) {
243	// unterminated UTF sequence
244	for (int i = error; i < len; ++i) {
245	*qch++ = replacement;
246	++invalid;
247	}
248	}
249	result.truncate(qch - result.unicode());
250	if (state) {
251	state->invalidChars += invalid;
252	state->remainingChars = need;
253	if (headerdone)
254	state->flags \|= IgnoreHeader;
255	state->state_data[0] = need ? uc : 0;
256	state->state_data[1] = need ? min_uc : 0;
257	}
258	}
259
260	QString QUtf8Codec::convertToUnicode(const char chars, int len, ConverterState state) const
261	{
262	QString result;
263	convertToUnicode(&result, chars, len, state);
264	return result;
265	}
266
267	QByteArray QUtf8Codec::name() const
268	{
269	return "UTF-8";
270	}
271
272	int QUtf8Codec::mibEnum() const
273	{
274	return 106;
275	}
276
// Indices into ConverterState::state_data used by the UTF-16 and UTF-32
// decoders below: Endian holds the byte order in effect, Data holds the
// input bytes of a code unit that was cut off at the end of a call.
enum {
    Endian = 0,
    Data = 1
};
278
279	QUtf16Codec::~QUtf16Codec()
280	{
281	}
282
283	QByteArray QUtf16Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
284	{
285	Endianness endian = e;
286	int length = 2*len;
287	if (!state \|\| (!(state->flags & IgnoreHeader))) {
288	length += 2;
289	}
290	if (e == Detect) {
291	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
292	}
293
294	QByteArray d;
295	d.resize(length);
296	char *data = d.data();
297	if (!state \|\| !(state->flags & IgnoreHeader)) {
298	QChar bom(QChar::ByteOrderMark);
299	if (endian == BE) {
300	data[0] = bom.row();
301	data[1] = bom.cell();
302	} else {
303	data[0] = bom.cell();
304	data[1] = bom.row();
305	}
306	data += 2;
307	}
308	if (endian == BE) {
309	for (int i = 0; i < len; ++i) {
310	*(data++) = uc[i].row();
311	*(data++) = uc[i].cell();
312	}
313	} else {
314	for (int i = 0; i < len; ++i) {
315	*(data++) = uc[i].cell();
316	*(data++) = uc[i].row();
317	}
318	}
319
320	if (state) {
321	state->remainingChars = 0;
322	state->flags \|= IgnoreHeader;
323	}
324	return d;
325	}
326
327	QString QUtf16Codec::convertToUnicode(const char chars, int len, ConverterState state) const
328	{
329	Endianness endian = e;
330	bool half = false;
331	uchar buf = 0;
332	bool headerdone = false;
333	if (state) {
334	headerdone = state->flags & IgnoreHeader;
335	if (endian == Detect)
336	endian = (Endianness)state->state_data[Endian];
337	if (state->remainingChars) {
338	half = true;
339	buf = state->state_data[Data];
340	}
341	}
342	if (headerdone && endian == Detect)
343	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
344
345	QString result;
346	result.resize(len); // worst case
347	QChar qch = (QChar )result.unicode();
348	while (len--) {
349	if (half) {
350	QChar ch;
351	if (endian == LE) {
352	ch.setRow(*chars++);
353	ch.setCell(buf);
354	} else {
355	ch.setRow(buf);
356	ch.setCell(*chars++);
357	}
358	if (!headerdone) {
359	if (endian == Detect) {
360	if (ch == QChar::ByteOrderSwapped && endian != BE) {
361	endian = LE;
362	} else if (ch == QChar::ByteOrderMark && endian != LE) {
363	// ignore BOM
364	endian = BE;
365	} else {
366	if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
367	endian = BE;
368	} else {
369	endian = LE;
370	ch = QChar((ch.unicode() >> 8) \| ((ch.unicode() & 0xff) << 8));
371	}
372	*qch++ = ch;
373	}
374	} else if (ch != QChar::ByteOrderMark) {
375	*qch++ = ch;
376	}
377	headerdone = true;
378	} else {
379	*qch++ = ch;
380	}
381	half = false;
382	} else {
383	buf = *chars++;
384	half = true;
385	}
386	}
387	result.truncate(qch - result.unicode());
388
389	if (state) {
390	if (endian != Detect)
391	state->flags \|= IgnoreHeader;
392	state->state_data[Endian] = endian;
393	if (half) {
394	state->remainingChars = 1;
395	state->state_data[Data] = buf;
396	} else {
397	state->remainingChars = 0;
398	state->state_data[Data] = 0;
399	}
400	}
401	return result;
402	}
403
404	int QUtf16Codec::mibEnum() const
405	{
406	return 1015;
407	}
408
409	QByteArray QUtf16Codec::name() const
410	{
411	return "UTF-16";
412	}
413
414	QList<QByteArray> QUtf16Codec::aliases() const
415	{
416	QList<QByteArray> list;
417	list << "ISO-10646-UCS-2";
418	return list;
419	}
420
421	int QUtf16BECodec::mibEnum() const
422	{
423	return 1013;
424	}
425
426	QByteArray QUtf16BECodec::name() const
427	{
428	return "UTF-16BE";
429	}
430
431	QList<QByteArray> QUtf16BECodec::aliases() const
432	{
433	QList<QByteArray> list;
434	return list;
435	}
436
437	int QUtf16LECodec::mibEnum() const
438	{
439	return 1014;
440	}
441
442	QByteArray QUtf16LECodec::name() const
443	{
444	return "UTF-16LE";
445	}
446
447	QList<QByteArray> QUtf16LECodec::aliases() const
448	{
449	QList<QByteArray> list;
450	return list;
451	}
452
453	QUtf32Codec::~QUtf32Codec()
454	{
455	}
456
457	QByteArray QUtf32Codec::convertFromUnicode(const QChar uc, int len, ConverterState state) const
458	{
459	Endianness endian = e;
460	int length = 4*len;
461	if (!state \|\| (!(state->flags & IgnoreHeader))) {
462	length += 4;
463	}
464	if (e == Detect) {
465	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
466	}
467
468	QByteArray d;
469	d.resize(length);
470	char *data = d.data();
471	if (!state \|\| !(state->flags & IgnoreHeader)) {
472	if (endian == BE) {
473	data[0] = 0;
474	data[1] = 0;
475	data[2] = (char)0xfe;
476	data[3] = (char)0xff;
477	} else {
478	data[0] = (char)0xff;
479	data[1] = (char)0xfe;
480	data[2] = 0;
481	data[3] = 0;
482	}
483	data += 2;
484	}
485	if (endian == BE) {
486	for (int i = 0; i < len; ++i) {
487	uint cp = uc[i].unicode();
488	if (uc[i].isHighSurrogate() && i < len - 1)
489	cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
490	*(data++) = cp >> 24;
491	*(data++) = (cp >> 16) & 0xff;
492	*(data++) = (cp >> 8) & 0xff;
493	*(data++) = cp & 0xff;
494	}
495	} else {
496	for (int i = 0; i < len; ++i) {
497	uint cp = uc[i].unicode();
498	if (uc[i].isHighSurrogate() && i < len - 1)
499	cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
500	*(data++) = cp & 0xff;
501	*(data++) = (cp >> 8) & 0xff;
502	*(data++) = (cp >> 16) & 0xff;
503	*(data++) = cp >> 24;
504	}
505	}
506
507	if (state) {
508	state->remainingChars = 0;
509	state->flags \|= IgnoreHeader;
510	}
511	return d;
512	}
513
514	QString QUtf32Codec::convertToUnicode(const char chars, int len, ConverterState state) const
515	{
516	Endianness endian = e;
517	uchar tuple[4];
518	int num = 0;
519	bool headerdone = false;
520	if (state) {
521	headerdone = state->flags & IgnoreHeader;
522	if (endian == Detect) {
523	endian = (Endianness)state->state_data[Endian];
524	}
525	num = state->remainingChars;
526	memcpy(tuple, &state->state_data[Data], 4);
527	}
528	if (headerdone && endian == Detect)
529	endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
530
531	QString result;
532	result.resize((num + len) >> 2 << 1); // worst case
533	QChar qch = (QChar )result.unicode();
534
535	const char *end = chars + len;
536	while (chars < end) {
537	tuple[num++] = *chars++;
538	if (num == 4) {
539	if (!headerdone) {
540	if (endian == Detect) {
541	if (endian == Detect) {
542	if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BE) {
543	endian = LE;
544	num = 0;
545	continue;
546	} else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LE) {
547	endian = BE;
548	num = 0;
549	continue;
550	} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
551	endian = BE;
552	} else {
553	endian = LE;
554	}
555	}
556	} else if (((endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
557	num = 0;
558	continue;
559	}
560	}
561	uint code = (endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
562	if (code >= 0x10000) {
563	*qch++ = QChar::highSurrogate(code);
564	*qch++ = QChar::lowSurrogate(code);
565	} else {
566	*qch++ = code;
567	}
568	num = 0;
569	}
570	}
571	result.truncate(qch - result.unicode());
572
573	if (state) {
574	if (endian != Detect)
575	state->flags \|= IgnoreHeader;
576	state->state_data[Endian] = endian;
577	state->remainingChars = num;
578	memcpy(&state->state_data[Data], tuple, 4);
579	}
580	return result;
581	}
582
583	int QUtf32Codec::mibEnum() const
584	{
585	return 1017;
586	}
587
588	QByteArray QUtf32Codec::name() const
589	{
590	return "UTF-32";
591	}
592
593	QList<QByteArray> QUtf32Codec::aliases() const
594	{
595	QList<QByteArray> list;
596	return list;
597	}
598
599	int QUtf32BECodec::mibEnum() const
600	{
601	return 1018;
602	}
603
604	QByteArray QUtf32BECodec::name() const
605	{
606	return "UTF-32BE";
607	}
608
609	QList<QByteArray> QUtf32BECodec::aliases() const
610	{
611	QList<QByteArray> list;
612	return list;
613	}
614
615	int QUtf32LECodec::mibEnum() const
616	{
617	return 1019;
618	}
619
620	QByteArray QUtf32LECodec::name() const
621	{
622	return "UTF-32LE";
623	}
624
625	QList<QByteArray> QUtf32LECodec::aliases() const
626	{
627	QList<QByteArray> list;
628	return list;
629	}
630
631
632	QT_END_NAMESPACE
633
634	#endif //QT_NO_TEXTCODEC

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/corelib/codecs/qutfcodec.cpp@ 467

Download in other formats: