Context Navigation

source: trunk/src/corelib/tools/qchar.cpp@ 805

Last change on this file since 805 was 769, checked in by Dmitry A. Kuminov, 15 years ago
trunk: Merged in qt 4.6.3 sources from branches/vendor/nokia/qt.
File size: 41.8 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4	** All rights reserved.
5	** Contact: Nokia Corporation ([email protected])
6	**
7	** This file is part of the QtCore module of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:LGPL$
10	** Commercial Usage
11	** Licensees holding valid Qt Commercial licenses may use this file in
12	** accordance with the Qt Commercial License Agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and Nokia.
15	**
16	** GNU Lesser General Public License Usage
17	** Alternatively, this file may be used under the terms of the GNU Lesser
18	** General Public License version 2.1 as published by the Free Software
19	** Foundation and appearing in the file LICENSE.LGPL included in the
20	** packaging of this file. Please review the following information to
21	** ensure the GNU Lesser General Public License version 2.1 requirements
22	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23	**
24	** In addition, as a special exception, Nokia gives you certain additional
25	** rights. These rights are described in the Nokia Qt LGPL Exception
26	** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you have questions regarding the use of this file, please contact
37	** Nokia at [email protected].
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	// Don't define it while compiling this module, or USERS of Qt will
43	// not be able to link.
44	#ifdef QT_NO_CAST_FROM_ASCII
45	#undef QT_NO_CAST_FROM_ASCII
46	#endif
47	#ifdef QT_NO_CAST_TO_ASCII
48	#undef QT_NO_CAST_TO_ASCII
49	#endif
50	#include "qchar.h"
51	#include "qdatastream.h"
52	#include "qtextcodec.h"
53
54	#include "qunicodetables_p.h"
55
56	#include "qunicodetables.cpp"
57
58	QT_BEGIN_NAMESPACE
59
60	#define LAST_UNICODE_CHAR 0x10ffff
61
62	#ifndef QT_NO_CODEC_FOR_C_STRINGS
63	#ifdef QT_NO_TEXTCODEC
64	#define QT_NO_CODEC_FOR_C_STRINGS
65	#endif
66	#endif
67
68	#define FLAG(x) (1 << (x))
69
70	/*! \class QLatin1Char
71	\brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
72
73	\ingroup string-processing
74
75	This class is only useful to avoid the codec for C strings business
76	in the QChar(ch) constructor. You can avoid it by writing
77	QChar(ch, 0).
78
79	\sa QChar, QLatin1String, QString
80	*/
81
82	/*!
83	\fn const char QLatin1Char::toLatin1() const
84
85	Converts a Latin-1 character to an 8-bit ASCII representation of
86	the character.
87	*/
88
89	/*!
90	\fn const ushort QLatin1Char::unicode() const
91
92	Converts a Latin-1 character to a 16-bit-encoded Unicode representation
93	of the character.
94	*/
95
96	/*!
97	\fn QLatin1Char::QLatin1Char(char c)
98
99	Constructs a Latin-1 character for \a c. This constructor should be
100	used when the encoding of the input character is known to be Latin-1.
101	*/
102
103	/*!
104	\class QChar
105	\brief The QChar class provides a 16-bit Unicode character.
106
107	\ingroup string-processing
108	\reentrant
109
110	In Qt, Unicode characters are 16-bit entities without any markup
111	or structure. This class represents such an entity. It is
112	lightweight, so it can be used everywhere. Most compilers treat
113	it like a \c{unsigned short}.
114
115	QChar provides a full complement of testing/classification
116	functions, converting to and from other formats, converting from
117	composed to decomposed Unicode, and trying to compare and
118	case-convert if you ask it to.
119
120	The classification functions include functions like those in the
121	standard C++ header \<cctype\> (formerly \<ctype.h\>), but
122	operating on the full range of Unicode characters. They all
123	return true if the character is a certain type of character;
124	otherwise they return false. These classification functions are
125	isNull() (returns true if the character is '\\0'), isPrint()
126	(true if the character is any sort of printable character,
127	including whitespace), isPunct() (any sort of punctuation),
128	isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
129	sort of numeric character, not just 0-9), isLetterOrNumber(), and
130	isDigit() (decimal digits). All of these are wrappers around
131	category() which return the Unicode-defined category of each
132	character.
133
134	QChar also provides direction(), which indicates the "natural"
135	writing direction of this character. The joining() function
136	indicates how the character joins with its neighbors (needed
137	mostly for Arabic) and finally hasMirrored(), which indicates
138	whether the character needs to be mirrored when it is printed in
139	its "unnatural" writing direction.
140
141	Composed Unicode characters (like \aring) can be converted to
142	decomposed Unicode ("a" followed by "ring above") by using
143	decomposition().
144
145	In Unicode, comparison is not necessarily possible and case
146	conversion is very difficult at best. Unicode, covering the
147	"entire" world, also includes most of the world's case and
148	sorting problems. operator==() and friends will do comparison
149	based purely on the numeric Unicode value (code point) of the
150	characters, and toUpper() and toLower() will do case changes when
151	the character has a well-defined uppercase/lowercase equivalent.
152	For locale-dependent comparisons, use
153	QString::localeAwareCompare().
154
155	The conversion functions include unicode() (to a scalar),
156	toLatin1() (to scalar, but converts all non-Latin-1 characters to
157	0), row() (gives the Unicode row), cell() (gives the Unicode
158	cell), digitValue() (gives the integer value of any of the
159	numerous digit characters), and a host of constructors.
160
161	QChar provides constructors and cast operators that make it easy
162	to convert to and from traditional 8-bit \c{char}s. If you
163	defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
164	explained in the QString documentation, you will need to
165	explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
166	to construct a QChar from an 8-bit \c char, and you will need to
167	call toAscii() or toLatin1() to get the 8-bit value back.
168
169	\sa QString, Unicode, QLatin1Char
170	*/
171
172	/*!
173	\enum QChar::UnicodeVersion
174
175	Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
176	introduced a certain character.
177
178	\value Unicode_1_1 Version 1.1
179	\value Unicode_2_0 Version 2.0
180	\value Unicode_2_1_2 Version 2.1.2
181	\value Unicode_3_0 Version 3.0
182	\value Unicode_3_1 Version 3.1
183	\value Unicode_3_2 Version 3.2
184	\value Unicode_4_0 Version 4.0
185	\value Unicode_4_1 Version 4.1
186	\value Unicode_5_0 Version 5.0
187	\value Unicode_Unassigned The value is not assigned to any character
188	in version 5.0 of Unicode.
189
190	\sa unicodeVersion()
191	*/
192
193	/*!
194	\enum QChar::Category
195
196	This enum maps the Unicode character categories.
197
198	The following categories are normative in Unicode:
199
200	\value Mark_NonSpacing Unicode class name Mn
201
202	\value Mark_SpacingCombining Unicode class name Mc
203
204	\value Mark_Enclosing Unicode class name Me
205
206	\value Number_DecimalDigit Unicode class name Nd
207
208	\value Number_Letter Unicode class name Nl
209
210	\value Number_Other Unicode class name No
211
212	\value Separator_Space Unicode class name Zs
213
214	\value Separator_Line Unicode class name Zl
215
216	\value Separator_Paragraph Unicode class name Zp
217
218	\value Other_Control Unicode class name Cc
219
220	\value Other_Format Unicode class name Cf
221
222	\value Other_Surrogate Unicode class name Cs
223
224	\value Other_PrivateUse Unicode class name Co
225
226	\value Other_NotAssigned Unicode class name Cn
227
228
229	The following categories are informative in Unicode:
230
231	\value Letter_Uppercase Unicode class name Lu
232
233	\value Letter_Lowercase Unicode class name Ll
234
235	\value Letter_Titlecase Unicode class name Lt
236
237	\value Letter_Modifier Unicode class name Lm
238
239	\value Letter_Other Unicode class name Lo
240
241	\value Punctuation_Connector Unicode class name Pc
242
243	\value Punctuation_Dash Unicode class name Pd
244
245	\value Punctuation_Open Unicode class name Ps
246
247	\value Punctuation_Close Unicode class name Pe
248
249	\value Punctuation_InitialQuote Unicode class name Pi
250
251	\value Punctuation_FinalQuote Unicode class name Pf
252
253	\value Punctuation_Other Unicode class name Po
254
255	\value Symbol_Math Unicode class name Sm
256
257	\value Symbol_Currency Unicode class name Sc
258
259	\value Symbol_Modifier Unicode class name Sk
260
261	\value Symbol_Other Unicode class name So
262
263	\value NoCategory Qt cannot find an appropriate category for the character.
264
265	\omitvalue Punctuation_Dask
266
267	\sa category()
268	*/
269
270	/*!
271	\enum QChar::Direction
272
273	This enum type defines the Unicode direction attributes. See the
274	\l{http://www.unicode.org/}{Unicode Standard} for a description
275	of the values.
276
277	In order to conform to C/C++ naming conventions "Dir" is prepended
278	to the codes used in the Unicode Standard.
279
280	\value DirAL
281	\value DirAN
282	\value DirB
283	\value DirBN
284	\value DirCS
285	\value DirEN
286	\value DirES
287	\value DirET
288	\value DirL
289	\value DirLRE
290	\value DirLRO
291	\value DirNSM
292	\value DirON
293	\value DirPDF
294	\value DirR
295	\value DirRLE
296	\value DirRLO
297	\value DirS
298	\value DirWS
299
300	\sa direction()
301	*/
302
303	/*!
304	\enum QChar::Decomposition
305
306	This enum type defines the Unicode decomposition attributes. See
307	the \l{http://www.unicode.org/}{Unicode Standard} for a
308	description of the values.
309
310	\value NoDecomposition
311	\value Canonical
312	\value Circle
313	\value Compat
314	\value Final
315	\value Font
316	\value Fraction
317	\value Initial
318	\value Isolated
319	\value Medial
320	\value Narrow
321	\value NoBreak
322	\value Small
323	\value Square
324	\value Sub
325	\value Super
326	\value Vertical
327	\value Wide
328
329	\omitvalue Single
330
331	\sa decomposition()
332	*/
333
334	/*!
335	\enum QChar::Joining
336
337	This enum type defines the Unicode joining attributes. See the
338	\l{http://www.unicode.org/}{Unicode Standard} for a description
339	of the values.
340
341	\value Center
342	\value Dual
343	\value OtherJoining
344	\value Right
345
346	\sa joining()
347	*/
348
349	/*!
350	\enum QChar::CombiningClass
351
352	\internal
353
354	This enum type defines names for some of the Unicode combining
355	classes. See the \l{http://www.unicode.org/}{Unicode Standard}
356	for a description of the values.
357
358	\value Combining_Above
359	\value Combining_AboveAttached
360	\value Combining_AboveLeft
361	\value Combining_AboveLeftAttached
362	\value Combining_AboveRight
363	\value Combining_AboveRightAttached
364	\value Combining_Below
365	\value Combining_BelowAttached
366	\value Combining_BelowLeft
367	\value Combining_BelowLeftAttached
368	\value Combining_BelowRight
369	\value Combining_BelowRightAttached
370	\value Combining_DoubleAbove
371	\value Combining_DoubleBelow
372	\value Combining_IotaSubscript
373	\value Combining_Left
374	\value Combining_LeftAttached
375	\value Combining_Right
376	\value Combining_RightAttached
377	*/
378
379	/*!
380	\enum QChar::SpecialCharacter
381
382	\value Null A QChar with this value isNull().
383	\value Nbsp Non-breaking space.
384	\value ReplacementCharacter The character shown when a font has no glyph
385	for a certain codepoint. A special question mark character is often
386	used. Codecs use this codepoint when input data cannot be
387	represented in Unicode.
388	\value ObjectReplacementCharacter Used to represent an object such as an
389	image when such objects cannot be presented.
390	\value ByteOrderMark
391	\value ByteOrderSwapped
392	\value ParagraphSeparator
393	\value LineSeparator
394
395	\omitvalue null
396	\omitvalue replacement
397	\omitvalue byteOrderMark
398	\omitvalue byteOrderSwapped
399	\omitvalue nbsp
400	*/
401
402	/*!
403	\fn void QChar::setCell(uchar cell)
404	\internal
405	*/
406
407	/*!
408	\fn void QChar::setRow(uchar row)
409	\internal
410	*/
411
412	/*!
413	\fn QChar::QChar()
414
415	Constructs a null QChar ('\\0').
416
417	\sa isNull()
418	*/
419
420	/*!
421	\fn QChar::QChar(QLatin1Char ch)
422
423	Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
424	*/
425
426	/*!
427	\fn QChar::QChar(SpecialCharacter ch)
428
429	Constructs a QChar for the predefined character value \a ch.
430	*/
431
432	/*!
433	Constructs a QChar corresponding to ASCII/Latin-1 character \a
434	ch.
435	*/
436	QChar::QChar(char ch)
437	{
438	#ifndef QT_NO_CODEC_FOR_C_STRINGS
439	if (QTextCodec::codecForCStrings())
440	// #####
441	ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
442	else
443	#endif
444	ucs = uchar(ch);
445	}
446
447	/*!
448	Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
449	*/
450	QChar::QChar(uchar ch)
451	{
452	#ifndef QT_NO_CODEC_FOR_C_STRINGS
453	if (QTextCodec::codecForCStrings()) {
454	// #####
455	char c = char(ch);
456	ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
457	} else
458	#endif
459	ucs = ch;
460	}
461
462	/*!
463	\fn QChar::QChar(uchar cell, uchar row)
464
465	Constructs a QChar for Unicode cell \a cell in row \a row.
466
467	\sa cell(), row()
468	*/
469
470	/*!
471	\fn QChar::QChar(ushort code)
472
473	Constructs a QChar for the character with Unicode code point \a
474	code.
475	*/
476
477
478	/*!
479	\fn QChar::QChar(short code)
480
481	Constructs a QChar for the character with Unicode code point \a
482	code.
483	*/
484
485
486	/*!
487	\fn QChar::QChar(uint code)
488
489	Constructs a QChar for the character with Unicode code point \a
490	code.
491	*/
492
493
494	/*!
495	\fn QChar::QChar(int code)
496
497	Constructs a QChar for the character with Unicode code point \a
498	code.
499	*/
500
501
502	/*!
503	\fn bool QChar::isNull() const
504
505	Returns true if the character is the Unicode character 0x0000
506	('\\0'); otherwise returns false.
507	*/
508
509	/*!
510	\fn uchar QChar::cell() const
511
512	Returns the cell (least significant byte) of the Unicode
513	character.
514
515	\sa row()
516	*/
517
518	/*!
519	\fn uchar QChar::row() const
520
521	Returns the row (most significant byte) of the Unicode character.
522
523	\sa cell()
524	*/
525
526	/*!
527	Returns true if the character is a printable character; otherwise
528	returns false. This is any character not of category Cc or Cn.
529
530	Note that this gives no indication of whether the character is
531	available in a particular font.
532	*/
533	bool QChar::isPrint() const
534	{
535	const int test = FLAG(Other_Control) \|
536	FLAG(Other_NotAssigned);
537	return !(FLAG(qGetProp(ucs)->category) & test);
538	}
539
540	/*!
541	Returns true if the character is a separator character (Separator_*
542	categories) or lies in the range U+0009..U+000D; otherwise returns false.
543	*/
544	bool QChar::isSpace() const
545	{
546	if(ucs >= 9 && ucs <=13)
547	return true;
548	const int test = FLAG(Separator_Space) \|
549	FLAG(Separator_Line) \|
550	FLAG(Separator_Paragraph);
551	return FLAG(qGetProp(ucs)->category) & test;
552	}
553
554	/*!
555	Returns true if the character is a mark (Mark_* categories);
556	otherwise returns false.
557
558	See QChar::Category for more information regarding marks.
559	*/
560	bool QChar::isMark() const
561	{
562	const int test = FLAG(Mark_NonSpacing) \|
563	FLAG(Mark_SpacingCombining) \|
564	FLAG(Mark_Enclosing);
565	return FLAG(qGetProp(ucs)->category) & test;
566	}
567
568	/*!
569	Returns true if the character is a punctuation mark (Punctuation_*
570	categories); otherwise returns false.
571	*/
572	bool QChar::isPunct() const
573	{
574	const int test = FLAG(Punctuation_Connector) \|
575	FLAG(Punctuation_Dash) \|
576	FLAG(Punctuation_Open) \|
577	FLAG(Punctuation_Close) \|
578	FLAG(Punctuation_InitialQuote) \|
579	FLAG(Punctuation_FinalQuote) \|
580	FLAG(Punctuation_Other);
581	return FLAG(qGetProp(ucs)->category) & test;
582	}
583
584	/*!
585	Returns true if the character is a letter (Letter_* categories);
586	otherwise returns false.
587	*/
588	bool QChar::isLetter() const
589	{
590	const int test = FLAG(Letter_Uppercase) \|
591	FLAG(Letter_Lowercase) \|
592	FLAG(Letter_Titlecase) \|
593	FLAG(Letter_Modifier) \|
594	FLAG(Letter_Other);
595	return FLAG(qGetProp(ucs)->category) & test;
596	}
597
598	/*!
599	Returns true if the character is a number (Number_* categories,
600	not just 0-9); otherwise returns false.
601
602	\sa isDigit()
603	*/
604	bool QChar::isNumber() const
605	{
606	const int test = FLAG(Number_DecimalDigit) \|
607	FLAG(Number_Letter) \|
608	FLAG(Number_Other);
609	return FLAG(qGetProp(ucs)->category) & test;
610	}
611
612	/*!
613	Returns true if the character is a letter or number (Letter_* or
614	Number_* categories); otherwise returns false.
615	*/
616	bool QChar::isLetterOrNumber() const
617	{
618	const int test = FLAG(Letter_Uppercase) \|
619	FLAG(Letter_Lowercase) \|
620	FLAG(Letter_Titlecase) \|
621	FLAG(Letter_Modifier) \|
622	FLAG(Letter_Other) \|
623	FLAG(Number_DecimalDigit) \|
624	FLAG(Number_Letter) \|
625	FLAG(Number_Other);
626	return FLAG(qGetProp(ucs)->category) & test;
627	}
628
629
630	/*!
631	Returns true if the character is a decimal digit
632	(Number_DecimalDigit); otherwise returns false.
633	*/
634	bool QChar::isDigit() const
635	{
636	return (qGetProp(ucs)->category == Number_DecimalDigit);
637	}
638
639
640	/*!
641	Returns true if the character is a symbol (Symbol_* categories);
642	otherwise returns false.
643	*/
644	bool QChar::isSymbol() const
645	{
646	const int test = FLAG(Symbol_Math) \|
647	FLAG(Symbol_Currency) \|
648	FLAG(Symbol_Modifier) \|
649	FLAG(Symbol_Other);
650	return FLAG(qGetProp(ucs)->category) & test;
651	}
652
653	/*!
654	\fn bool QChar::isHighSurrogate() const
655
656	Returns true if the QChar is the high part of a utf16 surrogate
657	(ie. if its code point is between 0xd800 and 0xdbff).
658	*/
659
660	/*!
661	\fn bool QChar::isLowSurrogate() const
662
663	Returns true if the QChar is the low part of a utf16 surrogate
664	(ie. if its code point is between 0xdc00 and 0xdfff).
665	*/
666
667	/*!
668	\fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
669
670	Converts a UTF16 surrogate pair with the given \a high and \a low values
671	to its UCS-4 code point.
672	*/
673
674	/*!
675	\fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
676
677	Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
678	point.
679	*/
680
681	/*!
682	\fn static ushort QChar::highSurrogate(uint ucs4)
683
684	Returns the high surrogate value of a ucs4 code point.
685	The returned result is undefined if \a ucs4 is smaller than 0x10000.
686	*/
687
688	/*!
689	\fn static ushort QChar::lowSurrogate(uint ucs4)
690
691	Returns the low surrogate value of a ucs4 code point.
692	The returned result is undefined if \a ucs4 is smaller than 0x10000.
693	*/
694
695	/*!
696	Returns the numeric value of the digit, or -1 if the character is
697	not a digit.
698	*/
699	int QChar::digitValue() const
700	{
701	return qGetProp(ucs)->digitValue;
702	}
703
704	/*!
705	\overload
706	Returns the numeric value of the digit, specified by the UCS-2-encoded
707	character, \a ucs2, or -1 if the character is not a digit.
708	*/
709	int QChar::digitValue(ushort ucs2)
710	{
711	return qGetProp(ucs2)->digitValue;
712	}
713
714	/*!
715	\overload
716	Returns the numeric value of the digit specified by the UCS-4-encoded
717	character, \a ucs4, or -1 if the character is not a digit.
718	*/
719	int QChar::digitValue(uint ucs4)
720	{
721	if (ucs4 > LAST_UNICODE_CHAR)
722	return 0;
723	return qGetProp(ucs4)->digitValue;
724	}
725
726	/*!
727	Returns the character's category.
728	*/
729	QChar::Category QChar::category() const
730	{
731	return (QChar::Category) qGetProp(ucs)->category;
732	}
733
734	/*!
735	\overload
736	\since 4.3
737	Returns the category of the UCS-4-encoded character specified by \a ucs4.
738	*/
739	QChar::Category QChar::category(uint ucs4)
740	{
741	if (ucs4 > LAST_UNICODE_CHAR)
742	return QChar::NoCategory;
743	return (QChar::Category) qGetProp(ucs4)->category;
744	}
745
746	/*!
747	\overload
748	Returns the category of the UCS-2-encoded character specified by \a ucs2.
749	*/
750	QChar::Category QChar::category(ushort ucs2)
751	{
752	return (QChar::Category) qGetProp(ucs2)->category;
753	}
754
755
756	/*!
757	Returns the character's direction.
758	*/
759	QChar::Direction QChar::direction() const
760	{
761	return (QChar::Direction) qGetProp(ucs)->direction;
762	}
763
764	/*!
765	\overload
766	Returns the direction of the UCS-4-encoded character specified by \a ucs4.
767	*/
768	QChar::Direction QChar::direction(uint ucs4)
769	{
770	if (ucs4 > LAST_UNICODE_CHAR)
771	return QChar::DirL;
772	return (QChar::Direction) qGetProp(ucs4)->direction;
773	}
774
775	/*!
776	\overload
777	Returns the direction of the UCS-2-encoded character specified by \a ucs2.
778	*/
779	QChar::Direction QChar::direction(ushort ucs2)
780	{
781	return (QChar::Direction) qGetProp(ucs2)->direction;
782	}
783
784	/*!
785	Returns information about the joining properties of the character
786	(needed for certain languages such as Arabic).
787	*/
788	QChar::Joining QChar::joining() const
789	{
790	return (QChar::Joining) qGetProp(ucs)->joining;
791	}
792
793	/*!
794	\overload
795	Returns information about the joining properties of the UCS-4-encoded
796	character specified by \a ucs4 (needed for certain languages such as
797	Arabic).
798	*/
799	QChar::Joining QChar::joining(uint ucs4)
800	{
801	if (ucs4 > LAST_UNICODE_CHAR)
802	return QChar::OtherJoining;
803	return (QChar::Joining) qGetProp(ucs4)->joining;
804	}
805
806	/*!
807	\overload
808	Returns information about the joining properties of the UCS-2-encoded
809	character specified by \a ucs2 (needed for certain languages such as
810	Arabic).
811	*/
812	QChar::Joining QChar::joining(ushort ucs2)
813	{
814	return (QChar::Joining) qGetProp(ucs2)->joining;
815	}
816
817
818	/*!
819	Returns true if the character should be reversed if the text
820	direction is reversed; otherwise returns false.
821
822	Same as (ch.mirroredChar() != ch).
823
824	\sa mirroredChar()
825	*/
826	bool QChar::hasMirrored() const
827	{
828	return qGetProp(ucs)->mirrorDiff != 0;
829	}
830
831	/*!
832	\fn bool QChar::isLower() const
833
834	Returns true if the character is a lowercase letter, i.e.
835	category() is Letter_Lowercase.
836
837	\sa isUpper(), toLower(), toUpper()
838	*/
839
840	/*!
841	\fn bool QChar::isUpper() const
842
843	Returns true if the character is an uppercase letter, i.e.
844	category() is Letter_Uppercase.
845
846	\sa isLower(), toUpper(), toLower()
847	*/
848
849	/*!
850	\fn bool QChar::isTitleCase() const
851	\since 4.3
852
853	Returns true if the character is a titlecase letter, i.e.
854	category() is Letter_Titlecase.
855
856	\sa isLower(), toUpper(), toLower(), toTitleCase()
857	*/
858
859	/*!
860	Returns the mirrored character if this character is a mirrored
861	character; otherwise returns the character itself.
862
863	\sa hasMirrored()
864	*/
865	QChar QChar::mirroredChar() const
866	{
867	return ucs + qGetProp(ucs)->mirrorDiff;
868	}
869
870	/*! \overload
871	Returns the mirrored character if the UCS-4-encoded character specified
872	by \a ucs4 is a mirrored character; otherwise returns the character itself.
873
874	\sa hasMirrored()
875	*/
876	uint QChar::mirroredChar(uint ucs4)
877	{
878	if (ucs4 > LAST_UNICODE_CHAR)
879	return ucs4;
880	return ucs4 + qGetProp(ucs4)->mirrorDiff;
881	}
882
883	/*!
884	\overload
885	Returns the mirrored character if the UCS-2-encoded character specified
886	by \a ucs2 is a mirrored character; otherwise returns the character itself.
887
888	\sa hasMirrored()
889	*/
890	ushort QChar::mirroredChar(ushort ucs2)
891	{
892	return ucs2 + qGetProp(ucs2)->mirrorDiff;
893	}
894
895
// Constants used for the arithmetic decomposition of Hangul syllables:
// base code points of the syllable block and of the three jamo ranges
// (referred to as L, V and T where they are used), plus the range sizes.
enum {
    Hangul_SBase = 0xAC00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11A7,
    Hangul_SCount = 11172,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = Hangul_VCount * Hangul_TCount
};
907
908	// buffer has to have a length of 3. It's needed for Hangul decomposition
909	static const unsigned short * QT_FASTCALL decompositionHelper
910	(uint ucs4, int length, int tag, unsigned short *buffer)
911	{
912	*length = 0;
913	if (ucs4 > LAST_UNICODE_CHAR)
914	return 0;
915	if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
916	int SIndex = ucs4 - Hangul_SBase;
917	buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
918	buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
919	buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
920	*length = buffer[2] == Hangul_TBase ? 2 : 3;
921	*tag = QChar::Canonical;
922	return buffer;
923	}
924
925	const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
926	if (index == 0xffff)
927	return 0;
928	const unsigned short *decomposition = uc_decomposition_map+index;
929	tag = (decomposition) & 0xff;
930	length = (decomposition) >> 8;
931	return decomposition+1;
932	}
933
934	/*!
935	Decomposes a character into its parts. Returns an empty string if
936	no decomposition exists.
937	*/
938	QString QChar::decomposition() const
939	{
940	return decomposition(ucs);
941	}
942
943	/*!
944	\overload
945	Decomposes the UCS-4-encoded character specified by \a ucs4 into its
946	constituent parts. Returns an empty string if no decomposition exists.
947	*/
948	QString QChar::decomposition(uint ucs4)
949	{
950	unsigned short buffer[3];
951	int length;
952	int tag;
953	const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
954	return QString::fromUtf16(d, length);
955	}
956
957	/*!
958	Returns the tag defining the composition of the character. Returns
959	QChar::NoDecomposition if no decomposition exists.
960	*/
961	QChar::Decomposition QChar::decompositionTag() const
962	{
963	return decompositionTag(ucs);
964	}
965
966	/*!
967	\overload
968	Returns the tag defining the composition of the UCS-4-encoded character
969	specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
970	*/
971	QChar::Decomposition QChar::decompositionTag(uint ucs4)
972	{
973	if (ucs4 > LAST_UNICODE_CHAR)
974	return QChar::NoDecomposition;
975	const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
976	if (index == 0xffff)
977	return QChar::NoDecomposition;
978	return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
979	}
980
981	/*!
982	Returns the combining class for the character as defined in the
983	Unicode standard. This is mainly useful as a positioning hint for
984	marks attached to a base character.
985
986	The Qt text rendering engine uses this information to correctly
987	position non-spacing marks around a base character.
988	*/
989	unsigned char QChar::combiningClass() const
990	{
991	return (unsigned char) qGetProp(ucs)->combiningClass;
992	}
993
994	/*! \overload
995	Returns the combining class for the UCS-4-encoded character specified by
996	\a ucs4, as defined in the Unicode standard.
997	*/
998	unsigned char QChar::combiningClass(uint ucs4)
999	{
1000	if (ucs4 > LAST_UNICODE_CHAR)
1001	return 0;
1002	return (unsigned char) qGetProp(ucs4)->combiningClass;
1003	}
1004
1005	/*! \overload
1006	Returns the combining class for the UCS-2-encoded character specified by
1007	\a ucs2, as defined in the Unicode standard.
1008	*/
1009	unsigned char QChar::combiningClass(ushort ucs2)
1010	{
1011	return (unsigned char) qGetProp(ucs2)->combiningClass;
1012	}
1013
1014
1015	/*!
1016	Returns the Unicode version that introduced this character.
1017	*/
1018	QChar::UnicodeVersion QChar::unicodeVersion() const
1019	{
1020	return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1021	}
1022
1023	/*! \overload
1024	Returns the Unicode version that introduced the character specified in
1025	its UCS-4-encoded form as \a ucs4.
1026	*/
1027	QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1028	{
1029	if (ucs4 > LAST_UNICODE_CHAR)
1030	return QChar::Unicode_Unassigned;
1031	return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1032	}
1033
1034	/*! \overload
1035	Returns the Unicode version that introduced the character specified in
1036	its UCS-2-encoded form as \a ucs2.
1037	*/
1038	QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1039	{
1040	return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1041	}
1042
1043
1044	/*!
1045	Returns the lowercase equivalent if the character is uppercase or titlecase;
1046	otherwise returns the character itself.
1047	*/
1048	QChar QChar::toLower() const
1049	{
1050	const QUnicodeTables::Properties *p = qGetProp(ucs);
1051	if (!p->lowerCaseSpecial)
1052	return ucs + p->lowerCaseDiff;
1053	return ucs;
1054	}
1055
1056	/*! \overload
1057	Returns the lowercase equivalent of the UCS-4-encoded character specified
1058	by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1059	the character itself.
1060	*/
1061	uint QChar::toLower(uint ucs4)
1062	{
1063	if (ucs4 > LAST_UNICODE_CHAR)
1064	return ucs4;
1065	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1066	if (!p->lowerCaseSpecial)
1067	return ucs4 + p->lowerCaseDiff;
1068	return ucs4;
1069	}
1070
1071	/*! \overload
1072	Returns the lowercase equivalent of the UCS-2-encoded character specified
1073	by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1074	the character itself.
1075	*/
1076	ushort QChar::toLower(ushort ucs2)
1077	{
1078	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1079	if (!p->lowerCaseSpecial)
1080	return ucs2 + p->lowerCaseDiff;
1081	return ucs2;
1082	}
1083
1084	/*!
1085	Returns the uppercase equivalent if the character is lowercase or titlecase;
1086	otherwise returns the character itself.
1087	*/
1088	QChar QChar::toUpper() const
1089	{
1090	const QUnicodeTables::Properties *p = qGetProp(ucs);
1091	if (!p->upperCaseSpecial)
1092	return ucs + p->upperCaseDiff;
1093	return ucs;
1094	}
1095
1096	/*! \overload
1097	Returns the uppercase equivalent of the UCS-4-encoded character specified
1098	by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1099	the character itself.
1100	*/
1101	uint QChar::toUpper(uint ucs4)
1102	{
1103	if (ucs4 > LAST_UNICODE_CHAR)
1104	return ucs4;
1105	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1106	if (!p->upperCaseSpecial)
1107	return ucs4 + p->upperCaseDiff;
1108	return ucs4;
1109	}
1110
1111	/*! \overload
1112	Returns the uppercase equivalent of the UCS-2-encoded character specified
1113	by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1114	the character itself.
1115	*/
1116	ushort QChar::toUpper(ushort ucs2)
1117	{
1118	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1119	if (!p->upperCaseSpecial)
1120	return ucs2 + p->upperCaseDiff;
1121	return ucs2;
1122	}
1123
1124	/*!
1125	Returns the title case equivalent if the character is lowercase or uppercase;
1126	otherwise returns the character itself.
1127	*/
1128	QChar QChar::toTitleCase() const
1129	{
1130	const QUnicodeTables::Properties *p = qGetProp(ucs);
1131	if (!p->titleCaseSpecial)
1132	return ucs + p->titleCaseDiff;
1133	return ucs;
1134	}
1135
1136	/*!
1137	\overload
1138	Returns the title case equivalent of the UCS-4-encoded character specified
1139	by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1140	the character itself.
1141	*/
1142	uint QChar::toTitleCase(uint ucs4)
1143	{
1144	if (ucs4 > LAST_UNICODE_CHAR)
1145	return ucs4;
1146	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1147	if (!p->titleCaseSpecial)
1148	return ucs4 + p->titleCaseDiff;
1149	return ucs4;
1150	}
1151
1152	/*!
1153	\overload
1154	Returns the title case equivalent of the UCS-2-encoded character specified
1155	by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1156	the character itself.
1157	*/
1158	ushort QChar::toTitleCase(ushort ucs2)
1159	{
1160	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1161	if (!p->titleCaseSpecial)
1162	return ucs2 + p->titleCaseDiff;
1163	return ucs2;
1164	}
1165
1166
1167	static inline uint foldCase(const ushort ch, const ushort start)
1168	{
1169	uint c = *ch;
1170	if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1171	c = QChar::surrogateToUcs4(*(ch - 1), c);
1172	return *ch + qGetProp(c)->caseFoldDiff;
1173	}
1174
1175	static inline uint foldCase(uint ch, uint &last)
1176	{
1177	uint c = ch;
1178	if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1179	c = QChar::surrogateToUcs4(last, c);
1180	last = ch;
1181	return ch + qGetProp(c)->caseFoldDiff;
1182	}
1183
1184	static inline ushort foldCase(ushort ch)
1185	{
1186	return ch + qGetProp(ch)->caseFoldDiff;
1187	}
1188
1189	/*!
1190	Returns the case folded equivalent of the character. For most Unicode characters this
1191	is the same as toLowerCase().
1192	*/
1193	QChar QChar::toCaseFolded() const
1194	{
1195	return ucs + qGetProp(ucs)->caseFoldDiff;
1196	}
1197
1198	/*!
1199	\overload
1200	Returns the case folded equivalent of the UCS-4-encoded character specified
1201	by \a ucs4. For most Unicode characters this is the same as toLowerCase().
1202	*/
1203	uint QChar::toCaseFolded(uint ucs4)
1204	{
1205	if (ucs4 > LAST_UNICODE_CHAR)
1206	return ucs4;
1207	return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1208	}
1209
1210	/*!
1211	\overload
1212	Returns the case folded equivalent of the UCS-2-encoded character specified
1213	by \a ucs2. For most Unicode characters this is the same as toLowerCase().
1214	*/
1215	ushort QChar::toCaseFolded(ushort ucs2)
1216	{
1217	return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1218	}
1219
1220
1221	/*!
1222	\fn char QChar::latin1() const
1223
1224	Use toLatin1() instead.
1225	*/
1226
1227	/*!
1228	\fn char QChar::ascii() const
1229
1230	Use toAscii() instead.
1231	*/
1232
1233	/*!
1234	\fn char QChar::toLatin1() const
1235
1236	Returns the Latin-1 character equivalent to the QChar, or 0. This
1237	is mainly useful for non-internationalized software.
1238
1239	\sa toAscii(), unicode(), QTextCodec::codecForCStrings()
1240	*/
1241
1242	/*!
1243	\fn char QChar::toAscii() const
1244	Returns the character value of the QChar obtained using the current
1245	codec used to read C strings, or 0 if the character is not representable
1246	using this codec. The default codec handles Latin-1 encoded text,
1247	but this can be changed to assist developers writing source code using
1248	other encodings.
1249
1250	The main purpose of this function is to preserve ASCII characters used
1251	in C strings. This is mainly useful for developers of non-internationalized
1252	software.
1253
1254	\sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
1255	*/
1256	#ifdef Q_COMPILER_MANGLES_RETURN_TYPE
1257	const char QChar::toAscii() const
1258	#else
1259	char QChar::toAscii() const
1260	#endif
1261	{
1262	#ifndef QT_NO_CODEC_FOR_C_STRINGS
1263	if (QTextCodec::codecForCStrings())
1264	// #####
1265	return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
1266	#endif
1267	return ucs > 0xff ? 0 : char(ucs);
1268	}
1269
1270	/*!
1271	\fn QChar QChar::fromLatin1(char c)
1272
1273	Converts the Latin-1 character \a c to its equivalent QChar. This
1274	is mainly useful for non-internationalized software.
1275
1276	\sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
1277	*/
1278
1279	/*!
1280	Converts the ASCII character \a c to its equivalent QChar. This
1281	is mainly useful for non-internationalized software.
1282
1283	An alternative is to use QLatin1Char.
1284
1285	\sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
1286	*/
1287	QChar QChar::fromAscii(char c)
1288	{
1289	#ifndef QT_NO_CODEC_FOR_C_STRINGS
1290	if (QTextCodec::codecForCStrings())
1291	// #####
1292	return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
1293	#endif
1294	return QChar(ushort((uchar)c));
1295	}
1296
1297	#ifndef QT_NO_DATASTREAM
1298	/*!
1299	\relates QChar
1300
1301	Writes the char \a chr to the stream \a out.
1302
1303	\sa {Format of the QDataStream operators}
1304	*/
1305
1306	QDataStream &operator<<(QDataStream &out, const QChar &chr)
1307	{
1308	out << quint16(chr.unicode());
1309	return out;
1310	}
1311
1312
1313	/*!
1314	\relates QChar
1315
1316	Reads a char from the stream \a in into char \a chr.
1317
1318	\sa {Format of the QDataStream operators}
1319	*/
1320
1321	QDataStream &operator>>(QDataStream &in, QChar &chr)
1322	{
1323	quint16 u;
1324	in >> u;
1325	chr.unicode() = ushort(u);
1326	return in;
1327	}
1328	#endif // QT_NO_DATASTREAM
1329
1330	/*!
1331	\fn ushort & QChar::unicode()
1332
1333	Returns a reference to the numeric Unicode value of the QChar.
1334	*/
1335
1336	/*!
1337	\fn ushort QChar::unicode() const
1338
1339	\overload
1340	*/
1341
1342	/*****************************************************************************
1343	Documentation of QChar related functions
1344	*****************************************************************************/
1345
1346	/*!
1347	\fn bool operator==(QChar c1, QChar c2)
1348
1349	\relates QChar
1350
1351	Returns true if \a c1 and \a c2 are the same Unicode character;
1352	otherwise returns false.
1353	*/
1354
1355	/*!
1356	\fn int operator!=(QChar c1, QChar c2)
1357
1358	\relates QChar
1359
1360	Returns true if \a c1 and \a c2 are not the same Unicode
1361	character; otherwise returns false.
1362	*/
1363
1364	/*!
1365	\fn int operator<=(QChar c1, QChar c2)
1366
1367	\relates QChar
1368
1369	Returns true if the numeric Unicode value of \a c1 is less than
1370	or equal to that of \a c2; otherwise returns false.
1371	*/
1372
1373	/*!
1374	\fn int operator>=(QChar c1, QChar c2)
1375
1376	\relates QChar
1377
1378	Returns true if the numeric Unicode value of \a c1 is greater than
1379	or equal to that of \a c2; otherwise returns false.
1380	*/
1381
1382	/*!
1383	\fn int operator<(QChar c1, QChar c2)
1384
1385	\relates QChar
1386
1387	Returns true if the numeric Unicode value of \a c1 is less than
1388	that of \a c2; otherwise returns false.
1389	*/
1390
1391	/*!
1392	\fn int operator>(QChar c1, QChar c2)
1393
1394	\relates QChar
1395
1396	Returns true if the numeric Unicode value of \a c1 is greater than
1397	that of \a c2; otherwise returns false.
1398	*/
1399
1400	/*!
1401	\fn bool QChar::mirrored() const
1402
1403	Use hasMirrored() instead.
1404	*/
1405
1406	/*!
1407	\fn QChar QChar::lower() const
1408
1409	Use toLower() instead.
1410	*/
1411
1412	/*!
1413	\fn QChar QChar::upper() const
1414
1415	Use toUpper() instead.
1416	*/
1417
1418	/*!
1419	\fn bool QChar::networkOrdered()
1420
1421	See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
1422	*/
1423
1424
1425	// ---------------------------------------------------------------------------
1426
1427
1428	static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
1429	{
1430	unsigned short buffer[3];
1431
1432	QString &s = *str;
1433
1434	const unsigned short utf16 = reinterpret_cast<unsigned short >(s.data());
1435	const unsigned short *uc = utf16 + s.length();
1436	while (uc != utf16 + from) {
1437	uint ucs4 = *(--uc);
1438	if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1439	ushort high = *(uc - 1);
1440	if (QChar(high).isHighSurrogate()) {
1441	--uc;
1442	ucs4 = QChar::surrogateToUcs4(high, ucs4);
1443	}
1444	}
1445	if (QChar::unicodeVersion(ucs4) > version)
1446	continue;
1447	int length;
1448	int tag;
1449	const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1450	if (!d \|\| (canonical && tag != QChar::Canonical))
1451	continue;
1452
1453	s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
1454	// since the insert invalidates the pointers and we do decomposition recursive
1455	int pos = uc - utf16;
1456	utf16 = reinterpret_cast<unsigned short *>(s.data());
1457	uc = utf16 + pos + length;
1458	}
1459	}
1460
1461
1462	static ushort ligatureHelper(ushort u1, ushort u2)
1463	{
1464	// hangul L-V pair
1465	int LIndex = u1 - Hangul_LBase;
1466	if (0 <= LIndex && LIndex < Hangul_LCount) {
1467	int VIndex = u2 - Hangul_VBase;
1468	if (0 <= VIndex && VIndex < Hangul_VCount)
1469	return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
1470	}
1471
1472	// hangul LV-T pair
1473	int SIndex = u1 - Hangul_SBase;
1474	if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1475	int TIndex = u2 - Hangul_TBase;
1476	if (0 <= TIndex && TIndex <= Hangul_TCount)
1477	return u1 + TIndex;
1478	}
1479
1480	const unsigned short index = GET_LIGATURE_INDEX(u2);
1481	if (index == 0xffff)
1482	return 0;
1483	const unsigned short *ligatures = uc_ligature_map+index;
1484	ushort length = *ligatures;
1485	++ligatures;
1486	// ### use bsearch
1487	for (uint i = 0; i < length; ++i)
1488	if (ligatures[2*i] == u1)
1489	return ligatures[2*i+1];
1490	return 0;
1491	}
1492
1493	static void composeHelper(QString *str, int from)
1494	{
1495	QString &s = *str;
1496
1497	if (s.length() - from < 2)
1498	return;
1499
1500	// the loop can partly ignore high Unicode as all ligatures are in the BMP
1501	int starter = 0;
1502	int lastCombining = 0;
1503	int pos = from;
1504	while (pos < s.length()) {
1505	uint uc = s.at(pos).unicode();
1506	if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1507	ushort low = s.at(pos+1).unicode();
1508	if (QChar(low).isLowSurrogate()) {
1509	uc = QChar::surrogateToUcs4(uc, low);
1510	++pos;
1511	}
1512	}
1513	int combining = QChar::combiningClass(uc);
1514	if (starter == pos - 1 \|\| combining > lastCombining) {
1515	// allowed to form ligature with S
1516	QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
1517	if (ligature.unicode()) {
1518	s[starter] = ligature;
1519	s.remove(pos, 1);
1520	continue;
1521	}
1522	}
1523	if (!combining)
1524	starter = pos;
1525	lastCombining = combining;
1526	++pos;
1527	}
1528	}
1529
1530
1531	static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
1532	{
1533	QString &s = *str;
1534	const int l = s.length()-1;
1535	int pos = from;
1536	while (pos < l) {
1537	int p2 = pos+1;
1538	uint u1 = s.at(pos).unicode();
1539	if (QChar(u1).isHighSurrogate()) {
1540	ushort low = s.at(pos+1).unicode();
1541	if (QChar(low).isLowSurrogate()) {
1542	p2++;
1543	u1 = QChar::surrogateToUcs4(u1, low);
1544	if (p2 >= l)
1545	break;
1546	}
1547	}
1548	uint u2 = s.at(p2).unicode();
1549	if (QChar(u2).isHighSurrogate() && p2 < l-1) {
1550	ushort low = s.at(p2+1).unicode();
1551	if (QChar(low).isLowSurrogate()) {
1552	p2++;
1553	u2 = QChar::surrogateToUcs4(u2, low);
1554	}
1555	}
1556
1557	int c2 = QChar::combiningClass(u2);
1558	if (QChar::unicodeVersion(u2) > version)
1559	c2 = 0;
1560
1561	if (c2 == 0) {
1562	pos = p2+1;
1563	continue;
1564	}
1565	int c1 = QChar::combiningClass(u1);
1566	if (QChar::unicodeVersion(u1) > version)
1567	c1 = 0;
1568
1569	if (c1 > c2) {
1570	QChar *uc = s.data();
1571	int p = pos;
1572	// exchange characters
1573	if (u2 < 0x10000) {
1574	uc[p++] = u2;
1575	} else {
1576	uc[p++] = QChar::highSurrogate(u2);
1577	uc[p++] = QChar::lowSurrogate(u2);
1578	}
1579	if (u1 < 0x10000) {
1580	uc[p++] = u1;
1581	} else {
1582	uc[p++] = QChar::highSurrogate(u1);
1583	uc[p++] = QChar::lowSurrogate(u1);
1584	}
1585	if (pos > 0)
1586	--pos;
1587	if (pos > 0 && s.at(pos).isLowSurrogate())
1588	--pos;
1589	} else {
1590	++pos;
1591	if (u1 > 0x10000)
1592	++pos;
1593	}
1594	}
1595	}
1596
1597	int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
1598	{
1599	if (uc > 0xffff)
1600	return Common;
1601	int script = uc_scripts[uc >> 7];
1602	if (script < ScriptSentinel)
1603	return script;
1604	script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);
1605	script = uc_scripts[script + (uc & 0x7f)];
1606	return script;
1607	}
1608
1609
1610	Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
1611	{
1612	return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
1613	}
1614
1615
1616	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Download in other formats: