Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

qchar.cpp@ 17

Last change on this file since 17 was 2, checked in by Dmitry A. Kuminov, 16 years ago
Initially imported qt-all-opensource-src-4.5.1 from Trolltech.
File size: 41.5 KB

Line
1	/****************************************************************************
2	**
3	** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4	** Contact: Qt Software Information ([email protected])
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial Usage
10	** Licensees holding valid Qt Commercial licenses may use this file in
11	** accordance with the Qt Commercial License Agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Nokia.
14	**
15	** GNU Lesser General Public License Usage
16	** Alternatively, this file may be used under the terms of the GNU Lesser
17	** General Public License version 2.1 as published by the Free Software
18	** Foundation and appearing in the file LICENSE.LGPL included in the
19	** packaging of this file. Please review the following information to
20	** ensure the GNU Lesser General Public License version 2.1 requirements
21	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22	**
23	** In addition, as a special exception, Nokia gives you certain
24	** additional rights. These rights are described in the Nokia Qt LGPL
25	** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26	** package.
27	**
28	** GNU General Public License Usage
29	** Alternatively, this file may be used under the terms of the GNU
30	** General Public License version 3.0 as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL included in the
32	** packaging of this file. Please review the following information to
33	** ensure the GNU General Public License version 3.0 requirements will be
34	** met: http://www.gnu.org/copyleft/gpl.html.
35	**
36	** If you are unsure which license is appropriate for your use, please
37	** contact the sales department at [email protected].
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	// Don't define it while compiling this module, or USERS of Qt will
43	// not be able to link.
44	#ifdef QT_NO_CAST_FROM_ASCII
45	#undef QT_NO_CAST_FROM_ASCII
46	#endif
47	#ifdef QT_NO_CAST_TO_ASCII
48	#undef QT_NO_CAST_TO_ASCII
49	#endif
50	#include "qchar.h"
51	#include "qdatastream.h"
52	#include "qtextcodec.h"
53
54	#include "qunicodetables_p.h"
55
56	#include "qunicodetables.cpp"
57
58	QT_BEGIN_NAMESPACE
59
60	#define LAST_UNICODE_CHAR 0x10ffff
61
62	#ifndef QT_NO_CODEC_FOR_C_STRINGS
63	#ifdef QT_NO_TEXTCODEC
64	#define QT_NO_CODEC_FOR_C_STRINGS
65	#endif
66	#endif
67
68	#define FLAG(x) (1 << (x))
69
70	/*! \class QLatin1Char
71	\brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
72
73	\ingroup text
74
75	This class is only useful to avoid the codec for C strings business
76	in the QChar(ch) constructor. You can avoid it by writing
77	QChar(ch, 0).
78
79	\sa QChar, QLatin1String, QString
80	*/
81
82	/*!
83	\fn const char QLatin1Char::toLatin1() const
84
85	Converts a Latin-1 character to an 8-bit ASCII representation of
86	the character.
87	*/
88
89	/*!
90	\fn const ushort QLatin1Char::unicode() const
91
92	Converts a Latin-1 character to a 16-bit-encoded Unicode representation
93	of the character.
94	*/
95
96	/*!
97	\fn QLatin1Char::QLatin1Char(char c)
98
99	Constructs a Latin-1 character for \a c. This constructor should be
100	used when the encoding of the input character is known to be Latin-1.
101	*/
102
103	/*!
104	\class QChar
105	\brief The QChar class provides a 16-bit Unicode character.
106
107	\ingroup text
108	\reentrant
109
110	In Qt, Unicode characters are 16-bit entities without any markup
111	or structure. This class represents such an entity. It is
112	lightweight, so it can be used everywhere. Most compilers treat
113	it like a \c{unsigned short}.
114
115	QChar provides a full complement of testing/classification
116	functions, converting to and from other formats, converting from
117	composed to decomposed Unicode, and trying to compare and
118	case-convert if you ask it to.
119
120	The classification functions include functions like those in the
121	standard C++ header \<cctype\> (formerly \<ctype.h\>), but
122	operating on the full range of Unicode characters. They all
123	return true if the character is a certain type of character;
124	otherwise they return false. These classification functions are
125	isNull() (returns true if the character is '\\0'), isPrint()
126	(true if the character is any sort of printable character,
127	including whitespace), isPunct() (any sort of punctuation),
128	isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
129	sort of numeric character, not just 0-9), isLetterOrNumber(), and
130	isDigit() (decimal digits). All of these are wrappers around
131	category() which return the Unicode-defined category of each
132	character.
133
134	QChar also provides direction(), which indicates the "natural"
135	writing direction of this character. The joining() function
136	indicates how the character joins with its neighbors (needed
137	mostly for Arabic) and finally hasMirrored(), which indicates
138	whether the character needs to be mirrored when it is printed in
139	its "unnatural" writing direction.
140
141	Composed Unicode characters (like \aring) can be converted to
142	decomposed Unicode ("a" followed by "ring above") by using
143	decomposition().
144
145	In Unicode, comparison is not necessarily possible and case
146	conversion is very difficult at best. Unicode, covering the
147	"entire" world, also includes most of the world's case and
148	sorting problems. operator==() and friends will do comparison
149	based purely on the numeric Unicode value (code point) of the
150	characters, and toUpper() and toLower() will do case changes when
151	the character has a well-defined uppercase/lowercase equivalent.
152	For locale-dependent comparisons, use
153	QString::localeAwareCompare().
154
155	The conversion functions include unicode() (to a scalar),
156	toLatin1() (to scalar, but converts all non-Latin-1 characters to
157	0), row() (gives the Unicode row), cell() (gives the Unicode
158	cell), digitValue() (gives the integer value of any of the
159	numerous digit characters), and a host of constructors.
160
161	QChar provides constructors and cast operators that make it easy
162	to convert to and from traditional 8-bit \c{char}s. If you
163	defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
164	explained in the QString documentation, you will need to
165	explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
166	to construct a QChar from an 8-bit \c char, and you will need to
167	call toAscii() or toLatin1() to get the 8-bit value back.
168
169	\sa QString, Unicode, QLatin1Char
170	*/
171
172	/*!
173	\enum QChar::UnicodeVersion
174
175	Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
176	introduced a certain character.
177
178	\value Unicode_1_1 Version 1.1
179	\value Unicode_2_0 Version 2.0
180	\value Unicode_2_1_2 Version 2.1.2
181	\value Unicode_3_0 Version 3.0
182	\value Unicode_3_1 Version 3.1
183	\value Unicode_3_2 Version 3.2
184	\value Unicode_4_0 Version 4.0
185	\value Unicode_4_1 Version 4.1
186	\value Unicode_5_0 Version 5.0
187	\value Unicode_Unassigned The value is not assigned to any character
188	in version 5.0 of Unicode.
189
190	\sa unicodeVersion()
191	*/
192
193	/*!
194	\enum QChar::Category
195
196	This enum maps the Unicode character categories.
197
198	The following characters are normative in Unicode:
199
200	\value Mark_NonSpacing Unicode class name Mn
201
202	\value Mark_SpacingCombining Unicode class name Mc
203
204	\value Mark_Enclosing Unicode class name Me
205
206	\value Number_DecimalDigit Unicode class name Nd
207
208	\value Number_Letter Unicode class name Nl
209
210	\value Number_Other Unicode class name No
211
212	\value Separator_Space Unicode class name Zs
213
214	\value Separator_Line Unicode class name Zl
215
216	\value Separator_Paragraph Unicode class name Zp
217
218	\value Other_Control Unicode class name Cc
219
220	\value Other_Format Unicode class name Cf
221
222	\value Other_Surrogate Unicode class name Cs
223
224	\value Other_PrivateUse Unicode class name Co
225
226	\value Other_NotAssigned Unicode class name Cn
227
228
229	The following categories are informative in Unicode:
230
231	\value Letter_Uppercase Unicode class name Lu
232
233	\value Letter_Lowercase Unicode class name Ll
234
235	\value Letter_Titlecase Unicode class name Lt
236
237	\value Letter_Modifier Unicode class name Lm
238
239	\value Letter_Other Unicode class name Lo
240
241	\value Punctuation_Connector Unicode class name Pc
242
243	\value Punctuation_Dash Unicode class name Pd
244
245	\value Punctuation_Open Unicode class name Ps
246
247	\value Punctuation_Close Unicode class name Pe
248
249	\value Punctuation_InitialQuote Unicode class name Pi
250
251	\value Punctuation_FinalQuote Unicode class name Pf
252
253	\value Punctuation_Other Unicode class name Po
254
255	\value Symbol_Math Unicode class name Sm
256
257	\value Symbol_Currency Unicode class name Sc
258
259	\value Symbol_Modifier Unicode class name Sk
260
261	\value Symbol_Other Unicode class name So
262
263	\value NoCategory Qt cannot find an appropriate category for the character.
264
265	\omitvalue Punctuation_Dask
266
267	\sa category()
268	*/
269
270	/*!
271	\enum QChar::Direction
272
273	This enum type defines the Unicode direction attributes. See the
274	\l{http://www.unicode.org/}{Unicode Standard} for a description
275	of the values.
276
277	In order to conform to C/C++ naming conventions "Dir" is prepended
278	to the codes used in the Unicode Standard.
279
280	\value DirAL
281	\value DirAN
282	\value DirB
283	\value DirBN
284	\value DirCS
285	\value DirEN
286	\value DirES
287	\value DirET
288	\value DirL
289	\value DirLRE
290	\value DirLRO
291	\value DirNSM
292	\value DirON
293	\value DirPDF
294	\value DirR
295	\value DirRLE
296	\value DirRLO
297	\value DirS
298	\value DirWS
299
300	\sa direction()
301	*/
302
303	/*!
304	\enum QChar::Decomposition
305
306	This enum type defines the Unicode decomposition attributes. See
307	the \l{http://www.unicode.org/}{Unicode Standard} for a
308	description of the values.
309
310	\value NoDecomposition
311	\value Canonical
312	\value Circle
313	\value Compat
314	\value Final
315	\value Font
316	\value Fraction
317	\value Initial
318	\value Isolated
319	\value Medial
320	\value Narrow
321	\value NoBreak
322	\value Small
323	\value Square
324	\value Sub
325	\value Super
326	\value Vertical
327	\value Wide
328
329	\omitvalue Single
330
331	\sa decomposition()
332	*/
333
334	/*!
335	\enum QChar::Joining
336
337	This enum type defines the Unicode joining attributes. See the
338	\l{http://www.unicode.org/}{Unicode Standard} for a description
339	of the values.
340
341	\value Center
342	\value Dual
343	\value OtherJoining
344	\value Right
345
346	\sa joining()
347	*/
348
349	/*!
350	\enum QChar::CombiningClass
351
352	\internal
353
354	This enum type defines names for some of the Unicode combining
355	classes. See the \l{http://www.unicode.org/}{Unicode Standard}
356	for a description of the values.
357
358	\value Combining_Above
359	\value Combining_AboveAttached
360	\value Combining_AboveLeft
361	\value Combining_AboveLeftAttached
362	\value Combining_AboveRight
363	\value Combining_AboveRightAttached
364	\value Combining_Below
365	\value Combining_BelowAttached
366	\value Combining_BelowLeft
367	\value Combining_BelowLeftAttached
368	\value Combining_BelowRight
369	\value Combining_BelowRightAttached
370	\value Combining_DoubleAbove
371	\value Combining_DoubleBelow
372	\value Combining_IotaSubscript
373	\value Combining_Left
374	\value Combining_LeftAttached
375	\value Combining_Right
376	\value Combining_RightAttached
377	*/
378
379	/*!
380	\enum QChar::SpecialCharacter
381
382	\value Null A QChar with this value isNull().
383	\value Nbsp Non-breaking space.
384	\value ReplacementCharacter
385	\value ObjectReplacementCharacter The character shown when a font has no glyph for a certain codepoint. The square character is normally used.
386	\value ByteOrderMark
387	\value ByteOrderSwapped
388	\value ParagraphSeparator
389	\value LineSeparator
390
391	\omitvalue null
392	\omitvalue replacement
393	\omitvalue byteOrderMark
394	\omitvalue byteOrderSwapped
395	\omitvalue nbsp
396	*/
397
398	/*!
399	\fn void QChar::setCell(uchar cell)
400	\internal
401	*/
402
403	/*!
404	\fn void QChar::setRow(uchar row)
405	\internal
406	*/
407
408	/*!
409	\fn QChar::QChar()
410
411	Constructs a null QChar ('\\0').
412
413	\sa isNull()
414	*/
415
416	/*!
417	\fn QChar::QChar(QLatin1Char ch)
418
419	Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
420	*/
421
422	/*!
423	\fn QChar::QChar(SpecialCharacter ch)
424
425	Constructs a QChar for the predefined character value \a ch.
426	*/
427
428	/*!
429	Constructs a QChar corresponding to ASCII/Latin-1 character \a
430	ch.
431	*/
432	QChar::QChar(char ch)
433	{
434	#ifndef QT_NO_CODEC_FOR_C_STRINGS
435	if (QTextCodec::codecForCStrings())
436	// #####
437	ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
438	else
439	#endif
440	ucs = uchar(ch);
441	}
442
443	/*!
444	Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
445	*/
446	QChar::QChar(uchar ch)
447	{
448	#ifndef QT_NO_CODEC_FOR_C_STRINGS
449	if (QTextCodec::codecForCStrings()) {
450	// #####
451	char c = char(ch);
452	ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
453	} else
454	#endif
455	ucs = ch;
456	}
457
458	/*!
459	\fn QChar::QChar(uchar cell, uchar row)
460
461	Constructs a QChar for Unicode cell \a cell in row \a row.
462
463	\sa cell(), row()
464	*/
465
466	/*!
467	\fn QChar::QChar(ushort code)
468
469	Constructs a QChar for the character with Unicode code point \a
470	code.
471	*/
472
473
474	/*!
475	\fn QChar::QChar(short code)
476
477	Constructs a QChar for the character with Unicode code point \a
478	code.
479	*/
480
481
482	/*!
483	\fn QChar::QChar(uint code)
484
485	Constructs a QChar for the character with Unicode code point \a
486	code.
487	*/
488
489
490	/*!
491	\fn QChar::QChar(int code)
492
493	Constructs a QChar for the character with Unicode code point \a
494	code.
495	*/
496
497
498	/*!
499	\fn bool QChar::isNull() const
500
501	Returns true if the character is the Unicode character 0x0000
502	('\\0'); otherwise returns false.
503	*/
504
505	/*!
506	\fn uchar QChar::cell() const
507
508	Returns the cell (least significant byte) of the Unicode
509	character.
510
511	\sa row()
512	*/
513
514	/*!
515	\fn uchar QChar::row() const
516
517	Returns the row (most significant byte) of the Unicode character.
518
519	\sa cell()
520	*/
521
522	/*!
523	Returns true if the character is a printable character; otherwise
524	returns false. This is any character not of category Cc or Cn.
525
526	Note that this gives no indication of whether the character is
527	available in a particular font.
528	*/
529	bool QChar::isPrint() const
530	{
531	const int test = FLAG(Other_Control) \|
532	FLAG(Other_NotAssigned);
533	return !(FLAG(qGetProp(ucs)->category) & test);
534	}
535
536	/*!
537	Returns true if the character is a separator character (Separator_*
538	categories) or is in the range U+0009 to U+000D; otherwise returns false.
539	*/
540	bool QChar::isSpace() const
541	{
542	if(ucs >= 9 && ucs <=13)
543	return true;
544	const int test = FLAG(Separator_Space) \|
545	FLAG(Separator_Line) \|
546	FLAG(Separator_Paragraph);
547	return FLAG(qGetProp(ucs)->category) & test;
548	}
549
550	/*!
551	Returns true if the character is a mark (Mark_* categories);
552	otherwise returns false.
553
554	See QChar::Category for more information regarding marks.
555	*/
556	bool QChar::isMark() const
557	{
558	const int test = FLAG(Mark_NonSpacing) \|
559	FLAG(Mark_SpacingCombining) \|
560	FLAG(Mark_Enclosing);
561	return FLAG(qGetProp(ucs)->category) & test;
562	}
563
564	/*!
565	Returns true if the character is a punctuation mark (Punctuation_*
566	categories); otherwise returns false.
567	*/
568	bool QChar::isPunct() const
569	{
570	const int test = FLAG(Punctuation_Connector) \|
571	FLAG(Punctuation_Dash) \|
572	FLAG(Punctuation_Open) \|
573	FLAG(Punctuation_Close) \|
574	FLAG(Punctuation_InitialQuote) \|
575	FLAG(Punctuation_FinalQuote) \|
576	FLAG(Punctuation_Other);
577	return FLAG(qGetProp(ucs)->category) & test;
578	}
579
580	/*!
581	Returns true if the character is a letter (Letter_* categories);
582	otherwise returns false.
583	*/
584	bool QChar::isLetter() const
585	{
586	const int test = FLAG(Letter_Uppercase) \|
587	FLAG(Letter_Lowercase) \|
588	FLAG(Letter_Titlecase) \|
589	FLAG(Letter_Modifier) \|
590	FLAG(Letter_Other);
591	return FLAG(qGetProp(ucs)->category) & test;
592	}
593
594	/*!
595	Returns true if the character is a number (Number_* categories,
596	not just 0-9); otherwise returns false.
597
598	\sa isDigit()
599	*/
600	bool QChar::isNumber() const
601	{
602	const int test = FLAG(Number_DecimalDigit) \|
603	FLAG(Number_Letter) \|
604	FLAG(Number_Other);
605	return FLAG(qGetProp(ucs)->category) & test;
606	}
607
608	/*!
609	Returns true if the character is a letter or number (Letter_* or
610	Number_* categories); otherwise returns false.
611	*/
612	bool QChar::isLetterOrNumber() const
613	{
614	const int test = FLAG(Letter_Uppercase) \|
615	FLAG(Letter_Lowercase) \|
616	FLAG(Letter_Titlecase) \|
617	FLAG(Letter_Modifier) \|
618	FLAG(Letter_Other) \|
619	FLAG(Number_DecimalDigit) \|
620	FLAG(Number_Letter) \|
621	FLAG(Number_Other);
622	return FLAG(qGetProp(ucs)->category) & test;
623	}
624
625
626	/*!
627	Returns true if the character is a decimal digit
628	(Number_DecimalDigit); otherwise returns false.
629	*/
630	bool QChar::isDigit() const
631	{
632	return (qGetProp(ucs)->category == Number_DecimalDigit);
633	}
634
635
636	/*!
637	Returns true if the character is a symbol (Symbol_* categories);
638	otherwise returns false.
639	*/
640	bool QChar::isSymbol() const
641	{
642	const int test = FLAG(Symbol_Math) \|
643	FLAG(Symbol_Currency) \|
644	FLAG(Symbol_Modifier) \|
645	FLAG(Symbol_Other);
646	return FLAG(qGetProp(ucs)->category) & test;
647	}
648
649	/*!
650	\fn bool QChar::isHighSurrogate() const
651
652	Returns true if the QChar is the high part of a utf16 surrogate
653	(ie. if its code point is between 0xd800 and 0xdbff).
654	*/
655
656	/*!
657	\fn bool QChar::isLowSurrogate() const
658
659	Returns true if the QChar is the low part of a utf16 surrogate
660	(ie. if its code point is between 0xdc00 and 0xdfff).
661	*/
662
663	/*!
664	\fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
665
666	Converts a UTF16 surrogate pair with the given \a high and \a low values
667	to its UCS-4 code point.
668	*/
669
670	/*!
671	\fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
672
673	Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code
674	point.
675	*/
676
677	/*!
678	\fn static ushort QChar::highSurrogate(uint ucs4)
679
680	Returns the high surrogate value of a ucs4 code point.
681	The returned result is undefined if \a ucs4 is smaller than 0x10000.
682	*/
683
684	/*!
685	\fn static ushort QChar::lowSurrogate(uint ucs4)
686
687	Returns the low surrogate value of a ucs4 code point.
688	The returned result is undefined if \a ucs4 is smaller than 0x10000.
689	*/
690
691	/*!
692	Returns the numeric value of the digit, or -1 if the character is
693	not a digit.
694	*/
695	int QChar::digitValue() const
696	{
697	return qGetProp(ucs)->digitValue;
698	}
699
700	/*!
701	\overload
702	Returns the numeric value of the digit, specified by the UCS-2-encoded
703	character, \a ucs2, or -1 if the character is not a digit.
704	*/
705	int QChar::digitValue(ushort ucs2)
706	{
707	return qGetProp(ucs2)->digitValue;
708	}
709
710	/*!
711	\overload
712	Returns the numeric value of the digit specified by the UCS-4-encoded
713	character, \a ucs4, or -1 if the character is not a digit.
714	*/
715	int QChar::digitValue(uint ucs4)
716	{
717	if (ucs4 > LAST_UNICODE_CHAR)
718	return 0;
719	return qGetProp(ucs4)->digitValue;
720	}
721
722	/*!
723	Returns the character's category.
724	*/
725	QChar::Category QChar::category() const
726	{
727	return (QChar::Category) qGetProp(ucs)->category;
728	}
729
730	/*!
731	\overload
732	\since 4.3
733	Returns the category of the UCS-4-encoded character specified by \a ucs4.
734	*/
735	QChar::Category QChar::category(uint ucs4)
736	{
737	if (ucs4 > LAST_UNICODE_CHAR)
738	return QChar::NoCategory;
739	return (QChar::Category) qGetProp(ucs4)->category;
740	}
741
742	/*!
743	\overload
744	Returns the category of the UCS-2-encoded character specified by \a ucs2.
745	*/
746	QChar::Category QChar::category(ushort ucs2)
747	{
748	return (QChar::Category) qGetProp(ucs2)->category;
749	}
750
751
752	/*!
753	Returns the character's direction.
754	*/
755	QChar::Direction QChar::direction() const
756	{
757	return (QChar::Direction) qGetProp(ucs)->direction;
758	}
759
760	/*!
761	\overload
762	Returns the direction of the UCS-4-encoded character specified by \a ucs4.
763	*/
764	QChar::Direction QChar::direction(uint ucs4)
765	{
766	if (ucs4 > LAST_UNICODE_CHAR)
767	return QChar::DirL;
768	return (QChar::Direction) qGetProp(ucs4)->direction;
769	}
770
771	/*!
772	\overload
773	Returns the direction of the UCS-2-encoded character specified by \a ucs2.
774	*/
775	QChar::Direction QChar::direction(ushort ucs2)
776	{
777	return (QChar::Direction) qGetProp(ucs2)->direction;
778	}
779
780	/*!
781	Returns information about the joining properties of the character
782	(needed for certain languages such as Arabic).
783	*/
784	QChar::Joining QChar::joining() const
785	{
786	return (QChar::Joining) qGetProp(ucs)->joining;
787	}
788
789	/*!
790	\overload
791	Returns information about the joining properties of the UCS-4-encoded
792	character specified by \a ucs4 (needed for certain languages such as
793	Arabic).
794	*/
795	QChar::Joining QChar::joining(uint ucs4)
796	{
797	if (ucs4 > LAST_UNICODE_CHAR)
798	return QChar::OtherJoining;
799	return (QChar::Joining) qGetProp(ucs4)->joining;
800	}
801
802	/*!
803	\overload
804	Returns information about the joining properties of the UCS-2-encoded
805	character specified by \a ucs2 (needed for certain languages such as
806	Arabic).
807	*/
808	QChar::Joining QChar::joining(ushort ucs2)
809	{
810	return (QChar::Joining) qGetProp(ucs2)->joining;
811	}
812
813
814	/*!
815	Returns true if the character should be reversed if the text
816	direction is reversed; otherwise returns false.
817
818	Same as (ch.mirroredChar() != ch).
819
820	\sa mirroredChar()
821	*/
822	bool QChar::hasMirrored() const
823	{
824	return qGetProp(ucs)->mirrorDiff != 0;
825	}
826
827	/*!
828	\fn bool QChar::isLower() const
829
830	Returns true if the character is a lowercase letter, i.e.
831	category() is Letter_Lowercase.
832
833	\sa isUpper(), toLower(), toUpper()
834	*/
835
836	/*!
837	\fn bool QChar::isUpper() const
838
839	Returns true if the character is an uppercase letter, i.e.
840	category() is Letter_Uppercase.
841
842	\sa isLower(), toUpper(), toLower()
843	*/
844
845	/*!
846	\fn bool QChar::isTitleCase() const
847	\since 4.3
848
849	Returns true if the character is a titlecase letter, i.e.
850	category() is Letter_Titlecase.
851
852	\sa isLower(), toUpper(), toLower(), toTitleCase()
853	*/
854
855	/*!
856	Returns the mirrored character if this character is a mirrored
857	character; otherwise returns the character itself.
858
859	\sa hasMirrored()
860	*/
861	QChar QChar::mirroredChar() const
862	{
863	return ucs + qGetProp(ucs)->mirrorDiff;
864	}
865
866	/*! \overload
867	Returns the mirrored character if the UCS-4-encoded character specified
868	by \a ucs4 is a mirrored character; otherwise returns the character itself.
869
870	\sa hasMirrored()
871	*/
872	uint QChar::mirroredChar(uint ucs4)
873	{
874	if (ucs4 > LAST_UNICODE_CHAR)
875	return ucs4;
876	return ucs4 + qGetProp(ucs4)->mirrorDiff;
877	}
878
879	/*!
880	\overload
881	Returns the mirrored character if the UCS-2-encoded character specified
882	by \a ucs2 is a mirrored character; otherwise returns the character itself.
883
884	\sa hasMirrored()
885	*/
886	ushort QChar::mirroredChar(ushort ucs2)
887	{
888	return ucs2 + qGetProp(ucs2)->mirrorDiff;
889	}
890
891
// Constants used for the arithmetic decomposition of precomposed Hangul
// syllables into their L, V and T parts.
enum {
    // First code point of the syllable block and of each part's range.
    Hangul_SBase = 0xAC00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11A7,
    // Sizes of the syllable block and of each part's range.
    Hangul_SCount = 11172,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    // Number of syllables that share one L part.
    Hangul_NCount = Hangul_VCount * Hangul_TCount
};
903
904	// buffer has to have a length of 3. It's needed for Hangul decomposition
905	static const unsigned short * QT_FASTCALL decompositionHelper
906	(uint ucs4, int length, int tag, unsigned short *buffer)
907	{
908	*length = 0;
909	if (ucs4 > LAST_UNICODE_CHAR)
910	return 0;
911	if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
912	int SIndex = ucs4 - Hangul_SBase;
913	buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
914	buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
915	buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
916	*length = buffer[2] == Hangul_TBase ? 2 : 3;
917	*tag = QChar::Canonical;
918	return buffer;
919	}
920
921	const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
922	if (index == 0xffff)
923	return 0;
924	const unsigned short *decomposition = uc_decomposition_map+index;
925	tag = (decomposition) & 0xff;
926	length = (decomposition) >> 8;
927	return decomposition+1;
928	}
929
930	/*!
931	Decomposes a character into its parts. Returns an empty string if
932	no decomposition exists.
933	*/
934	QString QChar::decomposition() const
935	{
936	return decomposition(ucs);
937	}
938
939	/*!
940	\overload
941	Decomposes the UCS-4-encoded character specified by \a ucs4 into its
942	constituent parts. Returns an empty string if no decomposition exists.
943	*/
944	QString QChar::decomposition(uint ucs4)
945	{
946	unsigned short buffer[3];
947	int length;
948	int tag;
949	const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
950	return QString::fromUtf16(d, length);
951	}
952
953	/*!
954	Returns the tag defining the decomposition of the character. Returns
955	QChar::NoDecomposition if no decomposition exists.
956	*/
957	QChar::Decomposition QChar::decompositionTag() const
958	{
959	return decompositionTag(ucs);
960	}
961
962	/*!
963	\overload
964	Returns the tag defining the composition of the UCS-4-encoded character
965	specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
966	*/
967	QChar::Decomposition QChar::decompositionTag(uint ucs4)
968	{
969	if (ucs4 > LAST_UNICODE_CHAR)
970	return QChar::NoDecomposition;
971	const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
972	if (index == 0xffff)
973	return QChar::NoDecomposition;
974	return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
975	}
976
977	/*!
978	Returns the combining class for the character as defined in the
979	Unicode standard. This is mainly useful as a positioning hint for
980	marks attached to a base character.
981
982	The Qt text rendering engine uses this information to correctly
983	position non-spacing marks around a base character.
984	*/
985	unsigned char QChar::combiningClass() const
986	{
987	return (unsigned char) qGetProp(ucs)->combiningClass;
988	}
989
990	/*! \overload
991	Returns the combining class for the UCS-4-encoded character specified by
992	\a ucs4, as defined in the Unicode standard.
993	*/
994	unsigned char QChar::combiningClass(uint ucs4)
995	{
996	if (ucs4 > LAST_UNICODE_CHAR)
997	return 0;
998	return (unsigned char) qGetProp(ucs4)->combiningClass;
999	}
1000
1001	/*! \overload
1002	Returns the combining class for the UCS-2-encoded character specified by
1003	\a ucs2, as defined in the Unicode standard.
1004	*/
1005	unsigned char QChar::combiningClass(ushort ucs2)
1006	{
1007	return (unsigned char) qGetProp(ucs2)->combiningClass;
1008	}
1009
1010
1011	/*!
1012	Returns the Unicode version that introduced this character.
1013	*/
1014	QChar::UnicodeVersion QChar::unicodeVersion() const
1015	{
1016	return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1017	}
1018
1019	/*! \overload
1020	Returns the Unicode version that introduced the character specified in
1021	its UCS-4-encoded form as \a ucs4.
1022	*/
1023	QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1024	{
1025	if (ucs4 > LAST_UNICODE_CHAR)
1026	return QChar::Unicode_Unassigned;
1027	return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1028	}
1029
1030	/*! \overload
1031	Returns the Unicode version that introduced the character specified in
1032	its UCS-2-encoded form as \a ucs2.
1033	*/
1034	QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1035	{
1036	return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1037	}
1038
1039
1040	/*!
1041	Returns the lowercase equivalent if the character is uppercase or titlecase;
1042	otherwise returns the character itself.
1043	*/
1044	QChar QChar::toLower() const
1045	{
1046	const QUnicodeTables::Properties *p = qGetProp(ucs);
1047	if (!p->lowerCaseSpecial)
1048	return ucs + p->lowerCaseDiff;
1049	return ucs;
1050	}
1051
1052	/*! \overload
1053	Returns the lowercase equivalent of the UCS-4-encoded character specified
1054	by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1055	the character itself.
1056	*/
1057	uint QChar::toLower(uint ucs4)
1058	{
1059	if (ucs4 > LAST_UNICODE_CHAR)
1060	return ucs4;
1061	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1062	if (!p->lowerCaseSpecial)
1063	return ucs4 + p->lowerCaseDiff;
1064	return ucs4;
1065	}
1066
1067	/*! \overload
1068	Returns the lowercase equivalent of the UCS-2-encoded character specified
1069	by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1070	the character itself.
1071	*/
1072	ushort QChar::toLower(ushort ucs2)
1073	{
1074	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1075	if (!p->lowerCaseSpecial)
1076	return ucs2 + p->lowerCaseDiff;
1077	return ucs2;
1078	}
1079
1080	/*!
1081	Returns the uppercase equivalent if the character is lowercase or titlecase;
1082	otherwise returns the character itself.
1083	*/
1084	QChar QChar::toUpper() const
1085	{
1086	const QUnicodeTables::Properties *p = qGetProp(ucs);
1087	if (!p->upperCaseSpecial)
1088	return ucs + p->upperCaseDiff;
1089	return ucs;
1090	}
1091
1092	/*! \overload
1093	Returns the uppercase equivalent of the UCS-4-encoded character specified
1094	by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1095	the character itself.
1096	*/
1097	uint QChar::toUpper(uint ucs4)
1098	{
1099	if (ucs4 > LAST_UNICODE_CHAR)
1100	return ucs4;
1101	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1102	if (!p->upperCaseSpecial)
1103	return ucs4 + p->upperCaseDiff;
1104	return ucs4;
1105	}
1106
1107	/*! \overload
1108	Returns the uppercase equivalent of the UCS-2-encoded character specified
1109	by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1110	the character itself.
1111	*/
1112	ushort QChar::toUpper(ushort ucs2)
1113	{
1114	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1115	if (!p->upperCaseSpecial)
1116	return ucs2 + p->upperCaseDiff;
1117	return ucs2;
1118	}
1119
1120	/*!
1121	Returns the title case equivalent if the character is lowercase or uppercase;
1122	otherwise returns the character itself.
1123	*/
1124	QChar QChar::toTitleCase() const
1125	{
1126	const QUnicodeTables::Properties *p = qGetProp(ucs);
1127	if (!p->titleCaseSpecial)
1128	return ucs + p->titleCaseDiff;
1129	return ucs;
1130	}
1131
1132	/*!
1133	\overload
1134	Returns the title case equivalent of the UCS-4-encoded character specified
1135	by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1136	the character itself.
1137	*/
1138	uint QChar::toTitleCase(uint ucs4)
1139	{
1140	if (ucs4 > LAST_UNICODE_CHAR)
1141	return ucs4;
1142	const QUnicodeTables::Properties *p = qGetProp(ucs4);
1143	if (!p->titleCaseSpecial)
1144	return ucs4 + p->titleCaseDiff;
1145	return ucs4;
1146	}
1147
1148	/*!
1149	\overload
1150	Returns the title case equivalent of the UCS-2-encoded character specified
1151	by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1152	the character itself.
1153	*/
1154	ushort QChar::toTitleCase(ushort ucs2)
1155	{
1156	const QUnicodeTables::Properties *p = qGetProp(ucs2);
1157	if (!p->titleCaseSpecial)
1158	return ucs2 + p->titleCaseDiff;
1159	return ucs2;
1160	}
1161
1162
1163	static inline uint foldCase(const ushort ch, const ushort start)
1164	{
1165	uint c = *ch;
1166	if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1167	c = QChar::surrogateToUcs4(*(ch - 1), c);
1168	return *ch + qGetProp(c)->caseFoldDiff;
1169	}
1170
1171	static inline uint foldCase(uint ch, uint &last)
1172	{
1173	uint c = ch;
1174	if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1175	c = QChar::surrogateToUcs4(last, c);
1176	last = ch;
1177	return ch + qGetProp(c)->caseFoldDiff;
1178	}
1179
1180	static inline ushort foldCase(ushort ch)
1181	{
1182	return ch + qGetProp(ch)->caseFoldDiff;
1183	}
1184
1185	/*!
Returns the case folded equivalent of the character. For most Unicode characters this
is the same as toLower().
1188	*/
1189	QChar QChar::toCaseFolded() const
1190	{
1191	return ucs + qGetProp(ucs)->caseFoldDiff;
1192	}
1193
1194	/*!
1195	\overload
1196	Returns the case folded equivalent of the UCS-4-encoded character specified
by \a ucs4. For most Unicode characters this is the same as toLower().
1198	*/
1199	uint QChar::toCaseFolded(uint ucs4)
1200	{
1201	if (ucs4 > LAST_UNICODE_CHAR)
1202	return ucs4;
1203	return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1204	}
1205
1206	/*!
1207	\overload
1208	Returns the case folded equivalent of the UCS-2-encoded character specified
by \a ucs2. For most Unicode characters this is the same as toLower().
1210	*/
1211	ushort QChar::toCaseFolded(ushort ucs2)
1212	{
1213	return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1214	}
1215
1216
1217	/*!
1218	\fn char QChar::latin1() const
1219
1220	Use toLatin1() instead.
1221	*/
1222
1223	/*!
1224	\fn char QChar::ascii() const
1225
1226	Use toAscii() instead.
1227	*/
1228
1229	/*!
1230	\fn char QChar::toLatin1() const
1231
1232	Returns the Latin-1 character equivalent to the QChar, or 0. This
1233	is mainly useful for non-internationalized software.
1234
1235	\sa toAscii(), unicode(), QTextCodec::codecForCStrings()
1236	*/
1237
1238	/*!
1239	\fn char QChar::toAscii() const
1240	Returns the character value of the QChar obtained using the current
1241	codec used to read C strings, or 0 if the character is not representable
1242	using this codec. The default codec handles Latin-1 encoded text,
1243	but this can be changed to assist developers writing source code using
1244	other encodings.
1245
1246	The main purpose of this function is to preserve ASCII characters used
1247	in C strings. This is mainly useful for developers of non-internationalized
1248	software.
1249
1250	\sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
1251	*/
1252	#ifdef Q_COMPILER_MANGLES_RETURN_TYPE
1253	const char QChar::toAscii() const
1254	#else
1255	char QChar::toAscii() const
1256	#endif
1257	{
1258	#ifndef QT_NO_CODEC_FOR_C_STRINGS
1259	if (QTextCodec::codecForCStrings())
1260	// #####
1261	return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
1262	#endif
1263	return ucs > 0xff ? 0 : char(ucs);
1264	}
1265
1266	/*!
1267	\fn QChar QChar::fromLatin1(char c)
1268
1269	Converts the Latin-1 character \a c to its equivalent QChar. This
1270	is mainly useful for non-internationalized software.
1271
1272	\sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
1273	*/
1274
1275	/*!
1276	Converts the ASCII character \a c to its equivalent QChar. This
1277	is mainly useful for non-internationalized software.
1278
1279	An alternative is to use QLatin1Char.
1280
1281	\sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
1282	*/
1283	QChar QChar::fromAscii(char c)
1284	{
1285	#ifndef QT_NO_CODEC_FOR_C_STRINGS
1286	if (QTextCodec::codecForCStrings())
1287	// #####
1288	return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
1289	#endif
1290	return QChar(ushort((uchar)c));
1291	}
1292
1293	#ifndef QT_NO_DATASTREAM
1294	/*!
1295	\relates QChar
1296
1297	Writes the char \a chr to the stream \a out.
1298
1299	\sa {Format of the QDataStream operators}
1300	*/
1301
1302	QDataStream &operator<<(QDataStream &out, const QChar &chr)
1303	{
1304	out << quint16(chr.unicode());
1305	return out;
1306	}
1307
1308
1309	/*!
1310	\relates QChar
1311
1312	Reads a char from the stream \a in into char \a chr.
1313
1314	\sa {Format of the QDataStream operators}
1315	*/
1316
1317	QDataStream &operator>>(QDataStream &in, QChar &chr)
1318	{
1319	quint16 u;
1320	in >> u;
1321	chr.unicode() = ushort(u);
1322	return in;
1323	}
1324	#endif
1325
1326	/*!
1327	\fn ushort & QChar::unicode()
1328
1329	Returns a reference to the numeric Unicode value of the QChar.
1330	*/
1331
1332	/*!
1333	\fn ushort QChar::unicode() const
1334
1335	\overload
1336	*/
1337
1338	/*****************************************************************************
1339	Documentation of QChar related functions
1340	*****************************************************************************/
1341
1342	/*!
1343	\fn bool operator==(QChar c1, QChar c2)
1344
1345	\relates QChar
1346
1347	Returns true if \a c1 and \a c2 are the same Unicode character;
1348	otherwise returns false.
1349	*/
1350
1351	/*!
1352	\fn int operator!=(QChar c1, QChar c2)
1353
1354	\relates QChar
1355
1356	Returns true if \a c1 and \a c2 are not the same Unicode
1357	character; otherwise returns false.
1358	*/
1359
1360	/*!
1361	\fn int operator<=(QChar c1, QChar c2)
1362
1363	\relates QChar
1364
1365	Returns true if the numeric Unicode value of \a c1 is less than
1366	or equal to that of \a c2; otherwise returns false.
1367	*/
1368
1369	/*!
1370	\fn int operator>=(QChar c1, QChar c2)
1371
1372	\relates QChar
1373
1374	Returns true if the numeric Unicode value of \a c1 is greater than
1375	or equal to that of \a c2; otherwise returns false.
1376	*/
1377
1378	/*!
1379	\fn int operator<(QChar c1, QChar c2)
1380
1381	\relates QChar
1382
1383	Returns true if the numeric Unicode value of \a c1 is less than
1384	that of \a c2; otherwise returns false.
1385	*/
1386
1387	/*!
1388	\fn int operator>(QChar c1, QChar c2)
1389
1390	\relates QChar
1391
1392	Returns true if the numeric Unicode value of \a c1 is greater than
1393	that of \a c2; otherwise returns false.
1394	*/
1395
1396	/*!
1397	\fn bool QChar::mirrored() const
1398
1399	Use hasMirrored() instead.
1400	*/
1401
1402	/*!
1403	\fn QChar QChar::lower() const
1404
1405	Use toLower() instead.
1406	*/
1407
1408	/*!
1409	\fn QChar QChar::upper() const
1410
1411	Use toUpper() instead.
1412	*/
1413
1414	/*!
1415	\fn bool QChar::networkOrdered()
1416
1417	See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
1418	*/
1419
1420
1421	// ---------------------------------------------------------------------------
1422
1423
1424	static QString decomposeHelper
1425	(const QString &str, bool canonical, QChar::UnicodeVersion version)
1426	{
1427	unsigned short buffer[3];
1428
1429	QString s = str;
1430
1431	const unsigned short *utf16 = s.utf16();
1432	const unsigned short *uc = utf16 + s.length();
1433	while (uc != utf16) {
1434	uint ucs4 = *(--uc);
1435	if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1436	ushort high = *(uc - 1);
1437	if (QChar(high).isHighSurrogate()) {
1438	--uc;
1439	ucs4 = QChar::surrogateToUcs4(high, ucs4);
1440	}
1441	}
1442	if (QChar::unicodeVersion(ucs4) > version)
1443	continue;
1444	int length;
1445	int tag;
1446	const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1447	if (!d \|\| (canonical && tag != QChar::Canonical))
1448	continue;
1449
1450	s.replace(uc - utf16, ucs4 > 0x10000 ? 2 : 1, (const QChar *)d, length);
1451	// since the insert invalidates the pointers and we do decomposition recursive
1452	int pos = uc - utf16;
1453	utf16 = s.utf16();
1454	uc = utf16 + pos + length;
1455	}
1456
1457	return s;
1458	}
1459
1460
1461	static ushort ligatureHelper(ushort u1, ushort u2)
1462	{
1463	// hangul L-V pair
1464	int LIndex = u1 - Hangul_LBase;
1465	if (0 <= LIndex && LIndex < Hangul_LCount) {
1466	int VIndex = u2 - Hangul_VBase;
1467	if (0 <= VIndex && VIndex < Hangul_VCount)
1468	return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
1469	}
1470
1471	// hangul LV-T pair
1472	int SIndex = u1 - Hangul_SBase;
1473	if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1474	int TIndex = u2 - Hangul_TBase;
1475	if (0 <= TIndex && TIndex <= Hangul_TCount)
1476	return u1 + TIndex;
1477	}
1478
1479	const unsigned short index = GET_LIGATURE_INDEX(u2);
1480	if (index == 0xffff)
1481	return 0;
1482	const unsigned short *ligatures = uc_ligature_map+index;
1483	ushort length = *ligatures;
1484	++ligatures;
1485	// ### use bsearch
1486	for (uint i = 0; i < length; ++i)
1487	if (ligatures[2*i] == u1)
1488	return ligatures[2*i+1];
1489	return 0;
1490	}
1491
1492	static QString composeHelper(const QString &str)
1493	{
1494	QString s = str;
1495
1496	if (s.length() < 2)
1497	return s;
1498
1499	// the loop can partly ignore high Unicode as all ligatures are in the BMP
1500	int starter = 0;
1501	int lastCombining = 0;
1502	int pos = 0;
1503	while (pos < s.length()) {
1504	uint uc = s.utf16()[pos];
1505	if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1506	ushort low = s.utf16()[pos+1];
1507	if (QChar(low).isLowSurrogate()) {
1508	uc = QChar::surrogateToUcs4(uc, low);
1509	++pos;
1510	}
1511	}
1512	int combining = QChar::combiningClass(uc);
1513	if (starter == pos - 1 \|\| combining > lastCombining) {
1514	// allowed to form ligature with S
1515	QChar ligature = ligatureHelper(s.utf16()[starter], uc);
1516	if (ligature.unicode()) {
1517	s[starter] = ligature;
1518	s.remove(pos, 1);
1519	continue;
1520	}
1521	}
1522	if (!combining)
1523	starter = pos;
1524	lastCombining = combining;
1525	++pos;
1526	}
1527	return s;
1528	}
1529
1530
1531	static QString canonicalOrderHelper
1532	(const QString &str, QChar::UnicodeVersion version)
1533	{
1534	QString s = str;
1535	const int l = s.length()-1;
1536	int pos = 0;
1537	while (pos < l) {
1538	int p2 = pos+1;
1539	uint u1 = s.at(pos).unicode();
1540	if (QChar(u1).isHighSurrogate()) {
1541	ushort low = s.at(pos+1).unicode();
1542	if (QChar(low).isLowSurrogate()) {
1543	p2++;
1544	u1 = QChar::surrogateToUcs4(u1, low);
1545	if (p2 >= l)
1546	break;
1547	}
1548	}
1549	uint u2 = s.at(p2).unicode();
1550	if (QChar(u2).isHighSurrogate() && p2 < l-1) {
1551	ushort low = s.at(p2+1).unicode();
1552	if (QChar(low).isLowSurrogate()) {
1553	p2++;
1554	u2 = QChar::surrogateToUcs4(u2, low);
1555	}
1556	}
1557
1558	int c2 = QChar::combiningClass(u2);
1559	if (QChar::unicodeVersion(u2) > version)
1560	c2 = 0;
1561
1562	if (c2 == 0) {
1563	pos = p2+1;
1564	continue;
1565	}
1566	int c1 = QChar::combiningClass(u1);
1567	if (QChar::unicodeVersion(u1) > version)
1568	c1 = 0;
1569
1570	if (c1 > c2) {
1571	QChar *uc = s.data();
1572	int p = pos;
1573	// exchange characters
1574	if (u2 < 0x10000) {
1575	uc[p++] = u2;
1576	} else {
1577	uc[p++] = QChar::highSurrogate(u2);
1578	uc[p++] = QChar::lowSurrogate(u2);
1579	}
1580	if (u1 < 0x10000) {
1581	uc[p++] = u1;
1582	} else {
1583	uc[p++] = QChar::highSurrogate(u1);
1584	uc[p++] = QChar::lowSurrogate(u1);
1585	}
1586	if (pos > 0)
1587	--pos;
1588	if (pos > 0 && s.at(pos).isLowSurrogate())
1589	--pos;
1590	} else {
1591	++pos;
1592	if (u1 > 0x10000)
1593	++pos;
1594	}
1595	}
1596	return s;
1597	}
1598
1599	int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
1600	{
1601	if (uc > 0xffff)
1602	return Common;
1603	int script = uc_scripts[uc >> 7];
1604	if (script < ScriptSentinel)
1605	return script;
1606	script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);
1607	script = uc_scripts[script + (uc & 0x7f)];
1608	return script;
1609	}
1610
1611
1612	Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)
1613	{
1614	return (QUnicodeTables::LineBreakClass) qGetProp(ucs4)->line_break_class;
1615	}
1616
1617
1618	QT_END_NAMESPACE

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/corelib/tools/qchar.cpp@ 17

Download in other formats: