source: trunk/src/corelib/tools/qtextboundaryfinder.cpp@ 788

Last change on this file since 788 was 651, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.6.2 sources.

File size: 13.5 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41#include <QtCore/qtextboundaryfinder.h>
42#include <QtCore/qvarlengtharray.h>
43#include <private/qunicodetables_p.h>
44#include <qdebug.h>
45#include "private/qharfbuzz_p.h"
46
47QT_BEGIN_NAMESPACE
48
49class QTextBoundaryFinderPrivate
50{
51public:
52 HB_CharAttributes attributes[1];
53};
54
55static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
56{
57 QVarLengthArray<HB_ScriptItem> scriptItems;
58
59 const ushort *string = reinterpret_cast<const ushort *>(chars);
60 const ushort *unicode = string;
61 // correctly assign script, isTab and isObject to the script analysis
62 const ushort *uc = unicode;
63 const ushort *e = uc + length;
64 int script = QUnicodeTables::Common;
65 int lastScript = QUnicodeTables::Common;
66 const ushort *start = uc;
67 while (uc < e) {
68 int s = QUnicodeTables::script(*uc);
69 if (s != QUnicodeTables::Inherited)
70 script = s;
71 if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
72 script = QUnicodeTables::Common;
73 if (script != lastScript) {
74 if (uc != start) {
75 HB_ScriptItem item;
76 item.pos = start - string;
77 item.length = uc - start;
78 item.script = (HB_Script)lastScript;
79 item.bidiLevel = 0; // ### what's the proper value?
80 scriptItems.append(item);
81 start = uc;
82 }
83 lastScript = script;
84 }
85 ++uc;
86 }
87 if (uc != start) {
88 HB_ScriptItem item;
89 item.pos = start - string;
90 item.length = uc - start;
91 item.script = (HB_Script)lastScript;
92 item.bidiLevel = 0; // ### what's the proper value?
93 scriptItems.append(item);
94 }
95
96 qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
97 if (type == QTextBoundaryFinder::Word)
98 HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
99 else if (type == QTextBoundaryFinder::Sentence)
100 HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
101}
102
103/*! \class QTextBoundaryFinder
104
105 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
106
107 \since 4.4
108 \ingroup tools
109 \ingroup shared
110 \ingroup string-processing
111 \reentrant
112
113 QTextBoundaryFinder allows you to find Unicode text boundaries in a
114 string, similar to the Unicode text boundary specification (see
115 http://www.unicode.org/reports/tr29/tr29-11.html).
116
117 QTextBoundaryFinder can operate on a QString in four possible
118 modes depending on the value of \a BoundaryType.
119
120 Units of Unicode characters that make up what the user thinks of
121 as a character or basic unit of the language are here called
122 Grapheme clusters. The two unicode characters 'A' + diaeresis do
123 for example form one grapheme cluster as the user thinks of them
124 as one character, yet it is in this case represented by two
125 unicode code points.
126
127 Word boundaries are there to locate the start and end of what a
128 language considers to be a word.
129
130 Line break boundaries give possible places where a line break
131 might happen and sentence boundaries will show the beginning and
132 end of whole sentences.
133*/
134
135/*!
136 \enum QTextBoundaryFinder::BoundaryType
137
138 \value Grapheme Finds a grapheme which is the smallest boundary. It
139 includes letters, punctuation marks, numerals and more.
140 \value Word Finds a word.
141 \value Line Finds possible positions for breaking the text into multiple
142 lines.
143 \value Sentence Finds sentence boundaries. These include periods, question
144 marks etc.
145*/
146
147/*!
148 \enum QTextBoundaryFinder::BoundaryReason
149
150 \value NotAtBoundary The boundary finder is not at a boundary position.
151 \value StartWord The boundary finder is at the start of a word.
152 \value EndWord The boundary finder is at the end of a word.
153*/
154
155/*!
156 Constructs an invalid QTextBoundaryFinder object.
157*/
158QTextBoundaryFinder::QTextBoundaryFinder()
159 : t(Grapheme)
160 , chars(0)
161 , length(0)
162 , freePrivate(true)
163 , d(0)
164{
165}
166
167/*!
168 Copies the QTextBoundaryFinder object, \a other.
169*/
170QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
171 : t(other.t)
172 , s(other.s)
173 , chars(other.chars)
174 , length(other.length)
175 , pos(other.pos)
176 , freePrivate(true)
177{
178 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
179 Q_CHECK_PTR(d);
180 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
181}
182
183/*!
184 Assigns the object, \a other, to another QTextBoundaryFinder object.
185*/
186QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
187{
188 if (&other == this)
189 return *this;
190
191 t = other.t;
192 s = other.s;
193 chars = other.chars;
194 length = other.length;
195 pos = other.pos;
196 freePrivate = true;
197
198 QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
199 realloc(d, length*sizeof(HB_CharAttributes));
200 Q_CHECK_PTR(newD);
201 d = newD;
202 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
203
204 return *this;
205}
206
207/*!
208 Destructs the QTextBoundaryFinder object.
209*/
210QTextBoundaryFinder::~QTextBoundaryFinder()
211{
212 if (freePrivate)
213 free(d);
214}
215
216/*!
217 Creates a QTextBoundaryFinder object of \a type operating on \a string.
218*/
219QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
220 : t(type)
221 , s(string)
222 , chars(string.unicode())
223 , length(string.length())
224 , pos(0)
225 , freePrivate(true)
226{
227 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
228 Q_CHECK_PTR(d);
229 init(t, chars, length, d->attributes);
230}
231
232/*!
233 Creates a QTextBoundaryFinder object of \a type operating on \a chars
234 with \a length.
235
236 \a buffer is an optional working buffer of size \a bufferSize you can pass to
237 the QTextBoundaryFinder. If the buffer is large enough to hold the working
238 data required, it will use this instead of allocating its own buffer.
239
240 \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
241 application programmer's responsability to ensure the array is allocated for
242 as long as the QTextBoundaryFinder object stays alive. The same applies to
243 \a buffer.
244*/
245QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
246 : t(type)
247 , chars(chars)
248 , length(length)
249 , pos(0)
250{
251 if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
252 d = (QTextBoundaryFinderPrivate *)buffer;
253 freePrivate = false;
254 } else {
255 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
256 Q_CHECK_PTR(d);
257 freePrivate = true;
258 }
259 init(t, chars, length, d->attributes);
260}
261
262/*!
263 Moves the finder to the start of the string. This is equivalent to setPosition(0).
264
265 \sa setPosition(), position()
266*/
267void QTextBoundaryFinder::toStart()
268{
269 pos = 0;
270}
271
272/*!
273 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
274
275 \sa setPosition(), position()
276*/
277void QTextBoundaryFinder::toEnd()
278{
279 pos = length;
280}
281
282/*!
283 Returns the current position of the QTextBoundaryFinder.
284
285 The range is from 0 (the beginning of the string) to the length of
286 the string inclusive.
287
288 \sa setPosition()
289*/
290int QTextBoundaryFinder::position() const
291{
292 return pos;
293}
294
295/*!
296 Sets the current position of the QTextBoundaryFinder to \a position.
297
298 If \a position is out of bounds, it will be bound to only valid
299 positions. In this case, valid positions are from 0 to the length of
300 the string inclusive.
301
302 \sa position()
303*/
304void QTextBoundaryFinder::setPosition(int position)
305{
306 pos = qBound(0, position, length);
307}
308
309/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
310
311 Returns the type of the QTextBoundaryFinder.
312*/
313
314/*! \fn bool QTextBoundaryFinder::isValid() const
315
316 Returns true if the text boundary finder is valid; otherwise returns false.
317 A default QTextBoundaryFinder is invalid.
318*/
319
320/*!
321 Returns the string the QTextBoundaryFinder object operates on.
322*/
323QString QTextBoundaryFinder::string() const
324{
325 if (chars == s.unicode() && length == s.length())
326 return s;
327 return QString(chars, length);
328}
329
330
331/*!
332 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
333
334 Returns -1 is there is no next boundary.
335*/
336int QTextBoundaryFinder::toNextBoundary()
337{
338 if (!d) {
339 pos = -1;
340 return pos;
341 }
342
343 if (pos < 0 || pos >= length) {
344 pos = -1;
345 return pos;
346 }
347 ++pos;
348 if (pos == length)
349 return pos;
350
351 switch(t) {
352 case Grapheme:
353 while (pos < length && !d->attributes[pos].charStop)
354 ++pos;
355 break;
356 case Word:
357 while (pos < length && !d->attributes[pos].wordBoundary)
358 ++pos;
359 break;
360 case Sentence:
361 while (pos < length && !d->attributes[pos].sentenceBoundary)
362 ++pos;
363 break;
364 case Line:
365 while (pos < length && d->attributes[pos].lineBreakType < HB_Break)
366 ++pos;
367 break;
368 }
369
370 return pos;
371}
372
373/*!
374 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
375
376 Returns -1 is there is no previous boundary.
377*/
378int QTextBoundaryFinder::toPreviousBoundary()
379{
380 if (!d) {
381 pos = -1;
382 return pos;
383 }
384
385 if (pos <= 0 || pos > length) {
386 pos = -1;
387 return pos;
388 }
389 --pos;
390 if (pos == 0)
391 return pos;
392
393 switch(t) {
394 case Grapheme:
395 while (pos > 0 && !d->attributes[pos].charStop)
396 --pos;
397 break;
398 case Word:
399 while (pos > 0 && !d->attributes[pos].wordBoundary)
400 --pos;
401 break;
402 case Sentence:
403 while (pos > 0 && !d->attributes[pos].sentenceBoundary)
404 --pos;
405 break;
406 case Line:
407 while (pos > 0 && d->attributes[pos].lineBreakType < HB_Break)
408 --pos;
409 break;
410 }
411
412 return pos;
413}
414
415/*!
416 Returns true if the object's position() is currently at a valid text boundary.
417*/
418bool QTextBoundaryFinder::isAtBoundary() const
419{
420 if (!d || pos < 0)
421 return false;
422
423 if (pos == length)
424 return true;
425
426 switch(t) {
427 case Grapheme:
428 return d->attributes[pos].charStop;
429 case Word:
430 return d->attributes[pos].wordBoundary;
431 case Line:
432 return d->attributes[pos].lineBreakType >= HB_Break;
433 case Sentence:
434 return d->attributes[pos].sentenceBoundary;
435 }
436 return false;
437}
438
439/*!
440 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
441*/
442QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
443{
444 if (!d)
445 return NotAtBoundary;
446 if (! isAtBoundary())
447 return NotAtBoundary;
448 if (pos == 0) {
449 if (d->attributes[pos].whiteSpace)
450 return NotAtBoundary;
451 return StartWord;
452 }
453 if (pos >= length - 1) {
454 if (d->attributes[length-1].whiteSpace)
455 return NotAtBoundary;
456 return EndWord;
457 }
458
459 BoundaryReasons answer;
460 const bool nextIsSpace = d->attributes[pos + 1].whiteSpace;
461 const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
462
463 if (d->attributes[pos].whiteSpace)
464 answer = EndWord;
465 else if (!prevIsSpace) {
466 answer = StartWord;
467 answer |= EndWord;
468 }
469
470 if (prevIsSpace)
471 answer |= StartWord;
472 if (nextIsSpace)
473 answer |= EndWord;
474 if (answer == 0) {
475 answer = StartWord;
476 answer |= EndWord;
477 }
478
479 return answer;
480}
481
482QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.