source: trunk/src/corelib/tools/qtextboundaryfinder.cpp@ 890

Last change on this file since 890 was 846, checked in by Dmitry A. Kuminov, 15 years ago

trunk: Merged in qt 4.7.2 sources from branches/vendor/nokia/qt.

File size: 13.8 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
4** All rights reserved.
5** Contact: Nokia Corporation ([email protected])
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial Usage
11** Licensees holding valid Qt Commercial licenses may use this file in
12** accordance with the Qt Commercial License Agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and Nokia.
15**
16** GNU Lesser General Public License Usage
17** Alternatively, this file may be used under the terms of the GNU Lesser
18** General Public License version 2.1 as published by the Free Software
19** Foundation and appearing in the file LICENSE.LGPL included in the
20** packaging of this file. Please review the following information to
21** ensure the GNU Lesser General Public License version 2.1 requirements
22** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23**
24** In addition, as a special exception, Nokia gives you certain additional
25** rights. These rights are described in the Nokia Qt LGPL Exception
26** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you have questions regarding the use of this file, please contact
37** Nokia at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41#include <QtCore/qtextboundaryfinder.h>
42#include <QtCore/qvarlengtharray.h>
43#include <private/qunicodetables_p.h>
44#include <qdebug.h>
45#include "private/qharfbuzz_p.h"
46
47QT_BEGIN_NAMESPACE
48
// Private storage of QTextBoundaryFinder: the per-character attribute table.
class QTextBoundaryFinderPrivate
{
public:
    // Declared with a single element, but QTextBoundaryFinder allocates
    // length * sizeof(HB_CharAttributes) bytes (or borrows a caller-supplied
    // buffer) and casts that memory to this type, so the array is indexed
    // by text position.
    HB_CharAttributes attributes[1];
};
54
55static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
56{
57 QVarLengthArray<HB_ScriptItem> scriptItems;
58
59 const ushort *string = reinterpret_cast<const ushort *>(chars);
60 const ushort *unicode = string;
61 // correctly assign script, isTab and isObject to the script analysis
62 const ushort *uc = unicode;
63 const ushort *e = uc + length;
64 int script = QUnicodeTables::Common;
65 int lastScript = QUnicodeTables::Common;
66 const ushort *start = uc;
67 while (uc < e) {
68 int s = QUnicodeTables::script(*uc);
69 if (s != QUnicodeTables::Inherited)
70 script = s;
71 if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
72 script = QUnicodeTables::Common;
73 if (script != lastScript) {
74 if (uc != start) {
75 HB_ScriptItem item;
76 item.pos = start - string;
77 item.length = uc - start;
78 item.script = (HB_Script)lastScript;
79 item.bidiLevel = 0; // ### what's the proper value?
80 scriptItems.append(item);
81 start = uc;
82 }
83 lastScript = script;
84 }
85 ++uc;
86 }
87 if (uc != start) {
88 HB_ScriptItem item;
89 item.pos = start - string;
90 item.length = uc - start;
91 item.script = (HB_Script)lastScript;
92 item.bidiLevel = 0; // ### what's the proper value?
93 scriptItems.append(item);
94 }
95
96 qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
97 if (type == QTextBoundaryFinder::Word)
98 HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
99 else if (type == QTextBoundaryFinder::Sentence)
100 HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
101}
102
103/*!
104 \class QTextBoundaryFinder
105
106 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
107
108 \since 4.4
109 \ingroup tools
110 \ingroup shared
111 \ingroup string-processing
112 \reentrant
113
    QTextBoundaryFinder allows you to find Unicode text boundaries in a
    string, similar to the Unicode text boundary specification (see
    http://www.unicode.org/reports/tr29/tr29-11.html).
117
118 QTextBoundaryFinder can operate on a QString in four possible
119 modes depending on the value of \a BoundaryType.
120
    Units of Unicode characters that make up what the user thinks of
    as a character or basic unit of the language are here called
    grapheme clusters. For example, the two Unicode characters 'A' +
    diaeresis form one grapheme cluster: the user thinks of them as one
    character, yet they are represented by two Unicode code points.
127
128 Word boundaries are there to locate the start and end of what a
129 language considers to be a word.
130
131 Line break boundaries give possible places where a line break
132 might happen and sentence boundaries will show the beginning and
133 end of whole sentences.
134
135 The first position in a string is always a valid boundary and
136 refers to the position before the first character. The last
137 position at the length of the string is also valid and refers
138 to the position after the last character.
139*/
140
141/*!
142 \enum QTextBoundaryFinder::BoundaryType
143
    \value Grapheme Finds a grapheme, which is the smallest boundary. It
    includes letters, punctuation marks, numerals and more.
146 \value Word Finds a word.
147 \value Line Finds possible positions for breaking the text into multiple
148 lines.
149 \value Sentence Finds sentence boundaries. These include periods, question
150 marks etc.
151*/
152
153/*!
154 \enum QTextBoundaryFinder::BoundaryReason
155
156 \value NotAtBoundary The boundary finder is not at a boundary position.
157 \value StartWord The boundary finder is at the start of a word.
158 \value EndWord The boundary finder is at the end of a word.
159*/
160
161/*!
162 Constructs an invalid QTextBoundaryFinder object.
163*/
164QTextBoundaryFinder::QTextBoundaryFinder()
165 : t(Grapheme)
166 , chars(0)
167 , length(0)
168 , freePrivate(true)
169 , d(0)
170{
171}
172
173/*!
174 Copies the QTextBoundaryFinder object, \a other.
175*/
176QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
177 : t(other.t)
178 , s(other.s)
179 , chars(other.chars)
180 , length(other.length)
181 , pos(other.pos)
182 , freePrivate(true)
183{
184 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
185 Q_CHECK_PTR(d);
186 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
187}
188
189/*!
190 Assigns the object, \a other, to another QTextBoundaryFinder object.
191*/
192QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
193{
194 if (&other == this)
195 return *this;
196
197 t = other.t;
198 s = other.s;
199 chars = other.chars;
200 length = other.length;
201 pos = other.pos;
202 freePrivate = true;
203
204 QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
205 realloc(d, length*sizeof(HB_CharAttributes));
206 Q_CHECK_PTR(newD);
207 d = newD;
208 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
209
210 return *this;
211}
212
213/*!
214 Destructs the QTextBoundaryFinder object.
215*/
216QTextBoundaryFinder::~QTextBoundaryFinder()
217{
218 if (freePrivate)
219 free(d);
220}
221
222/*!
223 Creates a QTextBoundaryFinder object of \a type operating on \a string.
224*/
225QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
226 : t(type)
227 , s(string)
228 , chars(string.unicode())
229 , length(string.length())
230 , pos(0)
231 , freePrivate(true)
232{
233 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
234 Q_CHECK_PTR(d);
235 init(t, chars, length, d->attributes);
236}
237
238/*!
239 Creates a QTextBoundaryFinder object of \a type operating on \a chars
240 with \a length.
241
242 \a buffer is an optional working buffer of size \a bufferSize you can pass to
243 the QTextBoundaryFinder. If the buffer is large enough to hold the working
244 data required, it will use this instead of allocating its own buffer.
245
246 \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
247 application programmer's responsibility to ensure the array is allocated for
248 as long as the QTextBoundaryFinder object stays alive. The same applies to
249 \a buffer.
250*/
251QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
252 : t(type)
253 , chars(chars)
254 , length(length)
255 , pos(0)
256{
257 if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
258 d = (QTextBoundaryFinderPrivate *)buffer;
259 freePrivate = false;
260 } else {
261 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
262 Q_CHECK_PTR(d);
263 freePrivate = true;
264 }
265 init(t, chars, length, d->attributes);
266}
267
268/*!
269 Moves the finder to the start of the string. This is equivalent to setPosition(0).
270
271 \sa setPosition(), position()
272*/
273void QTextBoundaryFinder::toStart()
274{
275 pos = 0;
276}
277
278/*!
279 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
280
281 \sa setPosition(), position()
282*/
283void QTextBoundaryFinder::toEnd()
284{
285 pos = length;
286}
287
288/*!
289 Returns the current position of the QTextBoundaryFinder.
290
291 The range is from 0 (the beginning of the string) to the length of
292 the string inclusive.
293
294 \sa setPosition()
295*/
296int QTextBoundaryFinder::position() const
297{
298 return pos;
299}
300
301/*!
302 Sets the current position of the QTextBoundaryFinder to \a position.
303
304 If \a position is out of bounds, it will be bound to only valid
305 positions. In this case, valid positions are from 0 to the length of
306 the string inclusive.
307
308 \sa position()
309*/
310void QTextBoundaryFinder::setPosition(int position)
311{
312 pos = qBound(0, position, length);
313}
314
315/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
316
317 Returns the type of the QTextBoundaryFinder.
318*/
319
320/*! \fn bool QTextBoundaryFinder::isValid() const
321
322 Returns true if the text boundary finder is valid; otherwise returns false.
323 A default QTextBoundaryFinder is invalid.
324*/
325
326/*!
327 Returns the string the QTextBoundaryFinder object operates on.
328*/
329QString QTextBoundaryFinder::string() const
330{
331 if (chars == s.unicode() && length == s.length())
332 return s;
333 return QString(chars, length);
334}
335
336
337/*!
338 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
339
340 Returns -1 if there is no next boundary.
341*/
342int QTextBoundaryFinder::toNextBoundary()
343{
344 if (!d) {
345 pos = -1;
346 return pos;
347 }
348
349 if (pos < 0 || pos >= length) {
350 pos = -1;
351 return pos;
352 }
353 ++pos;
354 if (pos == length)
355 return pos;
356
357 switch(t) {
358 case Grapheme:
359 while (pos < length && !d->attributes[pos].charStop)
360 ++pos;
361 break;
362 case Word:
363 while (pos < length && !d->attributes[pos].wordBoundary)
364 ++pos;
365 break;
366 case Sentence:
367 while (pos < length && !d->attributes[pos].sentenceBoundary)
368 ++pos;
369 break;
370 case Line:
371 Q_ASSERT(pos);
372 while (pos < length && d->attributes[pos-1].lineBreakType < HB_Break)
373 ++pos;
374 break;
375 }
376
377 return pos;
378}
379
380/*!
381 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
382
383 Returns -1 if there is no previous boundary.
384*/
385int QTextBoundaryFinder::toPreviousBoundary()
386{
387 if (!d) {
388 pos = -1;
389 return pos;
390 }
391
392 if (pos <= 0 || pos > length) {
393 pos = -1;
394 return pos;
395 }
396 --pos;
397 if (pos == 0)
398 return pos;
399
400 switch(t) {
401 case Grapheme:
402 while (pos > 0 && !d->attributes[pos].charStop)
403 --pos;
404 break;
405 case Word:
406 while (pos > 0 && !d->attributes[pos].wordBoundary)
407 --pos;
408 break;
409 case Sentence:
410 while (pos > 0 && !d->attributes[pos].sentenceBoundary)
411 --pos;
412 break;
413 case Line:
414 while (pos > 0 && d->attributes[pos-1].lineBreakType < HB_Break)
415 --pos;
416 break;
417 }
418
419 return pos;
420}
421
422/*!
423 Returns true if the object's position() is currently at a valid text boundary.
424*/
425bool QTextBoundaryFinder::isAtBoundary() const
426{
427 if (!d || pos < 0)
428 return false;
429
430 if (pos == length)
431 return true;
432
433 switch(t) {
434 case Grapheme:
435 return d->attributes[pos].charStop;
436 case Word:
437 return d->attributes[pos].wordBoundary;
438 case Line:
439 return (pos > 0) ? d->attributes[pos-1].lineBreakType >= HB_Break : true;
440 case Sentence:
441 return d->attributes[pos].sentenceBoundary;
442 }
443 return false;
444}
445
446/*!
447 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
448*/
449QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
450{
451 if (!d)
452 return NotAtBoundary;
453 if (! isAtBoundary())
454 return NotAtBoundary;
455 if (pos == 0) {
456 if (d->attributes[pos].whiteSpace)
457 return NotAtBoundary;
458 return StartWord;
459 }
460 if (pos >= length - 1) {
461 if (d->attributes[length-1].whiteSpace)
462 return NotAtBoundary;
463 return EndWord;
464 }
465
466 BoundaryReasons answer;
467 const bool nextIsSpace = d->attributes[pos + 1].whiteSpace;
468 const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
469
470 if (d->attributes[pos].whiteSpace)
471 answer = EndWord;
472 else if (!prevIsSpace) {
473 answer = StartWord;
474 answer |= EndWord;
475 }
476
477 if (prevIsSpace)
478 answer |= StartWord;
479 if (nextIsSpace)
480 answer |= EndWord;
481 if (answer == 0) {
482 answer = StartWord;
483 answer |= EndWord;
484 }
485
486 return answer;
487}
488
489QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.