source: trunk/src/corelib/tools/qtextboundaryfinder.cpp@ 5

Last change on this file since 5 was 2, checked in by Dmitry A. Kuminov, 16 years ago

Initially imported qt-all-opensource-src-4.5.1 from Trolltech.

File size: 13.4 KB
Line 
1/****************************************************************************
2**
3** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4** Contact: Qt Software Information ([email protected])
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial Usage
10** Licensees holding valid Qt Commercial licenses may use this file in
11** accordance with the Qt Commercial License Agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Nokia.
14**
15** GNU Lesser General Public License Usage
16** Alternatively, this file may be used under the terms of the GNU Lesser
17** General Public License version 2.1 as published by the Free Software
18** Foundation and appearing in the file LICENSE.LGPL included in the
19** packaging of this file. Please review the following information to
20** ensure the GNU Lesser General Public License version 2.1 requirements
21** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
22**
23** In addition, as a special exception, Nokia gives you certain
24** additional rights. These rights are described in the Nokia Qt LGPL
25** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
26** package.
27**
28** GNU General Public License Usage
29** Alternatively, this file may be used under the terms of the GNU
30** General Public License version 3.0 as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL included in the
32** packaging of this file. Please review the following information to
33** ensure the GNU General Public License version 3.0 requirements will be
34** met: http://www.gnu.org/copyleft/gpl.html.
35**
36** If you are unsure which license is appropriate for your use, please
37** contact the sales department at [email protected].
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41#include "private/qharfbuzz_p.h"
42#include <QtCore/qtextboundaryfinder.h>
43#include <QtCore/qvarlengtharray.h>
44#include <private/qunicodetables_p.h>
45#include <qdebug.h>
46
47QT_BEGIN_NAMESPACE
48
49class QTextBoundaryFinderPrivate
50{
51public:
52 HB_CharAttributes attributes[1];
53};
54
55static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
56{
57 QVarLengthArray<HB_ScriptItem> scriptItems;
58
59 const ushort *string = reinterpret_cast<const ushort *>(chars);
60 const ushort *unicode = string;
61 // correctly assign script, isTab and isObject to the script analysis
62 const ushort *uc = unicode;
63 const ushort *e = uc + length;
64 int script = QUnicodeTables::Common;
65 int lastScript = QUnicodeTables::Common;
66 const ushort *start = uc;
67 while (uc < e) {
68 int s = QUnicodeTables::script(*uc);
69 if (s != QUnicodeTables::Inherited)
70 script = s;
71 if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
72 script = QUnicodeTables::Common;
73 if (script != lastScript) {
74 if (uc != start) {
75 HB_ScriptItem item;
76 item.pos = start - string;
77 item.length = uc - start;
78 item.script = (HB_Script)lastScript;
79 item.bidiLevel = 0; // ### what's the proper value?
80 scriptItems.append(item);
81 start = uc;
82 }
83 lastScript = script;
84 }
85 ++uc;
86 }
87 if (uc != start) {
88 HB_ScriptItem item;
89 item.pos = start - string;
90 item.length = uc - start;
91 item.script = (HB_Script)lastScript;
92 item.bidiLevel = 0; // ### what's the proper value?
93 scriptItems.append(item);
94 }
95
96 qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
97 if (type == QTextBoundaryFinder::Word)
98 HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
99 else if (type == QTextBoundaryFinder::Sentence)
100 HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
101}
102
103/*! \class QTextBoundaryFinder
104
105 \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
106
107 \since 4.4
108 \ingroup tools
109 \ingroup shared
110 \ingroup text
111 \reentrant
112
113 QTextBoundaryFinder allows you to find Unicode text boundaries in a
114 string, similar to the Unicode text boundary specification (see
115 http://www.unicode.org/reports/tr29/tr29-11.html).
116
117 QTextBoundaryFinder can operate on a QString in four possible
118 modes depending on the value of \a BoundaryType.
119
120 Units of Unicode characters that make up what the user thinks of
121 as a character or basic unit of the language are here called
122 Grapheme clusters. The two unicode characters 'A' + diaeresis do
123 for example form one grapheme cluster as the user thinks of them
124 as one character, yet it is in this case represented by two
125 unicode code points.
126
127 Word boundaries are there to locate the start and end of what a
128 language considers to be a word.
129
130 Line break boundaries give possible places where a line break
131 might happen and sentence boundaries will show the beginning and
132 end of whole sentences.
133*/
134
135/*!
136 \enum QTextBoundaryFinder::BoundaryType
137
138 \value Grapheme Finds a grapheme which is the smallest boundary. It
139 includes letters, punctuation marks, numerals and more.
140 \value Word Finds a word.
141 \value Line Finds possible positions for breaking the text into multiple
142 lines.
143 \value Sentence Finds sentence boundaries. These include periods, question
144 marks etc.
145*/
146
147/*!
148 \enum QTextBoundaryFinder::BoundaryReason
149
150 \value NotAtBoundary The boundary finder is not at a boundary position.
151 \value StartWord The boundary finder is at the start of a word.
152 \value EndWord The boundary finder is at the end of a word.
153*/
154
155/*!
156 Constructs an invalid QTextBoundaryFinder object.
157*/
158QTextBoundaryFinder::QTextBoundaryFinder()
159 : t(Grapheme)
160 , chars(0)
161 , length(0)
162 , freePrivate(true)
163 , d(0)
164{
165}
166
167/*!
168 Copies the QTextBoundaryFinder object, \a other.
169*/
170QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
171 : t(other.t)
172 , s(other.s)
173 , chars(other.chars)
174 , length(other.length)
175 , pos(other.pos)
176 , freePrivate(true)
177{
178 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
179 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
180}
181
182/*!
183 Assigns the object, \a other, to another QTextBoundaryFinder object.
184*/
185QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
186{
187 if (&other == this)
188 return *this;
189
190 t = other.t;
191 s = other.s;
192 chars = other.chars;
193 length = other.length;
194 pos = other.pos;
195 freePrivate = true;
196
197 d = (QTextBoundaryFinderPrivate *) realloc(d, length*sizeof(HB_CharAttributes));
198 memcpy(d, other.d, length*sizeof(HB_CharAttributes));
199
200 return *this;
201}
202
203/*!
204 Destructs the QTextBoundaryFinder object.
205*/
206QTextBoundaryFinder::~QTextBoundaryFinder()
207{
208 if (freePrivate)
209 free(d);
210}
211
212/*!
213 Creates a QTextBoundaryFinder object of \a type operating on \a string.
214*/
215QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
216 : t(type)
217 , s(string)
218 , chars(string.unicode())
219 , length(string.length())
220 , pos(0)
221 , freePrivate(true)
222{
223 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
224 init(t, chars, length, d->attributes);
225}
226
227/*!
228 Creates a QTextBoundaryFinder object of \a type operating on \a chars
229 with \a length.
230
231 \a buffer is an optional working buffer of size \a bufferSize you can pass to
232 the QTextBoundaryFinder. If the buffer is large enough to hold the working
233 data required, it will use this instead of allocating its own buffer.
234
235 \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
236 application programmer's responsability to ensure the array is allocated for
237 as long as the QTextBoundaryFinder object stays alive. The same applies to
238 \a buffer.
239*/
240QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
241 : t(type)
242 , chars(chars)
243 , length(length)
244 , pos(0)
245{
246 if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
247 d = (QTextBoundaryFinderPrivate *)buffer;
248 freePrivate = false;
249 } else {
250 d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
251 freePrivate = true;
252 }
253 init(t, chars, length, d->attributes);
254}
255
256/*!
257 Moves the finder to the start of the string. This is equivalent to setPosition(0).
258
259 \sa setPosition(), position()
260*/
261void QTextBoundaryFinder::toStart()
262{
263 pos = 0;
264}
265
266/*!
267 Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
268
269 \sa setPosition(), position()
270*/
271void QTextBoundaryFinder::toEnd()
272{
273 pos = length;
274}
275
276/*!
277 Returns the current position of the QTextBoundaryFinder.
278
279 The range is from 0 (the beginning of the string) to the length of
280 the string inclusive.
281
282 \sa setPosition()
283*/
284int QTextBoundaryFinder::position() const
285{
286 return pos;
287}
288
289/*!
290 Sets the current position of the QTextBoundaryFinder to \a position.
291
292 If \a position is out of bounds, it will be bound to only valid
293 positions. In this case, valid positions are from 0 to the length of
294 the string inclusive.
295
296 \sa position()
297*/
298void QTextBoundaryFinder::setPosition(int position)
299{
300 pos = qBound(0, position, length);
301}
302
303/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
304
305 Returns the type of the QTextBoundaryFinder.
306*/
307
308/*! \fn bool QTextBoundaryFinder::isValid() const
309
310 Returns true if the text boundary finder is valid; otherwise returns false.
311 A default QTextBoundaryFinder is invalid.
312*/
313
314/*!
315 Returns the string the QTextBoundaryFinder object operates on.
316*/
317QString QTextBoundaryFinder::string() const
318{
319 if (chars == s.unicode() && length == s.length())
320 return s;
321 return QString(chars, length);
322}
323
324
325/*!
326 Moves the QTextBoundaryFinder to the next boundary position and returns that position.
327
328 Returns -1 is there is no next boundary.
329*/
330int QTextBoundaryFinder::toNextBoundary()
331{
332 if (!d) {
333 pos = -1;
334 return pos;
335 }
336
337 if (pos < 0 || pos >= length) {
338 pos = -1;
339 return pos;
340 }
341 ++pos;
342 if (pos == length)
343 return pos;
344
345 switch(t) {
346 case Grapheme:
347 while (pos < length && !d->attributes[pos].charStop)
348 ++pos;
349 break;
350 case Word:
351 while (pos < length && !d->attributes[pos].wordBoundary)
352 ++pos;
353 break;
354 case Sentence:
355 while (pos < length && !d->attributes[pos].sentenceBoundary)
356 ++pos;
357 break;
358 case Line:
359 while (pos < length && d->attributes[pos].lineBreakType < HB_Break)
360 ++pos;
361 break;
362 }
363
364 return pos;
365}
366
367/*!
368 Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
369
370 Returns -1 is there is no previous boundary.
371*/
372int QTextBoundaryFinder::toPreviousBoundary()
373{
374 if (!d) {
375 pos = -1;
376 return pos;
377 }
378
379 if (pos <= 0 || pos > length) {
380 pos = -1;
381 return pos;
382 }
383 --pos;
384 if (pos == 0)
385 return pos;
386
387 switch(t) {
388 case Grapheme:
389 while (pos > 0 && !d->attributes[pos].charStop)
390 --pos;
391 break;
392 case Word:
393 while (pos > 0 && !d->attributes[pos].wordBoundary)
394 --pos;
395 break;
396 case Sentence:
397 while (pos > 0 && !d->attributes[pos].sentenceBoundary)
398 --pos;
399 break;
400 case Line:
401 while (pos > 0 && d->attributes[pos].lineBreakType < HB_Break)
402 --pos;
403 break;
404 }
405
406 return pos;
407}
408
409/*!
410 Returns true if the object's position() is currently at a valid text boundary.
411*/
412bool QTextBoundaryFinder::isAtBoundary() const
413{
414 if (!d || pos < 0)
415 return false;
416
417 if (pos == length)
418 return true;
419
420 switch(t) {
421 case Grapheme:
422 return d->attributes[pos].charStop;
423 case Word:
424 return d->attributes[pos].wordBoundary;
425 case Line:
426 return d->attributes[pos].lineBreakType >= HB_Break;
427 case Sentence:
428 return d->attributes[pos].sentenceBoundary;
429 }
430 return false;
431}
432
433/*!
434 Returns the reasons for the boundary finder to have chosen the current position as a boundary.
435*/
436QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
437{
438 if (!d)
439 return NotAtBoundary;
440 if (! isAtBoundary())
441 return NotAtBoundary;
442 if (pos == 0) {
443 if (d->attributes[pos].whiteSpace)
444 return NotAtBoundary;
445 return StartWord;
446 }
447 if (pos >= length - 1) {
448 if (d->attributes[length-1].whiteSpace)
449 return NotAtBoundary;
450 return EndWord;
451 }
452
453 BoundaryReasons answer;
454 const bool nextIsSpace = d->attributes[pos + 1].whiteSpace;
455 const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
456
457 if (d->attributes[pos].whiteSpace)
458 answer = EndWord;
459 else if (!prevIsSpace) {
460 answer = StartWord;
461 answer |= EndWord;
462 }
463
464 if (prevIsSpace)
465 answer |= StartWord;
466 if (nextIsSpace)
467 answer |= EndWord;
468 if (answer == 0) {
469 answer = StartWord;
470 answer |= EndWord;
471 }
472
473 return answer;
474}
475
476QT_END_NAMESPACE
Note: See TracBrowser for help on using the repository browser.