1 | /****************************************************************************
|
---|
2 | **
|
---|
3 | ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
|
---|
4 | ** Contact: Qt Software Information ([email protected])
|
---|
5 | **
|
---|
6 | ** This file is part of the QtCore module of the Qt Toolkit.
|
---|
7 | **
|
---|
8 | ** $QT_BEGIN_LICENSE:LGPL$
|
---|
9 | ** Commercial Usage
|
---|
10 | ** Licensees holding valid Qt Commercial licenses may use this file in
|
---|
11 | ** accordance with the Qt Commercial License Agreement provided with the
|
---|
12 | ** Software or, alternatively, in accordance with the terms contained in
|
---|
13 | ** a written agreement between you and Nokia.
|
---|
14 | **
|
---|
15 | ** GNU Lesser General Public License Usage
|
---|
16 | ** Alternatively, this file may be used under the terms of the GNU Lesser
|
---|
17 | ** General Public License version 2.1 as published by the Free Software
|
---|
18 | ** Foundation and appearing in the file LICENSE.LGPL included in the
|
---|
19 | ** packaging of this file. Please review the following information to
|
---|
20 | ** ensure the GNU Lesser General Public License version 2.1 requirements
|
---|
21 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
---|
22 | **
|
---|
23 | ** In addition, as a special exception, Nokia gives you certain
|
---|
24 | ** additional rights. These rights are described in the Nokia Qt LGPL
|
---|
25 | ** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
|
---|
26 | ** package.
|
---|
27 | **
|
---|
28 | ** GNU General Public License Usage
|
---|
29 | ** Alternatively, this file may be used under the terms of the GNU
|
---|
30 | ** General Public License version 3.0 as published by the Free Software
|
---|
31 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
32 | ** packaging of this file. Please review the following information to
|
---|
33 | ** ensure the GNU General Public License version 3.0 requirements will be
|
---|
34 | ** met: http://www.gnu.org/copyleft/gpl.html.
|
---|
35 | **
|
---|
36 | ** If you are unsure which license is appropriate for your use, please
|
---|
37 | ** contact the sales department at [email protected].
|
---|
38 | ** $QT_END_LICENSE$
|
---|
39 | **
|
---|
40 | ****************************************************************************/
|
---|
41 | #include "private/qharfbuzz_p.h"
|
---|
42 | #include <QtCore/qtextboundaryfinder.h>
|
---|
43 | #include <QtCore/qvarlengtharray.h>
|
---|
44 | #include <private/qunicodetables_p.h>
|
---|
45 | #include <qdebug.h>
|
---|
46 |
|
---|
47 | QT_BEGIN_NAMESPACE
|
---|
48 |
|
---|
49 | class QTextBoundaryFinderPrivate
|
---|
50 | {
|
---|
51 | public:
|
---|
52 | HB_CharAttributes attributes[1];
|
---|
53 | };
|
---|
54 |
|
---|
55 | static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, HB_CharAttributes *attributes)
|
---|
56 | {
|
---|
57 | QVarLengthArray<HB_ScriptItem> scriptItems;
|
---|
58 |
|
---|
59 | const ushort *string = reinterpret_cast<const ushort *>(chars);
|
---|
60 | const ushort *unicode = string;
|
---|
61 | // correctly assign script, isTab and isObject to the script analysis
|
---|
62 | const ushort *uc = unicode;
|
---|
63 | const ushort *e = uc + length;
|
---|
64 | int script = QUnicodeTables::Common;
|
---|
65 | int lastScript = QUnicodeTables::Common;
|
---|
66 | const ushort *start = uc;
|
---|
67 | while (uc < e) {
|
---|
68 | int s = QUnicodeTables::script(*uc);
|
---|
69 | if (s != QUnicodeTables::Inherited)
|
---|
70 | script = s;
|
---|
71 | if (*uc == QChar::ObjectReplacementCharacter || *uc == QChar::LineSeparator || *uc == 9)
|
---|
72 | script = QUnicodeTables::Common;
|
---|
73 | if (script != lastScript) {
|
---|
74 | if (uc != start) {
|
---|
75 | HB_ScriptItem item;
|
---|
76 | item.pos = start - string;
|
---|
77 | item.length = uc - start;
|
---|
78 | item.script = (HB_Script)lastScript;
|
---|
79 | item.bidiLevel = 0; // ### what's the proper value?
|
---|
80 | scriptItems.append(item);
|
---|
81 | start = uc;
|
---|
82 | }
|
---|
83 | lastScript = script;
|
---|
84 | }
|
---|
85 | ++uc;
|
---|
86 | }
|
---|
87 | if (uc != start) {
|
---|
88 | HB_ScriptItem item;
|
---|
89 | item.pos = start - string;
|
---|
90 | item.length = uc - start;
|
---|
91 | item.script = (HB_Script)lastScript;
|
---|
92 | item.bidiLevel = 0; // ### what's the proper value?
|
---|
93 | scriptItems.append(item);
|
---|
94 | }
|
---|
95 |
|
---|
96 | qGetCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
---|
97 | if (type == QTextBoundaryFinder::Word)
|
---|
98 | HB_GetWordBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
---|
99 | else if (type == QTextBoundaryFinder::Sentence)
|
---|
100 | HB_GetSentenceBoundaries(string, length, scriptItems.data(), scriptItems.count(), attributes);
|
---|
101 | }
|
---|
102 |
|
---|
103 | /*! \class QTextBoundaryFinder
|
---|
104 |
|
---|
105 | \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string.
|
---|
106 |
|
---|
107 | \since 4.4
|
---|
108 | \ingroup tools
|
---|
109 | \ingroup shared
|
---|
110 | \ingroup text
|
---|
111 | \reentrant
|
---|
112 |
|
---|
113 | QTextBoundaryFinder allows you to find Unicode text boundaries in a
|
---|
114 | string, similar to the Unicode text boundary specification (see
|
---|
115 | http://www.unicode.org/reports/tr29/tr29-11.html).
|
---|
116 |
|
---|
117 | QTextBoundaryFinder can operate on a QString in four possible
|
---|
118 | modes depending on the value of \a BoundaryType.
|
---|
119 |
|
---|
120 | Units of Unicode characters that make up what the user thinks of
|
---|
121 | as a character or basic unit of the language are here called
|
---|
122 | Grapheme clusters. The two unicode characters 'A' + diaeresis do
|
---|
123 | for example form one grapheme cluster as the user thinks of them
|
---|
124 | as one character, yet it is in this case represented by two
|
---|
125 | unicode code points.
|
---|
126 |
|
---|
127 | Word boundaries are there to locate the start and end of what a
|
---|
128 | language considers to be a word.
|
---|
129 |
|
---|
130 | Line break boundaries give possible places where a line break
|
---|
131 | might happen and sentence boundaries will show the beginning and
|
---|
132 | end of whole sentences.
|
---|
133 | */
|
---|
134 |
|
---|
135 | /*!
|
---|
136 | \enum QTextBoundaryFinder::BoundaryType
|
---|
137 |
|
---|
138 | \value Grapheme Finds a grapheme which is the smallest boundary. It
|
---|
139 | includes letters, punctuation marks, numerals and more.
|
---|
140 | \value Word Finds a word.
|
---|
141 | \value Line Finds possible positions for breaking the text into multiple
|
---|
142 | lines.
|
---|
143 | \value Sentence Finds sentence boundaries. These include periods, question
|
---|
144 | marks etc.
|
---|
145 | */
|
---|
146 |
|
---|
147 | /*!
|
---|
148 | \enum QTextBoundaryFinder::BoundaryReason
|
---|
149 |
|
---|
150 | \value NotAtBoundary The boundary finder is not at a boundary position.
|
---|
151 | \value StartWord The boundary finder is at the start of a word.
|
---|
152 | \value EndWord The boundary finder is at the end of a word.
|
---|
153 | */
|
---|
154 |
|
---|
155 | /*!
|
---|
156 | Constructs an invalid QTextBoundaryFinder object.
|
---|
157 | */
|
---|
158 | QTextBoundaryFinder::QTextBoundaryFinder()
|
---|
159 | : t(Grapheme)
|
---|
160 | , chars(0)
|
---|
161 | , length(0)
|
---|
162 | , freePrivate(true)
|
---|
163 | , d(0)
|
---|
164 | {
|
---|
165 | }
|
---|
166 |
|
---|
167 | /*!
|
---|
168 | Copies the QTextBoundaryFinder object, \a other.
|
---|
169 | */
|
---|
170 | QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
|
---|
171 | : t(other.t)
|
---|
172 | , s(other.s)
|
---|
173 | , chars(other.chars)
|
---|
174 | , length(other.length)
|
---|
175 | , pos(other.pos)
|
---|
176 | , freePrivate(true)
|
---|
177 | {
|
---|
178 | d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
|
---|
179 | memcpy(d, other.d, length*sizeof(HB_CharAttributes));
|
---|
180 | }
|
---|
181 |
|
---|
182 | /*!
|
---|
183 | Assigns the object, \a other, to another QTextBoundaryFinder object.
|
---|
184 | */
|
---|
185 | QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other)
|
---|
186 | {
|
---|
187 | if (&other == this)
|
---|
188 | return *this;
|
---|
189 |
|
---|
190 | t = other.t;
|
---|
191 | s = other.s;
|
---|
192 | chars = other.chars;
|
---|
193 | length = other.length;
|
---|
194 | pos = other.pos;
|
---|
195 | freePrivate = true;
|
---|
196 |
|
---|
197 | d = (QTextBoundaryFinderPrivate *) realloc(d, length*sizeof(HB_CharAttributes));
|
---|
198 | memcpy(d, other.d, length*sizeof(HB_CharAttributes));
|
---|
199 |
|
---|
200 | return *this;
|
---|
201 | }
|
---|
202 |
|
---|
203 | /*!
|
---|
204 | Destructs the QTextBoundaryFinder object.
|
---|
205 | */
|
---|
206 | QTextBoundaryFinder::~QTextBoundaryFinder()
|
---|
207 | {
|
---|
208 | if (freePrivate)
|
---|
209 | free(d);
|
---|
210 | }
|
---|
211 |
|
---|
212 | /*!
|
---|
213 | Creates a QTextBoundaryFinder object of \a type operating on \a string.
|
---|
214 | */
|
---|
215 | QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string)
|
---|
216 | : t(type)
|
---|
217 | , s(string)
|
---|
218 | , chars(string.unicode())
|
---|
219 | , length(string.length())
|
---|
220 | , pos(0)
|
---|
221 | , freePrivate(true)
|
---|
222 | {
|
---|
223 | d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
|
---|
224 | init(t, chars, length, d->attributes);
|
---|
225 | }
|
---|
226 |
|
---|
227 | /*!
|
---|
228 | Creates a QTextBoundaryFinder object of \a type operating on \a chars
|
---|
229 | with \a length.
|
---|
230 |
|
---|
231 | \a buffer is an optional working buffer of size \a bufferSize you can pass to
|
---|
232 | the QTextBoundaryFinder. If the buffer is large enough to hold the working
|
---|
233 | data required, it will use this instead of allocating its own buffer.
|
---|
234 |
|
---|
235 | \warning QTextBoundaryFinder does not create a copy of \a chars. It is the
|
---|
236 | application programmer's responsibility to ensure the array is allocated for
|
---|
237 | as long as the QTextBoundaryFinder object stays alive. The same applies to
|
---|
238 | \a buffer.
|
---|
239 | */
|
---|
240 | QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize)
|
---|
241 | : t(type)
|
---|
242 | , chars(chars)
|
---|
243 | , length(length)
|
---|
244 | , pos(0)
|
---|
245 | {
|
---|
246 | if (buffer && (uint)bufferSize >= length*sizeof(HB_CharAttributes)) {
|
---|
247 | d = (QTextBoundaryFinderPrivate *)buffer;
|
---|
248 | freePrivate = false;
|
---|
249 | } else {
|
---|
250 | d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(HB_CharAttributes));
|
---|
251 | freePrivate = true;
|
---|
252 | }
|
---|
253 | init(t, chars, length, d->attributes);
|
---|
254 | }
|
---|
255 |
|
---|
256 | /*!
|
---|
257 | Moves the finder to the start of the string. This is equivalent to setPosition(0).
|
---|
258 |
|
---|
259 | \sa setPosition(), position()
|
---|
260 | */
|
---|
261 | void QTextBoundaryFinder::toStart()
|
---|
262 | {
|
---|
263 | pos = 0;
|
---|
264 | }
|
---|
265 |
|
---|
266 | /*!
|
---|
267 | Moves the finder to the end of the string. This is equivalent to setPosition(string.length()).
|
---|
268 |
|
---|
269 | \sa setPosition(), position()
|
---|
270 | */
|
---|
271 | void QTextBoundaryFinder::toEnd()
|
---|
272 | {
|
---|
273 | pos = length;
|
---|
274 | }
|
---|
275 |
|
---|
276 | /*!
|
---|
277 | Returns the current position of the QTextBoundaryFinder.
|
---|
278 |
|
---|
279 | The range is from 0 (the beginning of the string) to the length of
|
---|
280 | the string inclusive.
|
---|
281 |
|
---|
282 | \sa setPosition()
|
---|
283 | */
|
---|
284 | int QTextBoundaryFinder::position() const
|
---|
285 | {
|
---|
286 | return pos;
|
---|
287 | }
|
---|
288 |
|
---|
289 | /*!
|
---|
290 | Sets the current position of the QTextBoundaryFinder to \a position.
|
---|
291 |
|
---|
292 | If \a position is out of bounds, it will be bound to only valid
|
---|
293 | positions. In this case, valid positions are from 0 to the length of
|
---|
294 | the string inclusive.
|
---|
295 |
|
---|
296 | \sa position()
|
---|
297 | */
|
---|
298 | void QTextBoundaryFinder::setPosition(int position)
|
---|
299 | {
|
---|
300 | pos = qBound(0, position, length);
|
---|
301 | }
|
---|
302 |
|
---|
303 | /*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const
|
---|
304 |
|
---|
305 | Returns the type of the QTextBoundaryFinder.
|
---|
306 | */
|
---|
307 |
|
---|
308 | /*! \fn bool QTextBoundaryFinder::isValid() const
|
---|
309 |
|
---|
310 | Returns true if the text boundary finder is valid; otherwise returns false.
|
---|
311 | A default QTextBoundaryFinder is invalid.
|
---|
312 | */
|
---|
313 |
|
---|
314 | /*!
|
---|
315 | Returns the string the QTextBoundaryFinder object operates on.
|
---|
316 | */
|
---|
317 | QString QTextBoundaryFinder::string() const
|
---|
318 | {
|
---|
319 | if (chars == s.unicode() && length == s.length())
|
---|
320 | return s;
|
---|
321 | return QString(chars, length);
|
---|
322 | }
|
---|
323 |
|
---|
324 |
|
---|
325 | /*!
|
---|
326 | Moves the QTextBoundaryFinder to the next boundary position and returns that position.
|
---|
327 |
|
---|
328 | Returns -1 if there is no next boundary.
|
---|
329 | */
|
---|
330 | int QTextBoundaryFinder::toNextBoundary()
|
---|
331 | {
|
---|
332 | if (!d) {
|
---|
333 | pos = -1;
|
---|
334 | return pos;
|
---|
335 | }
|
---|
336 |
|
---|
337 | if (pos < 0 || pos >= length) {
|
---|
338 | pos = -1;
|
---|
339 | return pos;
|
---|
340 | }
|
---|
341 | ++pos;
|
---|
342 | if (pos == length)
|
---|
343 | return pos;
|
---|
344 |
|
---|
345 | switch(t) {
|
---|
346 | case Grapheme:
|
---|
347 | while (pos < length && !d->attributes[pos].charStop)
|
---|
348 | ++pos;
|
---|
349 | break;
|
---|
350 | case Word:
|
---|
351 | while (pos < length && !d->attributes[pos].wordBoundary)
|
---|
352 | ++pos;
|
---|
353 | break;
|
---|
354 | case Sentence:
|
---|
355 | while (pos < length && !d->attributes[pos].sentenceBoundary)
|
---|
356 | ++pos;
|
---|
357 | break;
|
---|
358 | case Line:
|
---|
359 | while (pos < length && d->attributes[pos].lineBreakType < HB_Break)
|
---|
360 | ++pos;
|
---|
361 | break;
|
---|
362 | }
|
---|
363 |
|
---|
364 | return pos;
|
---|
365 | }
|
---|
366 |
|
---|
367 | /*!
|
---|
368 | Moves the QTextBoundaryFinder to the previous boundary position and returns that position.
|
---|
369 |
|
---|
370 | Returns -1 if there is no previous boundary.
|
---|
371 | */
|
---|
372 | int QTextBoundaryFinder::toPreviousBoundary()
|
---|
373 | {
|
---|
374 | if (!d) {
|
---|
375 | pos = -1;
|
---|
376 | return pos;
|
---|
377 | }
|
---|
378 |
|
---|
379 | if (pos <= 0 || pos > length) {
|
---|
380 | pos = -1;
|
---|
381 | return pos;
|
---|
382 | }
|
---|
383 | --pos;
|
---|
384 | if (pos == 0)
|
---|
385 | return pos;
|
---|
386 |
|
---|
387 | switch(t) {
|
---|
388 | case Grapheme:
|
---|
389 | while (pos > 0 && !d->attributes[pos].charStop)
|
---|
390 | --pos;
|
---|
391 | break;
|
---|
392 | case Word:
|
---|
393 | while (pos > 0 && !d->attributes[pos].wordBoundary)
|
---|
394 | --pos;
|
---|
395 | break;
|
---|
396 | case Sentence:
|
---|
397 | while (pos > 0 && !d->attributes[pos].sentenceBoundary)
|
---|
398 | --pos;
|
---|
399 | break;
|
---|
400 | case Line:
|
---|
401 | while (pos > 0 && d->attributes[pos].lineBreakType < HB_Break)
|
---|
402 | --pos;
|
---|
403 | break;
|
---|
404 | }
|
---|
405 |
|
---|
406 | return pos;
|
---|
407 | }
|
---|
408 |
|
---|
409 | /*!
|
---|
410 | Returns true if the object's position() is currently at a valid text boundary.
|
---|
411 | */
|
---|
412 | bool QTextBoundaryFinder::isAtBoundary() const
|
---|
413 | {
|
---|
414 | if (!d || pos < 0)
|
---|
415 | return false;
|
---|
416 |
|
---|
417 | if (pos == length)
|
---|
418 | return true;
|
---|
419 |
|
---|
420 | switch(t) {
|
---|
421 | case Grapheme:
|
---|
422 | return d->attributes[pos].charStop;
|
---|
423 | case Word:
|
---|
424 | return d->attributes[pos].wordBoundary;
|
---|
425 | case Line:
|
---|
426 | return d->attributes[pos].lineBreakType >= HB_Break;
|
---|
427 | case Sentence:
|
---|
428 | return d->attributes[pos].sentenceBoundary;
|
---|
429 | }
|
---|
430 | return false;
|
---|
431 | }
|
---|
432 |
|
---|
433 | /*!
|
---|
434 | Returns the reasons for the boundary finder to have chosen the current position as a boundary.
|
---|
435 | */
|
---|
436 | QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
|
---|
437 | {
|
---|
438 | if (!d)
|
---|
439 | return NotAtBoundary;
|
---|
440 | if (! isAtBoundary())
|
---|
441 | return NotAtBoundary;
|
---|
442 | if (pos == 0) {
|
---|
443 | if (d->attributes[pos].whiteSpace)
|
---|
444 | return NotAtBoundary;
|
---|
445 | return StartWord;
|
---|
446 | }
|
---|
447 | if (pos >= length - 1) {
|
---|
448 | if (d->attributes[length-1].whiteSpace)
|
---|
449 | return NotAtBoundary;
|
---|
450 | return EndWord;
|
---|
451 | }
|
---|
452 |
|
---|
453 | BoundaryReasons answer;
|
---|
454 | const bool nextIsSpace = d->attributes[pos + 1].whiteSpace;
|
---|
455 | const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
|
---|
456 |
|
---|
457 | if (d->attributes[pos].whiteSpace)
|
---|
458 | answer = EndWord;
|
---|
459 | else if (!prevIsSpace) {
|
---|
460 | answer = StartWord;
|
---|
461 | answer |= EndWord;
|
---|
462 | }
|
---|
463 |
|
---|
464 | if (prevIsSpace)
|
---|
465 | answer |= StartWord;
|
---|
466 | if (nextIsSpace)
|
---|
467 | answer |= EndWord;
|
---|
468 | if (answer == 0) {
|
---|
469 | answer = StartWord;
|
---|
470 | answer |= EndWord;
|
---|
471 | }
|
---|
472 |
|
---|
473 | return answer;
|
---|
474 | }
|
---|
475 |
|
---|
476 | QT_END_NAMESPACE
|
---|