ICU 77.1
77.1
common
unicode
normalizer2.h
Go to the documentation of this file.
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
18
19
#ifndef __NORMALIZER2_H__
20
#define __NORMALIZER2_H__
21
27
#include "
unicode/utypes.h
"
28
29
#if U_SHOW_CPLUSPLUS_API
30
31
#if !UCONFIG_NO_NORMALIZATION
32
33
#include "
unicode/stringpiece.h
"
34
#include "
unicode/uniset.h
"
35
#include "
unicode/unistr.h
"
36
#include "
unicode/unorm2.h
"
37
38
U_NAMESPACE_BEGIN
39
40
class
ByteSink;
41
85
/**
 * Abstract normalization interface derived from UObject.
 *
 * Instances are obtained through the static get*Instance() factories, which
 * hand back pointers to const objects. Most operations are pure virtual and
 * must be supplied by a concrete subclass. normalizeUTF8(),
 * getRawDecomposition(), composePair(), getCombiningClass() and
 * isNormalizedUTF8() are virtual but not pure, so a base-class definition
 * for them lives outside this header.
 *
 * The order of the virtual declarations is kept exactly as-is on purpose:
 * reordering them would change the vtable layout.
 */
class U_COMMON_API Normalizer2 : public UObject {
public:
    /** Destructor; declared here and defined out of line. */
    ~Normalizer2();

    /**
     * Factory for the NFC instance.
     * NOTE(review): the returned object is presumably a shared singleton that
     * callers must not delete -- confirm against the ICU API reference.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFCInstance(UErrorCode& errorCode);

    /**
     * Factory for the NFD instance.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFDInstance(UErrorCode& errorCode);

    /**
     * Factory for the NFKC instance.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFKCInstance(UErrorCode& errorCode);

    /**
     * Factory for the NFKD instance.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFKDInstance(UErrorCode& errorCode);

    /**
     * Factory for the NFKC_Casefold instance.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFKCCasefoldInstance(UErrorCode& errorCode);

    /**
     * Factory for the NFKC_SimpleCasefold instance.
     * @param errorCode in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getNFKCSimpleCasefoldInstance(UErrorCode& errorCode);

    /**
     * Factory selecting an instance by data package, data name and mode.
     * NOTE(review): which packageName/name values are accepted (including
     * whether packageName may be null) is not visible here -- confirm against
     * the ICU API reference.
     * @param packageName C string naming the data package
     * @param name        C string naming the normalization data
     * @param mode        requested UNormalization2Mode
     * @param errorCode   in/out ICU error code
     * @return pointer to a const Normalizer2
     */
    static const Normalizer2* getInstance(const char* packageName,
                                          const char* name,
                                          UNormalization2Mode mode,
                                          UErrorCode& errorCode);

    /**
     * Convenience overload that returns the normalized string by value.
     * It forwards to the three-argument normalize() with a fresh local
     * destination and returns that local.
     * @param src       source string
     * @param errorCode in/out ICU error code
     * @return the destination string filled in by normalize(src, dest, errorCode)
     */
    UnicodeString normalize(const UnicodeString& src, UErrorCode& errorCode) const {
        UnicodeString normalized;
        normalize(src, normalized, errorCode);
        return normalized;
    }

    /**
     * Pure virtual: normalizes src into dest.
     * @param src       source string
     * @param dest      destination string
     * @param errorCode in/out ICU error code
     * @return reference to a UnicodeString (presumably dest -- confirm)
     */
    virtual UnicodeString& normalize(const UnicodeString& src,
                                     UnicodeString& dest,
                                     UErrorCode& errorCode) const = 0;

    /**
     * Virtual (not pure): normalizes UTF-8 input and writes to a ByteSink.
     * @param options   option bit set; meaning of the bits is not visible here
     * @param src       source bytes as a StringPiece
     * @param sink      output sink
     * @param edits     pointer to an Edits object
     *                  (NOTE(review): presumably optional/nullable -- confirm)
     * @param errorCode in/out ICU error code
     */
    virtual void normalizeUTF8(uint32_t options, StringPiece src, ByteSink& sink,
                               Edits* edits, UErrorCode& errorCode) const;

    /**
     * Pure virtual: combines first with the normalized form of second.
     * @param first     string that is modified
     * @param second    string to be appended
     * @param errorCode in/out ICU error code
     * @return reference to a UnicodeString (presumably first -- confirm)
     */
    virtual UnicodeString& normalizeSecondAndAppend(UnicodeString& first,
                                                    const UnicodeString& second,
                                                    UErrorCode& errorCode) const = 0;

    /**
     * Pure virtual: appends second to first.
     * NOTE(review): presumably both inputs are expected to be normalized
     * already -- confirm against the ICU API reference.
     * @param first     string that is modified
     * @param second    string to be appended
     * @param errorCode in/out ICU error code
     * @return reference to a UnicodeString (presumably first -- confirm)
     */
    virtual UnicodeString& append(UnicodeString& first,
                                  const UnicodeString& second,
                                  UErrorCode& errorCode) const = 0;

    /**
     * Pure virtual: looks up the decomposition of a code point.
     * @param c             code point
     * @param decomposition output string
     * @return UBool result (presumably true when c has a decomposition -- confirm)
     */
    virtual UBool getDecomposition(UChar32 c, UnicodeString& decomposition) const = 0;

    /**
     * Virtual (not pure): looks up the raw decomposition of a code point.
     * @param c             code point
     * @param decomposition output string
     * @return UBool result (presumably true when c has a decomposition -- confirm)
     */
    virtual UBool getRawDecomposition(UChar32 c, UnicodeString& decomposition) const;

    /**
     * Virtual (not pure): composes a pair of code points.
     * @param a first code point
     * @param b second code point
     * @return a UChar32 (NOTE(review): the value returned when the pair does
     *         not compose is not visible here -- confirm)
     */
    virtual UChar32 composePair(UChar32 a, UChar32 b) const;

    /**
     * Virtual (not pure): returns the combining class of a code point.
     * @param c code point
     * @return combining class as a uint8_t
     */
    virtual uint8_t getCombiningClass(UChar32 c) const;

    /**
     * Pure virtual: tests whether a string is normalized.
     * @param s         string to test
     * @param errorCode in/out ICU error code
     * @return UBool result of the test
     */
    virtual UBool isNormalized(const UnicodeString& s, UErrorCode& errorCode) const = 0;

    /**
     * Virtual (not pure): tests whether UTF-8 input is normalized.
     * @param s         input bytes as a StringPiece
     * @param errorCode in/out ICU error code
     * @return UBool result of the test
     */
    virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode& errorCode) const;

    /**
     * Pure virtual: quick check of a string.
     * @param s         string to test
     * @param errorCode in/out ICU error code
     * @return a UNormalizationCheckResult value
     */
    virtual UNormalizationCheckResult quickCheck(const UnicodeString& s,
                                                 UErrorCode& errorCode) const = 0;

    /**
     * Pure virtual: span-style quick check.
     * NOTE(review): presumably returns the end index of the leading part of s
     * that is known to be normalized -- confirm.
     * @param s         string to test
     * @param errorCode in/out ICU error code
     * @return an int32_t index/length
     */
    virtual int32_t spanQuickCheckYes(const UnicodeString& s,
                                      UErrorCode& errorCode) const = 0;

    /**
     * Pure virtual per-code-point query.
     * NOTE(review): presumably reports a normalization boundary before c -- confirm.
     * @param c code point
     * @return UBool result
     */
    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;

    /**
     * Pure virtual per-code-point query.
     * NOTE(review): presumably reports a normalization boundary after c -- confirm.
     * @param c code point
     * @return UBool result
     */
    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;

    /**
     * Pure virtual per-code-point query.
     * NOTE(review): presumably reports that c does not interact with
     * neighbouring characters under normalization -- confirm.
     * @param c code point
     * @return UBool result
     */
    virtual UBool isInert(UChar32 c) const = 0;
};
507
519
class
U_COMMON_API
FilteredNormalizer2
:
public
Normalizer2
{
520
public
:
531
/**
 * Builds a filtered normalizer from an existing normalizer and a set.
 * The arguments initialize the members norm2 and set. Those members are
 * declared outside this view, so whether they keep references or copies
 * cannot be confirmed here (NOTE(review): check argument lifetimes).
 * @param n2        normalizer to wrap
 * @param filterSet UnicodeSet used as the filter
 */
FilteredNormalizer2(const Normalizer2& n2, const UnicodeSet& filterSet)
    : norm2(n2), set(filterSet) {}
533
538
/** Destructor; declared here and defined out of line. */
~FilteredNormalizer2();
539
553
/**
 * Override of Normalizer2::normalize(src, dest, errorCode); defined out of line.
 * @param src       source string
 * @param dest      destination string
 * @param errorCode in/out ICU error code
 * @return reference to a UnicodeString (presumably dest -- confirm)
 */
virtual UnicodeString& normalize(const UnicodeString& src,
                                 UnicodeString& dest,
                                 UErrorCode& errorCode) const override;
557
580
virtual
void
581
normalizeUTF8
(uint32_t options,
StringPiece
src,
ByteSink
&sink,
582
Edits
*edits,
UErrorCode
&errorCode)
const override
;
583
598
/**
 * Override of Normalizer2::normalizeSecondAndAppend(); defined out of line.
 * @param first     string that is modified
 * @param second    string to be appended
 * @param errorCode in/out ICU error code
 * @return reference to a UnicodeString (presumably first -- confirm)
 */
virtual UnicodeString& normalizeSecondAndAppend(UnicodeString& first,
                                                const UnicodeString& second,
                                                UErrorCode& errorCode) const override;
616
/**
 * Override of Normalizer2::append(); defined out of line.
 * @param first     string that is modified
 * @param second    string to be appended
 * @param errorCode in/out ICU error code
 * @return reference to a UnicodeString (presumably first -- confirm)
 */
virtual UnicodeString& append(UnicodeString& first,
                              const UnicodeString& second,
                              UErrorCode& errorCode) const override;
620
632
/**
 * Override of Normalizer2::getDecomposition(); defined out of line.
 * @param c             code point
 * @param decomposition output string
 * @return UBool result (presumably true when c has a decomposition -- confirm)
 */
virtual UBool getDecomposition(UChar32 c, UnicodeString& decomposition) const override;
634
646
/**
 * Override of Normalizer2::getRawDecomposition(); defined out of line.
 * @param c             code point
 * @param decomposition output string
 * @return UBool result (presumably true when c has a decomposition -- confirm)
 */
virtual UBool getRawDecomposition(UChar32 c, UnicodeString& decomposition) const override;
648
659
/**
 * Override of Normalizer2::composePair(); defined out of line.
 * @param a first code point
 * @param b second code point
 * @return a UChar32 (NOTE(review): the value returned when the pair does
 *         not compose is not visible here -- confirm)
 */
virtual UChar32 composePair(UChar32 a, UChar32 b) const override;
661
670
virtual
uint8_t
671
getCombiningClass
(
UChar32
c)
const override
;
672
684
/**
 * Override of Normalizer2::isNormalized(); defined out of line.
 * @param s         string to test
 * @param errorCode in/out ICU error code
 * @return UBool result of the test
 */
virtual UBool isNormalized(const UnicodeString& s, UErrorCode& errorCode) const override;
705
/**
 * Override of Normalizer2::isNormalizedUTF8(); defined out of line.
 * @param s         input bytes as a StringPiece
 * @param errorCode in/out ICU error code
 * @return UBool result of the test
 */
virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode& errorCode) const override;
718
/**
 * Override of Normalizer2::quickCheck(); defined out of line.
 * @param s         string to test
 * @param errorCode in/out ICU error code
 * @return a UNormalizationCheckResult value
 */
virtual UNormalizationCheckResult quickCheck(const UnicodeString& s,
                                             UErrorCode& errorCode) const override;
731
virtual
int32_t
732
spanQuickCheckYes
(
const
UnicodeString
&s,
UErrorCode
&errorCode)
const override
;
733
742
/**
 * Override of Normalizer2::hasBoundaryBefore(); defined out of line.
 * NOTE(review): presumably reports a normalization boundary before c -- confirm.
 * @param c code point
 * @return UBool result
 */
virtual UBool hasBoundaryBefore(UChar32 c) const override;
743
752
/**
 * Override of Normalizer2::hasBoundaryAfter(); defined out of line.
 * NOTE(review): presumably reports a normalization boundary after c -- confirm.
 * @param c code point
 * @return UBool result
 */
virtual UBool hasBoundaryAfter(UChar32 c) const override;
753