diff options
-rw-r--r-- | lib/unicode_normalize/normalize.rb | 1 | ||||
-rw-r--r-- | template/unicode_norm_gen.tmpl | 11 |
2 files changed, 12 insertions, 0 deletions
diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb index 1caf2cc8c8..e67fad187a 100644 --- a/lib/unicode_normalize/normalize.rb +++ b/lib/unicode_normalize/normalize.rb @@ -114,6 +114,7 @@ module UnicodeNormalize # :nodoc: last_class = accent_class end end + accents = nfc_one(accents) if accents.length>1 # TODO: change from recursion to loop hangul_comp_one(start+accents) end diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl index a16712fbac..acf565f657 100644 --- a/template/unicode_norm_gen.tmpl +++ b/template/unicode_norm_gen.tmpl @@ -112,6 +112,17 @@ accent_array = combining_class.keys + composition_table.keys.collect {|key| key. composition_starters = composition_table.keys.collect {|key| key.first} +# Special treatment for Unicode 16.0.0 +# Add characters that can be decomposed (even indirectly) so that +# the first character in the decomposition is an accent to accents. +# We do this here up to two levels deep. +# In the future, there may be even deeper levels. +starter_accents = composition_starters & accent_array +decomposition_table.each do |k, v| + accent_array << k if starter_accents.include? v.first + accent_array << k if starter_accents.include? decomposition_table[v.first]&.first +end + hangul_no_trailing = [] 0xAC00.step(0xD7A3, 28) {|c| hangul_no_trailing << c} |