diff options
author | Martin Dürst <[email protected]> | 2025-04-18 16:03:30 +0900 |
---|---|---|
committer | Martin Dürst <[email protected]> | 2025-04-18 16:07:17 +0900 |
commit | 900ece77b2d338300ea79d19e7c623043a0be810 (patch) | |
tree | a7a3d387a723762ba426d4579304fd4559054560 /template | |
parent | ab22f3910d51703ddf832e46c2a5e845b4703f50 (diff) |
prepare Unicode normalization for Unicode 16.0.0
Diffstat (limited to 'template')
-rw-r--r-- | template/unicode_norm_gen.tmpl | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl index a16712fbac..acf565f657 100644 --- a/template/unicode_norm_gen.tmpl +++ b/template/unicode_norm_gen.tmpl @@ -112,6 +112,17 @@ accent_array = combining_class.keys + composition_table.keys.collect {|key| key. composition_starters = composition_table.keys.collect {|key| key.first} +# Special treatment for Unicode 16.0.0 +# Add to accents those characters that can be decomposed (even indirectly) +# so that the first character in the decomposition is an accent. +# We do this here up to two levels deep. +# In the future, there may be even deeper levels. +starter_accents = composition_starters & accent_array +decomposition_table.each do |k, v| + accent_array << k if starter_accents.include? v.first + accent_array << k if starter_accents.include? decomposition_table[v.first]&.first +end + hangul_no_trailing = [] 0xAC00.step(0xD7A3, 28) {|c| hangul_no_trailing << c} |