summaryrefslogtreecommitdiff
path: root/template
diff options
context:
space:
mode:
author Martin Dürst <[email protected]> 2025-04-18 16:03:30 +0900
committer Martin Dürst <[email protected]> 2025-04-18 16:07:17 +0900
commit 900ece77b2d338300ea79d19e7c623043a0be810 (patch)
tree a7a3d387a723762ba426d4579304fd4559054560 /template
parent ab22f3910d51703ddf832e46c2a5e845b4703f50 (diff)
prepare Unicode normalization for Unicode 16.0.0
Diffstat (limited to 'template')
-rw-r--r-- template/unicode_norm_gen.tmpl 11
1 files changed, 11 insertions, 0 deletions
diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl
index a16712fbac..acf565f657 100644
--- a/template/unicode_norm_gen.tmpl
+++ b/template/unicode_norm_gen.tmpl
@@ -112,6 +112,17 @@ accent_array = combining_class.keys + composition_table.keys.collect {|key| key.
composition_starters = composition_table.keys.collect {|key| key.first}
+# Special treatment for Unicode 16.0.0
+# Add to the accents those characters that can be decomposed (even indirectly)
+# so that the first character in the decomposition is an accent.
+# We do this here up to two levels deep.
+# In the future, there may be even deeper levels.
+starter_accents = composition_starters & accent_array
+decomposition_table.each do |k, v|
+ accent_array << k if starter_accents.include? v.first
+ accent_array << k if starter_accents.include? decomposition_table[v.first]&.first
+end
+
hangul_no_trailing = []
0xAC00.step(0xD7A3, 28) {|c| hangul_no_trailing << c}