summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/unicode_normalize/normalize.rb1
-rw-r--r--template/unicode_norm_gen.tmpl11
2 files changed, 12 insertions, 0 deletions
diff --git a/lib/unicode_normalize/normalize.rb b/lib/unicode_normalize/normalize.rb
index 1caf2cc8c8..e67fad187a 100644
--- a/lib/unicode_normalize/normalize.rb
+++ b/lib/unicode_normalize/normalize.rb
@@ -114,6 +114,7 @@ module UnicodeNormalize # :nodoc:
last_class = accent_class
end
end
+ accents = nfc_one(accents) if accents.length>1 # TODO: change from recursion to loop
hangul_comp_one(start+accents)
end
diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl
index a16712fbac..acf565f657 100644
--- a/template/unicode_norm_gen.tmpl
+++ b/template/unicode_norm_gen.tmpl
@@ -112,6 +112,17 @@ accent_array = combining_class.keys + composition_table.keys.collect {|key| key.
composition_starters = composition_table.keys.collect {|key| key.first}
+# Special treatment for Unicode 16.0.0
+# Add to accents the characters that can be decomposed (even indirectly)
+# so that the first character in the decomposition is an accent.
+# We do this here up to two levels deep.
+# In the future, there may be even deeper levels.
+starter_accents = composition_starters & accent_array
+decomposition_table.each do |k, v|
+ accent_array << k if starter_accents.include? v.first
+ accent_array << k if starter_accents.include? decomposition_table[v.first]&.first
+end
+
hangul_no_trailing = []
0xAC00.step(0xD7A3, 28) {|c| hangul_no_trailing << c}