Avi Drissman | 3e1a26c | 2022-09-15 20:26:03 | [diff] [blame] | 1 | // Copyright 2013 The Chromium Authors |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
Arthur Sonzogni | 24d53e3 | 2024-07-26 14:00:54 | [diff] [blame] | 5 | #ifdef UNSAFE_BUFFERS_BUILD |
| 6 | // TODO(crbug.com/40285824): Remove this and convert code to safer constructs. |
| 7 | #pragma allow_unsafe_buffers |
| 8 | #endif |
| 9 | |
Nick Diego Yamane | f9877ae | 2019-01-31 17:08:31 | [diff] [blame] | 10 | #include "ui/base/ime/character_composer.h" |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 11 | |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 12 | #include <algorithm> |
| 13 | #include <iterator> |
Max Ihlenfeldt | ed435f1 | 2024-02-14 13:25:18 | [diff] [blame] | 14 | #include <optional> |
Nick Diego Yamane | f9877ae | 2019-01-31 17:08:31 | [diff] [blame] | 15 | #include <string> |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 16 | |
Hans Wennborg | 8586102b | 2020-05-05 13:43:29 | [diff] [blame] | 17 | #include "base/check.h" |
Max Ihlenfeldt | ed435f1 | 2024-02-14 13:25:18 | [diff] [blame] | 18 | #include "base/logging.h" |
Hans Wennborg | 8586102b | 2020-05-05 13:43:29 | [diff] [blame] | 19 | #include "base/notreached.h" |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 20 | #include "base/strings/string_util.h" |
Max Ihlenfeldt | ed435f1 | 2024-02-14 13:25:18 | [diff] [blame] | 21 | #include "base/strings/stringprintf.h" |
Kevin Schoedel | e07b808 | 2017-07-04 15:43:03 | [diff] [blame] | 22 | #include "base/strings/utf_string_conversion_utils.h" |
[email protected] | c7057fbe | 2013-06-07 18:54:01 | [diff] [blame] | 23 | #include "base/strings/utf_string_conversions.h" |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 24 | #include "base/third_party/icu/icu_utf.h" |
[email protected] | af341de | 2013-12-02 05:51:52 | [diff] [blame] | 25 | #include "ui/events/event.h" |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 26 | #include "ui/events/keycodes/dom/dom_key.h" |
| 27 | #include "ui/events/keycodes/dom/keycode_converter.h" |
[email protected] | 93ef86cb | 2014-06-16 19:08:55 | [diff] [blame] | 28 | #include "ui/events/keycodes/keyboard_codes.h" |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 29 | |
| 30 | namespace { |
| 31 | |
Nick Diego Yamane | f9877ae | 2019-01-31 17:08:31 | [diff] [blame] | 32 | #include "ui/base/ime/character_composer_data.h" |
[email protected] | 146d8d9 | 2013-07-05 04:43:05 | [diff] [blame] | 33 | |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 34 | bool CheckCharacterComposeTable( |
| 35 | const ui::CharacterComposer::ComposeBuffer& compose_sequence, |
avi | 20f6a6d53 | 2015-12-23 08:05:24 | [diff] [blame] | 36 | uint32_t* composed_character) { |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 37 | const ui::TreeComposeChecker kTreeComposeChecker(kCompositions); |
| 38 | return kTreeComposeChecker.CheckSequence(compose_sequence, |
| 39 | composed_character) != |
| 40 | ui::ComposeChecker::CheckSequenceResult::NO_MATCH; |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 41 | } |
| 42 | |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 43 | // Converts |character| to UTF16 string. |
| 44 | // Returns false when |character| is not a valid character. |
Jan Wilken Dörrie | 5263957 | 2021-03-11 16:49:54 | [diff] [blame] | 45 | bool UTF32CharacterToUTF16(uint32_t character, std::u16string* output) { |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 46 | output->clear(); |
| 47 | // Reject invalid character. (e.g. codepoint greater than 0x10ffff) |
| 48 | if (!CBU_IS_UNICODE_CHAR(character)) |
| 49 | return false; |
| 50 | if (character) { |
| 51 | output->resize(CBU16_LENGTH(character)); |
| 52 | size_t i = 0; |
| 53 | CBU16_APPEND_UNSAFE(&(*output)[0], i, character); |
| 54 | } |
| 55 | return true; |
| 56 | } |
| 57 | |
[email protected] | 93ef86cb | 2014-06-16 19:08:55 | [diff] [blame] | 58 | // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|. |
| 59 | // -1 is returned when |keycode| cannot be a hexadecimal digit. |
| 60 | int KeycodeToHexDigit(unsigned int keycode) { |
| 61 | if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9) |
| 62 | return keycode - ui::VKEY_0; |
| 63 | if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F) |
| 64 | return keycode - ui::VKEY_A + 10; |
| 65 | return -1; // |keycode| cannot be a hexadecimal digit. |
| 66 | } |
| 67 | |
Max Ihlenfeldt | ed435f1 | 2024-02-14 13:25:18 | [diff] [blame] | 68 | // `ui::DomKey` only offers `ToDeadKeyCombiningCharacter()`, but we need the |
| 69 | // non-combining character for the dead key for the preedit string. If we use |
| 70 | // the combining character, it may combine with the character preceding the |
| 71 | // preedit string, which is unwanted and confusing. |
| 72 | std::optional<char16_t> DeadKeyToNonCombiningCharacter(ui::DomKey dom_key) { |
| 73 | CHECK(dom_key.IsDeadKey()); |
| 74 | uint32_t combining_char = dom_key.ToDeadKeyCombiningCharacter(); |
| 75 | |
| 76 | // Unicode's list of "Combining Diacritical Marks" |
| 77 | // (https://www.unicode.org/charts/PDF/U0300.pdf) is much longer, but these |
| 78 | // should be the most commonly used ones. |
| 79 | switch (combining_char) { |
| 80 | // Combining grave. |
| 81 | case 0x300: |
| 82 | return u'`'; |
| 83 | // Combining acute. |
| 84 | case 0x301: |
| 85 | return u'´'; |
| 86 | // Combining circumflex. |
| 87 | case 0x302: |
| 88 | return u'^'; |
| 89 | // Combining tilde. |
| 90 | case 0x303: |
| 91 | return u'~'; |
| 92 | // Combining diaeresis. |
| 93 | case 0x308: |
| 94 | return u'¨'; |
| 95 | // Unknown combining character. |
| 96 | default: |
| 97 | LOG(WARNING) << "Unable to convert unknown dead key combining character " |
| 98 | "to non-combining variant: U+" |
| 99 | << base::StringPrintf("%04d", combining_char); |
| 100 | return std::nullopt; |
| 101 | } |
| 102 | } |
| 103 | |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 104 | } // namespace |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 105 | |
[email protected] | 2e79f73 | 2011-11-10 17:48:52 | [diff] [blame] | 106 | namespace ui { |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 107 | |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 108 | CharacterComposer::CharacterComposer(PreeditStringMode mode) |
| 109 | : preedit_string_mode_(mode) {} |
[email protected] | c6a06c51 | 2011-06-22 01:22:09 | [diff] [blame] | 110 | |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 111 | CharacterComposer::~CharacterComposer() = default; |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 112 | |
| 113 | void CharacterComposer::Reset() { |
| 114 | compose_buffer_.clear(); |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 115 | hex_buffer_.clear(); |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 116 | composed_character_.clear(); |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 117 | preedit_string_.clear(); |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 118 | composition_mode_ = KEY_SEQUENCE_MODE; |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 119 | } |
| 120 | |
[email protected] | af341de | 2013-12-02 05:51:52 | [diff] [blame] | 121 | bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) { |
Avi Drissman | ae99ae28 | 2024-07-22 20:44:28 | [diff] [blame] | 122 | if (event.type() != EventType::kKeyPressed && |
| 123 | event.type() != EventType::kKeyReleased) { |
[email protected] | af341de | 2013-12-02 05:51:52 | [diff] [blame] | 124 | return false; |
Avi Drissman | ae99ae28 | 2024-07-22 20:44:28 | [diff] [blame] | 125 | } |
[email protected] | af341de | 2013-12-02 05:51:52 | [diff] [blame] | 126 | |
asargent | f7e41c8 | 2015-07-09 23:22:02 | [diff] [blame] | 127 | // We don't care about modifier key presses. |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 128 | if (KeycodeConverter::IsDomKeyForModifier(event.GetDomKey())) |
asargent | f7e41c8 | 2015-07-09 23:22:02 | [diff] [blame] | 129 | return false; |
| 130 | |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 131 | composed_character_.clear(); |
| 132 | preedit_string_.clear(); |
| 133 | |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 134 | // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE. |
pkasting | cc7f6ac | 2016-01-08 23:38:47 | [diff] [blame] | 135 | // We don't care about other modifiers like Alt. When CapsLock is on, we do |
| 136 | // nothing because what we receive is Ctrl+Shift+u (not U). |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 137 | if (event.key_code() == VKEY_U && |
pkasting | cc7f6ac | 2016-01-08 23:38:47 | [diff] [blame] | 138 | (event.flags() & (EF_SHIFT_DOWN | EF_CONTROL_DOWN | EF_CAPS_LOCK_ON)) == |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 139 | (EF_SHIFT_DOWN | EF_CONTROL_DOWN)) { |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 140 | if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) { |
| 141 | // There is no ongoing composition. Let's switch to HEX_MODE. |
| 142 | composition_mode_ = HEX_MODE; |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 143 | UpdatePreeditStringHexMode(); |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 144 | return true; |
| 145 | } |
| 146 | } |
| 147 | |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 148 | // Filter key press in an appropriate manner. |
| 149 | switch (composition_mode_) { |
| 150 | case KEY_SEQUENCE_MODE: |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 151 | return FilterKeyPressSequenceMode(event); |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 152 | case HEX_MODE: |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 153 | return FilterKeyPressHexMode(event); |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 154 | default: |
Peter Boström | 00fa00f | 2024-08-06 21:55:32 | [diff] [blame] | 155 | NOTREACHED(); |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 156 | } |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 157 | } |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 158 | |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 159 | bool CharacterComposer::FilterKeyPressSequenceMode(const KeyEvent& event) { |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 160 | DCHECK(composition_mode_ == KEY_SEQUENCE_MODE); |
kpschoedel | 51d2e32 | 2015-08-28 15:29:35 | [diff] [blame] | 161 | compose_buffer_.push_back(event.GetDomKey()); |
[email protected] | 146d8d9 | 2013-07-05 04:43:05 | [diff] [blame] | 162 | |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 163 | // Check compose table. |
avi | 20f6a6d53 | 2015-12-23 08:05:24 | [diff] [blame] | 164 | uint32_t composed_character_utf32 = 0; |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 165 | if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) { |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 166 | // Key press is recognized as a part of composition. |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 167 | if (composed_character_utf32 != 0) { |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 168 | // We get a composed character. |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 169 | compose_buffer_.clear(); |
[email protected] | d737d61 | 2012-03-21 07:58:14 | [diff] [blame] | 170 | UTF32CharacterToUTF16(composed_character_utf32, &composed_character_); |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 171 | } |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 172 | |
| 173 | if (preedit_string_mode_ == PreeditStringMode::kAlwaysEnabled) { |
| 174 | UpdatePreeditStringSequenceMode(); |
| 175 | } |
| 176 | |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 177 | return true; |
| 178 | } |
[email protected] | 67512c7e | 2011-07-08 04:32:24 | [diff] [blame] | 179 | // Key press is not a part of composition. |
| 180 | compose_buffer_.pop_back(); // Remove the keypress added this time. |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 181 | if (!compose_buffer_.empty()) { |
Kevin Schoedel | e07b808 | 2017-07-04 15:43:03 | [diff] [blame] | 182 | // Check for Windows-style composition fallback: If the dead key encodes |
| 183 | // a printable ASCII character, output that followed by the new keypress. |
| 184 | // (This could be extended to allow any printable Unicode character in |
| 185 | // the dead key, and/or for longer sequences, but there is no current use |
| 186 | // for that, so we keep it simple.) |
| 187 | if ((compose_buffer_.size() == 1) && (compose_buffer_[0].IsDeadKey())) { |
| 188 | int32_t dead_character = compose_buffer_[0].ToDeadKeyCombiningCharacter(); |
| 189 | if (dead_character >= 0x20 && dead_character <= 0x7E) { |
| 190 | DomKey current_key = event.GetDomKey(); |
| 191 | int32_t current_character = 0; |
| 192 | if (current_key.IsCharacter()) |
| 193 | current_character = current_key.ToCharacter(); |
| 194 | else if (current_key.IsDeadKey()) |
| 195 | current_character = current_key.ToDeadKeyCombiningCharacter(); |
| 196 | if (current_character) { |
| 197 | base::WriteUnicodeCharacter(dead_character, &composed_character_); |
| 198 | base::WriteUnicodeCharacter(current_character, &composed_character_); |
| 199 | } |
| 200 | } |
| 201 | } |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 202 | compose_buffer_.clear(); |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 203 | |
| 204 | if (preedit_string_mode_ == PreeditStringMode::kAlwaysEnabled) { |
| 205 | UpdatePreeditStringSequenceMode(); |
| 206 | } |
| 207 | |
[email protected] | bc3a3210 | 2011-06-22 00:48:14 | [diff] [blame] | 208 | return true; |
| 209 | } |
| 210 | return false; |
| 211 | } |
| 212 | |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 213 | void CharacterComposer::UpdatePreeditStringSequenceMode() { |
| 214 | CHECK_EQ(preedit_string_mode_, PreeditStringMode::kAlwaysEnabled); |
| 215 | for (auto key : compose_buffer_) { |
| 216 | if (key.IsCharacter()) { |
| 217 | base::WriteUnicodeCharacter(key.ToCharacter(), &preedit_string_); |
| 218 | } else if (key.IsDeadKey()) { |
Max Ihlenfeldt | ed435f1 | 2024-02-14 13:25:18 | [diff] [blame] | 219 | if (std::optional<char16_t> non_combining_character = |
| 220 | DeadKeyToNonCombiningCharacter(key)) { |
| 221 | base::WriteUnicodeCharacter(*non_combining_character, &preedit_string_); |
| 222 | } |
Max Ihlenfeldt | 94aaecd | 2024-02-06 13:23:48 | [diff] [blame] | 223 | } else if (key.IsComposeKey() && (compose_buffer_.size() == 1)) { |
| 224 | base::WriteUnicodeCharacter(kPreeditStringComposeKeySymbol, |
| 225 | &preedit_string_); |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | |
kpschoedel | ed31edf | 2015-07-10 18:38:47 | [diff] [blame] | 230 | bool CharacterComposer::FilterKeyPressHexMode(const KeyEvent& event) { |
[email protected] | 6d68609 | 2012-06-11 11:14:25 | [diff] [blame] | 231 | DCHECK(composition_mode_ == HEX_MODE); |
| 232 | const size_t kMaxHexSequenceLength = 8; |
Jan Wilken Dörrie | dfd39a2 | 2021-03-09 08:14:34 | [diff] [
|