blob: a3cb5400cedb14d98336602dad78efbdc5a33d77 [file] [log] [blame]
Avi Drissman3e1a26c2022-09-15 20:26:031// Copyright 2013 The Chromium Authors
[email protected]bc3a32102011-06-22 00:48:142// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
Arthur Sonzogni24d53e32024-07-26 14:00:545#ifdef UNSAFE_BUFFERS_BUILD
6// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
7#pragma allow_unsafe_buffers
8#endif
9
Nick Diego Yamanef9877ae2019-01-31 17:08:3110#include "ui/base/ime/character_composer.h"
[email protected]bc3a32102011-06-22 00:48:1411
[email protected]67512c7e2011-07-08 04:32:2412#include <algorithm>
13#include <iterator>
Max Ihlenfeldted435f12024-02-14 13:25:1814#include <optional>
Nick Diego Yamanef9877ae2019-01-31 17:08:3115#include <string>
[email protected]bc3a32102011-06-22 00:48:1416
Hans Wennborg8586102b2020-05-05 13:43:2917#include "base/check.h"
Max Ihlenfeldted435f12024-02-14 13:25:1818#include "base/logging.h"
Hans Wennborg8586102b2020-05-05 13:43:2919#include "base/notreached.h"
kpschoedeled31edf2015-07-10 18:38:4720#include "base/strings/string_util.h"
Max Ihlenfeldted435f12024-02-14 13:25:1821#include "base/strings/stringprintf.h"
Kevin Schoedele07b8082017-07-04 15:43:0322#include "base/strings/utf_string_conversion_utils.h"
[email protected]c7057fbe2013-06-07 18:54:0123#include "base/strings/utf_string_conversions.h"
[email protected]d737d612012-03-21 07:58:1424#include "base/third_party/icu/icu_utf.h"
[email protected]af341de2013-12-02 05:51:5225#include "ui/events/event.h"
kpschoedeled31edf2015-07-10 18:38:4726#include "ui/events/keycodes/dom/dom_key.h"
27#include "ui/events/keycodes/dom/keycode_converter.h"
[email protected]93ef86cb2014-06-16 19:08:5528#include "ui/events/keycodes/keyboard_codes.h"
[email protected]bc3a32102011-06-22 00:48:1429
30namespace {
31
Nick Diego Yamanef9877ae2019-01-31 17:08:3132#include "ui/base/ime/character_composer_data.h"
[email protected]146d8d92013-07-05 04:43:0533
kpschoedeled31edf2015-07-10 18:38:4734bool CheckCharacterComposeTable(
35 const ui::CharacterComposer::ComposeBuffer& compose_sequence,
avi20f6a6d532015-12-23 08:05:2436 uint32_t* composed_character) {
kpschoedeled31edf2015-07-10 18:38:4737 const ui::TreeComposeChecker kTreeComposeChecker(kCompositions);
38 return kTreeComposeChecker.CheckSequence(compose_sequence,
39 composed_character) !=
40 ui::ComposeChecker::CheckSequenceResult::NO_MATCH;
[email protected]bc3a32102011-06-22 00:48:1441}
42
[email protected]d737d612012-03-21 07:58:1443// Converts |character| to UTF16 string.
44// Returns false when |character| is not a valid character.
Jan Wilken Dörrie52639572021-03-11 16:49:5445bool UTF32CharacterToUTF16(uint32_t character, std::u16string* output) {
[email protected]d737d612012-03-21 07:58:1446 output->clear();
47 // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
48 if (!CBU_IS_UNICODE_CHAR(character))
49 return false;
50 if (character) {
51 output->resize(CBU16_LENGTH(character));
52 size_t i = 0;
53 CBU16_APPEND_UNSAFE(&(*output)[0], i, character);
54 }
55 return true;
56}
57
[email protected]93ef86cb2014-06-16 19:08:5558// Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
59// -1 is returned when |keycode| cannot be a hexadecimal digit.
60int KeycodeToHexDigit(unsigned int keycode) {
61 if (ui::VKEY_0 <= keycode && keycode <= ui::VKEY_9)
62 return keycode - ui::VKEY_0;
63 if (ui::VKEY_A <= keycode && keycode <= ui::VKEY_F)
64 return keycode - ui::VKEY_A + 10;
65 return -1; // |keycode| cannot be a hexadecimal digit.
66}
67
Max Ihlenfeldted435f12024-02-14 13:25:1868// `ui::DomKey` only offers `ToDeadKeyCombiningCharacter()`, but we need the
69// non-combining character for the dead key for the preedit string. If we use
70// the combining character, it may combine with the character preceding the
71// preedit string, which is unwanted and confusing.
72std::optional<char16_t> DeadKeyToNonCombiningCharacter(ui::DomKey dom_key) {
73 CHECK(dom_key.IsDeadKey());
74 uint32_t combining_char = dom_key.ToDeadKeyCombiningCharacter();
75
76 // Unicode's list of "Combining Diacritical Marks"
77 // (https://www.unicode.org/charts/PDF/U0300.pdf) is much longer, but these
78 // should be the most commonly used ones.
79 switch (combining_char) {
80 // Combining grave.
81 case 0x300:
82 return u'`';
83 // Combining acute.
84 case 0x301:
85 return u'´';
86 // Combining circumflex.
87 case 0x302:
88 return u'^';
89 // Combining tilde.
90 case 0x303:
91 return u'~';
92 // Combining diaeresis.
93 case 0x308:
94 return u'¨';
95 // Unknown combining character.
96 default:
97 LOG(WARNING) << "Unable to convert unknown dead key combining character "
98 "to non-combining variant: U+"
99 << base::StringPrintf("%04d", combining_char);
100 return std::nullopt;
101 }
102}
103
[email protected]67512c7e2011-07-08 04:32:24104} // namespace
[email protected]bc3a32102011-06-22 00:48:14105
[email protected]2e79f732011-11-10 17:48:52106namespace ui {
[email protected]bc3a32102011-06-22 00:48:14107
Max Ihlenfeldt94aaecd2024-02-06 13:23:48108CharacterComposer::CharacterComposer(PreeditStringMode mode)
109 : preedit_string_mode_(mode) {}
[email protected]c6a06c512011-06-22 01:22:09110
Max Ihlenfeldt94aaecd2024-02-06 13:23:48111CharacterComposer::~CharacterComposer() = default;
[email protected]bc3a32102011-06-22 00:48:14112
113void CharacterComposer::Reset() {
114 compose_buffer_.clear();
kpschoedeled31edf2015-07-10 18:38:47115 hex_buffer_.clear();
[email protected]bc3a32102011-06-22 00:48:14116 composed_character_.clear();
[email protected]6d686092012-06-11 11:14:25117 preedit_string_.clear();
[email protected]d737d612012-03-21 07:58:14118 composition_mode_ = KEY_SEQUENCE_MODE;
[email protected]bc3a32102011-06-22 00:48:14119}
120
[email protected]af341de2013-12-02 05:51:52121bool CharacterComposer::FilterKeyPress(const ui::KeyEvent& event) {
Avi Drissmanae99ae282024-07-22 20:44:28122 if (event.type() != EventType::kKeyPressed &&
123 event.type() != EventType::kKeyReleased) {
[email protected]af341de2013-12-02 05:51:52124 return false;
Avi Drissmanae99ae282024-07-22 20:44:28125 }
[email protected]af341de2013-12-02 05:51:52126
asargentf7e41c82015-07-09 23:22:02127 // We don't care about modifier key presses.
kpschoedeled31edf2015-07-10 18:38:47128 if (KeycodeConverter::IsDomKeyForModifier(event.GetDomKey()))
asargentf7e41c82015-07-09 23:22:02129 return false;
130
kpschoedeled31edf2015-07-10 18:38:47131 composed_character_.clear();
132 preedit_string_.clear();
133
[email protected]d737d612012-03-21 07:58:14134 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
pkastingcc7f6ac2016-01-08 23:38:47135 // We don't care about other modifiers like Alt. When CapsLock is on, we do
136 // nothing because what we receive is Ctrl+Shift+u (not U).
kpschoedeled31edf2015-07-10 18:38:47137 if (event.key_code() == VKEY_U &&
pkastingcc7f6ac2016-01-08 23:38:47138 (event.flags() & (EF_SHIFT_DOWN | EF_CONTROL_DOWN | EF_CAPS_LOCK_ON)) ==
kpschoedeled31edf2015-07-10 18:38:47139 (EF_SHIFT_DOWN | EF_CONTROL_DOWN)) {
[email protected]d737d612012-03-21 07:58:14140 if (composition_mode_ == KEY_SEQUENCE_MODE && compose_buffer_.empty()) {
141 // There is no ongoing composition. Let's switch to HEX_MODE.
142 composition_mode_ = HEX_MODE;
[email protected]6d686092012-06-11 11:14:25143 UpdatePreeditStringHexMode();
[email protected]d737d612012-03-21 07:58:14144 return true;
145 }
146 }
147
[email protected]6d686092012-06-11 11:14:25148 // Filter key press in an appropriate manner.
149 switch (composition_mode_) {
150 case KEY_SEQUENCE_MODE:
kpschoedeled31edf2015-07-10 18:38:47151 return FilterKeyPressSequenceMode(event);
[email protected]6d686092012-06-11 11:14:25152 case HEX_MODE:
kpschoedeled31edf2015-07-10 18:38:47153 return FilterKeyPressHexMode(event);
[email protected]6d686092012-06-11 11:14:25154 default:
Peter Boström00fa00f2024-08-06 21:55:32155 NOTREACHED();
[email protected]d737d612012-03-21 07:58:14156 }
[email protected]6d686092012-06-11 11:14:25157}
[email protected]d737d612012-03-21 07:58:14158
kpschoedeled31edf2015-07-10 18:38:47159bool CharacterComposer::FilterKeyPressSequenceMode(const KeyEvent& event) {
[email protected]d737d612012-03-21 07:58:14160 DCHECK(composition_mode_ == KEY_SEQUENCE_MODE);
kpschoedel51d2e322015-08-28 15:29:35161 compose_buffer_.push_back(event.GetDomKey());
[email protected]146d8d92013-07-05 04:43:05162
[email protected]67512c7e2011-07-08 04:32:24163 // Check compose table.
avi20f6a6d532015-12-23 08:05:24164 uint32_t composed_character_utf32 = 0;
[email protected]bc3a32102011-06-22 00:48:14165 if (CheckCharacterComposeTable(compose_buffer_, &composed_character_utf32)) {
[email protected]67512c7e2011-07-08 04:32:24166 // Key press is recognized as a part of composition.
[email protected]d737d612012-03-21 07:58:14167 if (composed_character_utf32 != 0) {
[email protected]67512c7e2011-07-08 04:32:24168 // We get a composed character.
[email protected]bc3a32102011-06-22 00:48:14169 compose_buffer_.clear();
[email protected]d737d612012-03-21 07:58:14170 UTF32CharacterToUTF16(composed_character_utf32, &composed_character_);
[email protected]bc3a32102011-06-22 00:48:14171 }
Max Ihlenfeldt94aaecd2024-02-06 13:23:48172
173 if (preedit_string_mode_ == PreeditStringMode::kAlwaysEnabled) {
174 UpdatePreeditStringSequenceMode();
175 }
176
[email protected]bc3a32102011-06-22 00:48:14177 return true;
178 }
[email protected]67512c7e2011-07-08 04:32:24179 // Key press is not a part of composition.
180 compose_buffer_.pop_back(); // Remove the keypress added this time.
[email protected]bc3a32102011-06-22 00:48:14181 if (!compose_buffer_.empty()) {
Kevin Schoedele07b8082017-07-04 15:43:03182 // Check for Windows-style composition fallback: If the dead key encodes
183 // a printable ASCII character, output that followed by the new keypress.
184 // (This could be extended to allow any printable Unicode character in
185 // the dead key, and/or for longer sequences, but there is no current use
186 // for that, so we keep it simple.)
187 if ((compose_buffer_.size() == 1) && (compose_buffer_[0].IsDeadKey())) {
188 int32_t dead_character = compose_buffer_[0].ToDeadKeyCombiningCharacter();
189 if (dead_character >= 0x20 && dead_character <= 0x7E) {
190 DomKey current_key = event.GetDomKey();
191 int32_t current_character = 0;
192 if (current_key.IsCharacter())
193 current_character = current_key.ToCharacter();
194 else if (current_key.IsDeadKey())
195 current_character = current_key.ToDeadKeyCombiningCharacter();
196 if (current_character) {
197 base::WriteUnicodeCharacter(dead_character, &composed_character_);
198 base::WriteUnicodeCharacter(current_character, &composed_character_);
199 }
200 }
201 }
[email protected]bc3a32102011-06-22 00:48:14202 compose_buffer_.clear();
Max Ihlenfeldt94aaecd2024-02-06 13:23:48203
204 if (preedit_string_mode_ == PreeditStringMode::kAlwaysEnabled) {
205 UpdatePreeditStringSequenceMode();
206 }
207
[email protected]bc3a32102011-06-22 00:48:14208 return true;
209 }
210 return false;
211}
212
Max Ihlenfeldt94aaecd2024-02-06 13:23:48213void CharacterComposer::UpdatePreeditStringSequenceMode() {
214 CHECK_EQ(preedit_string_mode_, PreeditStringMode::kAlwaysEnabled);
215 for (auto key : compose_buffer_) {
216 if (key.IsCharacter()) {
217 base::WriteUnicodeCharacter(key.ToCharacter(), &preedit_string_);
218 } else if (key.IsDeadKey()) {
Max Ihlenfeldted435f12024-02-14 13:25:18219 if (std::optional<char16_t> non_combining_character =
220 DeadKeyToNonCombiningCharacter(key)) {
221 base::WriteUnicodeCharacter(*non_combining_character, &preedit_string_);
222 }
Max Ihlenfeldt94aaecd2024-02-06 13:23:48223 } else if (key.IsComposeKey() && (compose_buffer_.size() == 1)) {
224 base::WriteUnicodeCharacter(kPreeditStringComposeKeySymbol,
225 &preedit_string_);
226 }
227 }
228}
229
kpschoedeled31edf2015-07-10 18:38:47230bool CharacterComposer::FilterKeyPressHexMode(const KeyEvent& event) {
[email protected]6d686092012-06-11 11:14:25231 DCHECK(composition_mode_ == HEX_MODE);
232 const size_t kMaxHexSequenceLength = 8;
Jan Wilken Dörriedfd39a22021-03-09 08:14:34