summary | refs | log | tree | commit | diff
diff options
author: Nobuyoshi Nakada <[email protected]> 2024-06-01 19:33:12 +0900
committer: Nobuyoshi Nakada <[email protected]> 2024-06-01 19:33:12 +0900
commit: 05553cf22d43dd78b8f30cc4591230b5c000c538 (patch)
tree: ef14d16aa8033b29732e6e749d90e4f676f506fb
parent: cda69b5910494a745d87b7932547341cb2fefe3a (diff)
[Bug #20517] Make a multibyte character one token at meta escape
-rw-r--r-- parse.y 6
-rw-r--r-- test/ripper/test_lexer.rb 54
2 files changed, 56 insertions, 4 deletions
diff --git a/parse.y b/parse.y
index 1f12203e27..477eb3cbdd 100644
--- a/parse.y
+++ b/parse.y
@@ -8155,7 +8155,11 @@ read_escape(struct parser_params *p, int flags)
}
return read_escape(p, flags|ESCAPE_META) | 0x80;
}
- else if (c == -1 || !ISASCII(c)) goto eof;
+ else if (c == -1) goto eof;
+ else if (!ISASCII(c)) {
+ tokskip_mbchar(p);
+ goto eof;
+ }
else {
int c2 = escaped_control_code(c);
if (c2) {
diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb
index 7d62a7ee28..392c4e9cba 100644
--- a/test/ripper/test_lexer.rb
+++ b/test/ripper/test_lexer.rb
@@ -302,9 +302,8 @@ world"
[[6, 2], :on_tstring_content, "3\n", state(:EXPR_BEG)],
[[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)],
]
- assert_equal(code, Ripper.tokenize(code).join(""))
- assert_equal(expected, result = Ripper.lex(code),
- proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}})
+
+ assert_lexer(expected, code)
code = <<~'HEREDOC'
<<-H1
@@ -330,6 +329,55 @@ world"
[[6, 0], :on_tstring_content, " 3\n", state(:EXPR_BEG)],
[[7, 0], :on_heredoc_end, "H1\n", state(:EXPR_BEG)],
]
+
+ assert_lexer(expected, code)
+ end
+
+ def test_invalid_escape_ctrl_mbchar
+ code = %["\\C-\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\C-\u{3042}", state(:EXPR_BEG)],
+ [[1, 7], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
+ end
+
+ def test_invalid_escape_meta_mbchar
+ code = %["\\M-\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\M-\u{3042}", state(:EXPR_BEG)],
+ [[1, 7], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
+ end
+
+ def test_invalid_escape_meta_ctrl_mbchar
+ code = %["\\M-\\C-\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\M-\\C-\u{3042}", state(:EXPR_BEG)],
+ [[1, 10], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
+ end
+
+ def test_invalid_escape_ctrl_meta_mbchar
+ code = %["\\C-\\M-\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\C-\\M-\u{3042}", state(:EXPR_BEG)],
+ [[1, 10], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
+ end
+
+ def assert_lexer(expected, code)
assert_equal(code, Ripper.tokenize(code).join(""))
assert_equal(expected, result = Ripper.lex(code),
proc {expected.zip(result) {|e, r| break diff(e, r) unless e == r}})