diff options
author | Kevin Newton <[email protected]> | 2024-02-12 13:43:07 -0500 |
---|---|---|
committer | Kevin Newton <[email protected]> | 2024-02-12 14:48:09 -0500 |
commit | 1395838e1887d581a3d6cd28c24f7b44c44f5c26 (patch) | |
tree | 60483f96e31a7626bcc2593aadb435569fe38c5f | |
parent | 2131d04f43d81469f6ab4215bef4644390e70ee5 (diff) |
[PRISM] Check full lines for invalid UTF-8
-rw-r--r-- | prism_compile.c | 14 |
1 files changed, 11 insertions, 3 deletions
```diff
diff --git a/prism_compile.c b/prism_compile.c
index 0e507b56d9..cfad11a109 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -7979,11 +7979,19 @@ pm_parse_result_free(pm_parse_result_t *result)
 }
 
 /**
- * Check if the given source slice is valid UTF-8.
+ * Check if the given source slice is valid UTF-8. The location represents the
+ * location of the error, but the slice of the source will include the content
+ * of all of the lines that the error touches, so we need to check those parts
+ * as well.
  */
 static bool
-pm_parse_input_error_utf8_p(const uint8_t *start, const uint8_t *end)
+pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
 {
+    const pm_line_column_t start_location = pm_newline_list_line_column(&parser->newline_list, location->start);
+    const pm_line_column_t end_location = pm_newline_list_line_column(&parser->newline_list, location->end);
+
+    const uint8_t *start = parser->start + parser->newline_list.offsets[start_location.line - 1];
+    const uint8_t *end = ((end_location.line == parser->newline_list.size) ? parser->end : (parser->start + parser->newline_list.offsets[end_location.line]));
     size_t width;
 
     while (start < end) {
@@ -8017,7 +8025,7 @@ pm_parse_input_error(const pm_parse_result_t *result)
         // contain invalid byte sequences. So if any source examples include
         // invalid UTF-8 byte sequences, we will skip showing source examples
         // entirely.
-        if (valid_utf8 && !pm_parse_input_error_utf8_p(error->location.start, error->location.end)) {
+        if (valid_utf8 && !pm_parse_input_error_utf8_p(&result->parser, &error->location)) {
             valid_utf8 = false;
         }
     }
```