summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Newton <[email protected]> 2024-02-12 13:43:07 -0500
committer Kevin Newton <[email protected]> 2024-02-12 14:48:09 -0500
commit 1395838e1887d581a3d6cd28c24f7b44c44f5c26 (patch)
tree 60483f96e31a7626bcc2593aadb435569fe38c5f
parent 2131d04f43d81469f6ab4215bef4644390e70ee5 (diff)
[PRISM] Check full lines for invalid UTF-8
-rw-r--r-- prism_compile.c 14
1 file changed, 11 insertions, 3 deletions
diff --git a/prism_compile.c b/prism_compile.c
index 0e507b56d9..cfad11a109 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -7979,11 +7979,19 @@ pm_parse_result_free(pm_parse_result_t *result)
}
/**
- * Check if the given source slice is valid UTF-8.
+ * Check if the given source slice is valid UTF-8. The location represents the
+ * location of the error, but the slice of the source will include the content
+ * of all of the lines that the error touches, so we need to check those parts
+ * as well.
*/
static bool
-pm_parse_input_error_utf8_p(const uint8_t *start, const uint8_t *end)
+pm_parse_input_error_utf8_p(const pm_parser_t *parser, const pm_location_t *location)
{
+ const pm_line_column_t start_location = pm_newline_list_line_column(&parser->newline_list, location->start);
+ const pm_line_column_t end_location = pm_newline_list_line_column(&parser->newline_list, location->end);
+
+ const uint8_t *start = parser->start + parser->newline_list.offsets[start_location.line - 1];
+ const uint8_t *end = ((end_location.line == parser->newline_list.size) ? parser->end : (parser->start + parser->newline_list.offsets[end_location.line]));
size_t width;
while (start < end) {
@@ -8017,7 +8025,7 @@ pm_parse_input_error(const pm_parse_result_t *result)
// contain invalid byte sequences. So if any source examples include
// invalid UTF-8 byte sequences, we will skip showing source examples
// entirely.
- if (valid_utf8 && !pm_parse_input_error_utf8_p(error->location.start, error->location.end)) {
+ if (valid_utf8 && !pm_parse_input_error_utf8_p(&result->parser, &error->location)) {
valid_utf8 = false;
}
}