Fix typos: misspell -w -error -source=text namespace.c
[ruby.git] / prism / util / pm_strpbrk.h
blobf387bd5782f169313afdd438767652cb42ac8408
1 /**
2 * @file pm_strpbrk.h
4 * A custom strpbrk implementation.
5 */
6 #ifndef PRISM_STRPBRK_H
7 #define PRISM_STRPBRK_H
9 #include "prism/defines.h"
10 #include "prism/diagnostic.h"
11 #include "prism/parser.h"
13 #include <stddef.h>
14 #include <string.h>
16 /**
17 * Here we have rolled our own version of strpbrk. The standard library strpbrk
18 * has undefined behavior when the source string is not null-terminated. We want
19 * to support strings that are not null-terminated because pm_parse does not
20 * have the contract that the string is null-terminated. (This is desirable
21 * because it means the extension can call pm_parse with the result of a call to
22 * mmap).
23 *
24 * The standard library strpbrk also does not support passing a maximum length
25 * to search. We want to support this for the reason mentioned above, but we
26 * also don't want it to stop on null bytes. Ruby actually allows null bytes
27 * within strings, comments, regular expressions, etc. So we need to be able to
28 * skip past them.
29 *
30 * Finally, we want to support encodings wherein the charset could contain
31 * characters that are trailing bytes of multi-byte characters. For example, in
32 * Shift-JIS, the backslash character can be a trailing byte. In that case we
33 * need to take a slower path and iterate one multi-byte character at a time.
34 *
35 * @param parser The parser, whose current encoding is consulted (see validate).
36 * @param source The source to search (need not be null-terminated; may contain null bytes).
37 * @param charset The charset to search for (presumably null-terminated; TODO confirm).
38 * @param length The maximum number of bytes to search (signed type; handling of non-positive values TODO confirm).
39 * @param validate Whether to validate that the source string is valid in the
40 * current encoding of the parser.
41 * @return A pointer to the first character in the source string that is in the
42 * charset, or NULL if no such character exists.
43 */
44 const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate);
46 #endif