4 * The main header file for the prism parser.
9 #include "prism/defines.h"
10 #include "prism/util/pm_buffer.h"
11 #include "prism/util/pm_char.h"
12 #include "prism/util/pm_integer.h"
13 #include "prism/util/pm_memchr.h"
14 #include "prism/util/pm_strncasecmp.h"
15 #include "prism/util/pm_strpbrk.h"
16 #include "prism/ast.h"
17 #include "prism/diagnostic.h"
18 #include "prism/node.h"
19 #include "prism/options.h"
20 #include "prism/pack.h"
21 #include "prism/parser.h"
22 #include "prism/prettyprint.h"
23 #include "prism/regexp.h"
24 #include "prism/static_literals.h"
25 #include "prism/version.h"
43 * The prism version and the serialization format.
45 * @returns The prism version as a constant string.
47 PRISM_EXPORTED_FUNCTION
const char * pm_version(void);
50 * Initialize a parser with the given start and end pointers.
52 * @param parser The parser to initialize.
53 * @param source The source to parse.
54 * @param size The size of the source.
55 * @param options The optional options to use when parsing.
57 PRISM_EXPORTED_FUNCTION
void pm_parser_init(pm_parser_t
*parser
, const uint8_t *source
, size_t size
, const pm_options_t
*options
);
60 * Register a callback that will be called whenever prism changes the encoding
61 * it is using to parse based on the magic comment.
63 * @param parser The parser to register the callback with.
64 * @param callback The callback to register.
66 PRISM_EXPORTED_FUNCTION
void pm_parser_register_encoding_changed_callback(pm_parser_t
*parser
, pm_encoding_changed_callback_t callback
);
69 * Free any memory associated with the given parser.
71 * @param parser The parser to free.
73 PRISM_EXPORTED_FUNCTION
void pm_parser_free(pm_parser_t
*parser
);
76 * Initiate the parser with the given parser.
78 * @param parser The parser to use.
79 * @return The AST representing the source.
81 PRISM_EXPORTED_FUNCTION pm_node_t
* pm_parse(pm_parser_t
*parser
);
/**
 * This function is used in pm_parse_stream to retrieve a line of input from a
 * stream. It closely mirrors that of fgets so that fgets can be used as the
 * default implementation.
 *
 * @param string The destination to read into (as with fgets).
 * @param size The size of the destination (as with fgets).
 * @param stream The stream to read from, passed through as an opaque pointer.
 * @return A pointer following the fgets convention.
 */
typedef char * (pm_parse_stream_fgets_t)(char *string, int size, void *stream);
91 * Parse a stream of Ruby source and return the tree.
93 * @param parser The parser to use.
94 * @param buffer The buffer to use.
95 * @param stream The stream to parse.
96 * @param stream_fgets The function to use to read from the stream.
97 * @param options The optional options to use when parsing.
98 * @return The AST representing the source.
100 PRISM_EXPORTED_FUNCTION pm_node_t
* pm_parse_stream(pm_parser_t
*parser
, pm_buffer_t
*buffer
, void *stream
, pm_parse_stream_fgets_t
*stream_fgets
, const pm_options_t
*options
);
102 // We optionally support serializing to a binary string. For systems that don't
103 // want or need this functionality, it can be turned off with the
104 // PRISM_EXCLUDE_SERIALIZATION define.
105 #ifndef PRISM_EXCLUDE_SERIALIZATION
108 * Parse and serialize the AST represented by the source that is read out of the
109 * given stream into to the given buffer.
111 * @param buffer The buffer to serialize to.
112 * @param stream The stream to parse.
113 * @param stream_fgets The function to use to read from the stream.
114 * @param data The optional data to pass to the parser.
116 PRISM_EXPORTED_FUNCTION
void pm_serialize_parse_stream(pm_buffer_t
*buffer
, void *stream
, pm_parse_stream_fgets_t
*stream_fgets
, const char *data
);
119 * Serialize the given list of comments to the given buffer.
121 * @param parser The parser to serialize.
122 * @param list The list of comments to serialize.
123 * @param buffer The buffer to serialize to.
125 void pm_serialize_comment_list(pm_parser_t
*parser
, pm_list_t
*list
, pm_buffer_t
*buffer
);
128 * Serialize the name of the encoding to the buffer.
130 * @param encoding The encoding to serialize.
131 * @param buffer The buffer to serialize to.
133 void pm_serialize_encoding(const pm_encoding_t
*encoding
, pm_buffer_t
*buffer
);
136 * Serialize the encoding, metadata, nodes, and constant pool.
138 * @param parser The parser to serialize.
139 * @param node The node to serialize.
140 * @param buffer The buffer to serialize to.
142 void pm_serialize_content(pm_parser_t
*parser
, pm_node_t
*node
, pm_buffer_t
*buffer
);
145 * Serialize the AST represented by the given node to the given buffer.
147 * @param parser The parser to serialize.
148 * @param node The node to serialize.
149 * @param buffer The buffer to serialize to.
151 PRISM_EXPORTED_FUNCTION
void pm_serialize(pm_parser_t
*parser
, pm_node_t
*node
, pm_buffer_t
*buffer
);
154 * Parse the given source to the AST and dump the AST to the given buffer.
156 * @param buffer The buffer to serialize to.
157 * @param source The source to parse.
158 * @param size The size of the source.
159 * @param data The optional data to pass to the parser.
161 PRISM_EXPORTED_FUNCTION
void pm_serialize_parse(pm_buffer_t
*buffer
, const uint8_t *source
, size_t size
, const char *data
);
164 * Parse and serialize the comments in the given source to the given buffer.
166 * @param buffer The buffer to serialize to.
167 * @param source The source to parse.
168 * @param size The size of the source.
169 * @param data The optional data to pass to the parser.
171 PRISM_EXPORTED_FUNCTION
void pm_serialize_parse_comments(pm_buffer_t
*buffer
, const uint8_t *source
, size_t size
, const char *data
);
174 * Lex the given source and serialize to the given buffer.
176 * @param source The source to lex.
177 * @param size The size of the source.
178 * @param buffer The buffer to serialize to.
179 * @param data The optional data to pass to the lexer.
181 PRISM_EXPORTED_FUNCTION
void pm_serialize_lex(pm_buffer_t
*buffer
, const uint8_t *source
, size_t size
, const char *data
);
184 * Parse and serialize both the AST and the tokens represented by the given
185 * source to the given buffer.
187 * @param buffer The buffer to serialize to.
188 * @param source The source to parse.
189 * @param size The size of the source.
190 * @param data The optional data to pass to the parser.
192 PRISM_EXPORTED_FUNCTION
void pm_serialize_parse_lex(pm_buffer_t
*buffer
, const uint8_t *source
, size_t size
, const char *data
);
197 * Parse the source and return true if it parses without errors or warnings.
199 * @param source The source to parse.
200 * @param size The size of the source.
201 * @param data The optional data to pass to the parser.
202 * @return True if the source parses without errors or warnings.
204 PRISM_EXPORTED_FUNCTION
bool pm_parse_success_p(const uint8_t *source
, size_t size
, const char *data
);
207 * Returns a string representation of the given token type.
209 * @param token_type The token type to convert to a string.
210 * @return A string representation of the given token type.
212 PRISM_EXPORTED_FUNCTION
const char * pm_token_type_name(pm_token_type_t token_type
);
215 * Returns the human name of the given token type.
217 * @param token_type The token type to convert to a human name.
218 * @return The human name of the given token type.
220 const char * pm_token_type_human(pm_token_type_t token_type
);
222 // We optionally support dumping to JSON. For systems that don't want or need
223 // this functionality, it can be turned off with the PRISM_EXCLUDE_JSON define.
224 #ifndef PRISM_EXCLUDE_JSON
227 * Dump JSON to the given buffer.
229 * @param buffer The buffer to serialize to.
230 * @param parser The parser that parsed the node.
231 * @param node The node to serialize.
233 PRISM_EXPORTED_FUNCTION
void pm_dump_json(pm_buffer_t
*buffer
, const pm_parser_t
*parser
, const pm_node_t
*node
);
/**
 * Represents the results of a slice query.
 */
typedef enum {
    /** Returned if the encoding given to a slice query was invalid. */
    PM_STRING_QUERY_ERROR = -1,

    /** Returned if the result of the slice query is false. */
    PM_STRING_QUERY_FALSE,

    /** Returned if the result of the slice query is true. */
    PM_STRING_QUERY_TRUE
} pm_string_query_t;
252 * Check that the slice is a valid local variable name.
254 * @param source The source to check.
255 * @param length The length of the source.
256 * @param encoding_name The name of the encoding of the source.
257 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
258 * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
260 PRISM_EXPORTED_FUNCTION pm_string_query_t
pm_string_query_local(const uint8_t *source
, size_t length
, const char *encoding_name
);
263 * Check that the slice is a valid constant name.
265 * @param source The source to check.
266 * @param length The length of the source.
267 * @param encoding_name The name of the encoding of the source.
268 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
269 * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
271 PRISM_EXPORTED_FUNCTION pm_string_query_t
pm_string_query_constant(const uint8_t *source
, size_t length
, const char *encoding_name
);
274 * Check that the slice is a valid method name.
276 * @param source The source to check.
277 * @param length The length of the source.
278 * @param encoding_name The name of the encoding of the source.
279 * @return PM_STRING_QUERY_TRUE if the query is true, PM_STRING_QUERY_FALSE if
280 * the query is false, and PM_STRING_QUERY_ERROR if the encoding was invalid.
282 PRISM_EXPORTED_FUNCTION pm_string_query_t
pm_string_query_method_name(const uint8_t *source
, size_t length
, const char *encoding_name
);
287 * Prism is a parser for the Ruby programming language. It is designed to be
288 * portable, error tolerant, and maintainable. It is written in C99 and has no
289 * dependencies. It is currently being integrated into
290 * [CRuby](https://github.com/ruby/ruby),
291 * [JRuby](https://github.com/jruby/jruby),
292 * [TruffleRuby](https://github.com/oracle/truffleruby),
293 * [Sorbet](https://github.com/sorbet/sorbet), and
294 * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
296 * @section getting-started Getting started
298 * If you're vendoring this project and compiling it statically then as long as
299 * you have a C99 compiler you will be fine. If you're linking against it as
300 * a shared library, then you should compile with `-fvisibility=hidden` and
301 * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface visible.
304 * @section parsing Parsing
306 * In order to parse Ruby code, the structures and functions that you're going
307 * to want to use and be aware of are:
309 * * `pm_parser_t` - the main parser structure
310 * * `pm_parser_init` - initialize a parser
311 * * `pm_parse` - parse and return the root node
312 * * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
313 * * `pm_parser_free` - free the internal memory of the parser
315 * Putting all of this together would look something like:
318 * void parse(const uint8_t *source, size_t length) {
319 * pm_parser_t parser;
320 * pm_parser_init(&parser, source, length, NULL);
322 * pm_node_t *root = pm_parse(&parser);
323 * printf("PARSED!\n");
325 * pm_node_destroy(&parser, root);
326 * pm_parser_free(&parser);
330 * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
331 * their first member. This means you can downcast and upcast any node in the
332 * tree to a `pm_node_t`.
334 * @section serializing Serializing
336 * Prism provides the ability to serialize the AST and its related metadata into
337 * a binary format. This format is designed to be portable to different
338 * languages and runtimes so that you only need to make one FFI call in order to
339 * parse Ruby code. The structures and functions that you're going to want to
340 * use and be aware of are:
342 * * `pm_buffer_t` - a small buffer object that will hold the serialized AST
343 * * `pm_buffer_free` - free the memory associated with the buffer
344 * * `pm_serialize` - serialize the AST into a buffer
345 * * `pm_serialize_parse` - parse and serialize the AST into a buffer
347 * Putting all of this together would look something like:
350 * void serialize(const uint8_t *source, size_t length) {
351 * pm_buffer_t buffer = { 0 };
353 * pm_serialize_parse(&buffer, source, length, NULL);
354 * printf("SERIALIZED!\n");
356 * pm_buffer_free(&buffer);
360 * @section inspecting Inspecting
362 * Prism provides the ability to inspect the AST by pretty-printing nodes. You
363 * can do this with the `pm_prettyprint` function, which you would use like:
366 * void prettyprint(const uint8_t *source, size_t length) {
367 * pm_parser_t parser;
368 * pm_parser_init(&parser, source, length, NULL);
370 * pm_node_t *root = pm_parse(&parser);
371 * pm_buffer_t buffer = { 0 };
373 * pm_prettyprint(&buffer, &parser, root);
374 * printf("%.*s\n", (int) buffer.length, buffer.value);
376 * pm_buffer_free(&buffer);
377 * pm_node_destroy(&parser, root);
378 * pm_parser_free(&parser);