diff options
author | Kevin Newton <[email protected]> | 2024-02-23 14:43:20 -0500 |
---|---|---|
committer | git <[email protected]> | 2024-02-23 20:02:19 +0000 |
commit | ec6532b458b34e907a2474bab70642414bbb0d05 (patch) | |
tree | 38639f44e86397f4202ae5ebe74a0989e955c652 | |
parent | ce8531fed4c7295aee94d24124914441db578136 (diff) |
[ruby/prism] Add some encoding debugging to make testing easier
https://github.com/ruby/prism/commit/0c042561c6
-rw-r--r-- | lib/prism/debug.rb | 43 | ||||
-rw-r--r-- | prism/extension.c | 85 |
2 files changed, 128 insertions, 0 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb index c888436e4d..553205a04b 100644 --- a/lib/prism/debug.rb +++ b/lib/prism/debug.rb @@ -202,5 +202,48 @@ module Prism def self.newlines(source) Prism.parse(source).source.offsets end + + # A wrapping around prism's internal encoding data structures. This is used + # for reflection and debugging purposes. + class Encoding + # The name of the encoding, that can be passed to Encoding.find. + attr_reader :name + + # Initialize a new encoding with the given name and whether or not it is + # a multibyte encoding. + def initialize(name, multibyte) + @name = name + @multibyte = multibyte + end + + # Whether or not the encoding is a multibyte encoding. + def multibyte? + @multibyte + end + + # Returns the number of bytes of the first character in the source string, + # if it is valid for the encoding. Otherwise, returns 0. + def width(source) + Encoding._width(name, source) + end + + # Returns true if the first character in the source string is a valid + # alphanumeric character for the encoding. + def alnum?(source) + Encoding._alnum?(name, source) + end + + # Returns true if the first character in the source string is a valid + # alphabetic character for the encoding. + def alpha?(source) + Encoding._alpha?(name, source) + end + + # Returns true if the first character in the source string is a valid + # uppercase character for the encoding. + def upper?(source) + Encoding._upper?(name, source) + end + end end end diff --git a/prism/extension.c b/prism/extension.c index fd2281fd79..3eccb20e5a 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -21,6 +21,8 @@ VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; +VALUE rb_cPrismDebugEncoding; + ID rb_option_id_filepath; ID rb_option_id_encoding; ID rb_option_id_line; @@ -1102,6 +1104,80 @@ format_errors(VALUE self, VALUE source, VALUE colorize) { return result; } +/** + * call-seq: Debug::Encoding.all -> Array[Debug::Encoding] + * + * Return an array of all of the encodings that prism knows about. + */ +static VALUE +encoding_all(VALUE self) { + VALUE encodings = rb_ary_new(); + + for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) { + const pm_encoding_t *encoding = &pm_encodings[index]; + + VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse }; + rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding)); + } + + return encodings; +} + +static const pm_encoding_t * +encoding_find(VALUE name) { + const uint8_t *source = (const uint8_t *) RSTRING_PTR(name); + size_t length = RSTRING_LEN(name); + + const pm_encoding_t *encoding = pm_encoding_find(source, source + length); + if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); } + + return encoding; +} + +/** + * call-seq: Debug::Encoding.width(source) -> Integer + * + * Returns the width of the first character in the given string if it is valid + * in the encoding. If it is not, this function returns 0. + */ +static VALUE +encoding_char_width(VALUE self, VALUE name, VALUE value) { + return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value))); +} + +/** + * call-seq: Debug::Encoding.alnum?(source) -> true | false + * + * Returns true if the first character in the given string is an alphanumeric + * character in the encoding. + */ +static VALUE +encoding_alnum_char(VALUE self, VALUE name, VALUE value) { + return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse; +} + +/** + * call-seq: Debug::Encoding.alpha?(source) -> true | false + * + * Returns true if the first character in the given string is an alphabetic + * character in the encoding. + */ +static VALUE +encoding_alpha_char(VALUE self, VALUE name, VALUE value) { + return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse; +} + +/** + * call-seq: Debug::Encoding.upper?(source) -> true | false + * + * Returns true if the first character in the given string is an uppercase + * character in the encoding. + */ +static VALUE +encoding_isupper_char(VALUE self, VALUE name, VALUE value) { + return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse; +} + /******************************************************************************/ /* Initialization of the extension */ /******************************************************************************/ @@ -1182,6 +1258,15 @@ Init_prism(void) { rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1); rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2); + // Next, define the functions that are exposed through the private + // Debug::Encoding class. + rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject); + rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0); + rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2); + rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2); + rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2); + rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2); + // Next, initialize the other APIs. Init_prism_api_node(); Init_prism_pack(); |