summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kevin Newton <[email protected]> 2024-02-23 14:43:20 -0500
committer: git <[email protected]> 2024-02-23 20:02:19 +0000
commit: ec6532b458b34e907a2474bab70642414bbb0d05 (patch)
tree: 38639f44e86397f4202ae5ebe74a0989e955c652
parent: ce8531fed4c7295aee94d24124914441db578136 (diff)
[ruby/prism] Add some encoding debugging to make testing easier
https://github.com/ruby/prism/commit/0c042561c6
-rw-r--r-- lib/prism/debug.rb 43
-rw-r--r-- prism/extension.c 85
2 files changed, 128 insertions, 0 deletions
diff --git a/lib/prism/debug.rb b/lib/prism/debug.rb
index c888436e4d..553205a04b 100644
--- a/lib/prism/debug.rb
+++ b/lib/prism/debug.rb
@@ -202,5 +202,48 @@ module Prism
def self.newlines(source)
Prism.parse(source).source.offsets
end
+
+ # A wrapping around prism's internal encoding data structures. This is used
+ # for reflection and debugging purposes.
+ class Encoding
+ # The name of the encoding, that can be passed to Encoding.find.
+ attr_reader :name
+
+ # Initialize a new encoding with the given name and whether or not it is
+ # a multibyte encoding.
+ def initialize(name, multibyte)
+ @name = name
+ @multibyte = multibyte
+ end
+
+ # Whether or not the encoding is a multibyte encoding.
+ def multibyte?
+ @multibyte
+ end
+
+ # Returns the number of bytes of the first character in the source string,
+ # if it is valid for the encoding. Otherwise, returns 0.
+ def width(source)
+ Encoding._width(name, source)
+ end
+
+ # Returns true if the first character in the source string is a valid
+ # alphanumeric character for the encoding.
+ def alnum?(source)
+ Encoding._alnum?(name, source)
+ end
+
+ # Returns true if the first character in the source string is a valid
+ # alphabetic character for the encoding.
+ def alpha?(source)
+ Encoding._alpha?(name, source)
+ end
+
+ # Returns true if the first character in the source string is a valid
+ # uppercase character for the encoding.
+ def upper?(source)
+ Encoding._upper?(name, source)
+ end
+ end
end
end
diff --git a/prism/extension.c b/prism/extension.c
index fd2281fd79..3eccb20e5a 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -21,6 +21,8 @@ VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
VALUE rb_cPrismParseResult;
+// The Debug::Encoding class object; assigned in Init_prism through
+// rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject).
+VALUE rb_cPrismDebugEncoding;
+
ID rb_option_id_filepath;
ID rb_option_id_encoding;
ID rb_option_id_line;
@@ -1102,6 +1104,80 @@ format_errors(VALUE self, VALUE source, VALUE colorize) {
return result;
}
+/**
+ * call-seq: Debug::Encoding.all -> Array[Debug::Encoding]
+ *
+ * Build one Debug::Encoding instance for each entry of the pm_encodings
+ * table and return them all in a new array.
+ */
+static VALUE
+encoding_all(VALUE self) {
+    VALUE result = rb_ary_new();
+    size_t position = 0;
+
+    while (position < PM_ENCODING_MAXIMUM) {
+        const pm_encoding_t *entry = pm_encodings + position;
+
+        // The constructor receives (name, multibyte), in that order.
+        VALUE name = rb_str_new_cstr(entry->name);
+        VALUE multibyte = Qfalse;
+        if (entry->multibyte) { multibyte = Qtrue; }
+
+        VALUE argv[2] = { name, multibyte };
+        VALUE instance = rb_class_new_instance(2, argv, rb_cPrismDebugEncoding);
+        rb_ary_push(result, instance);
+
+        position++;
+    }
+
+    return result;
+}
+
+/**
+ * Look up the prism encoding whose name matches the given Ruby string.
+ *
+ * Raises TypeError if name is not a String, and ArgumentError if
+ * pm_encoding_find does not recognize the name. Otherwise returns the
+ * encoding that pm_encoding_find produced.
+ */
+static const pm_encoding_t *
+encoding_find(VALUE name) {
+    // RSTRING_PTR and RSTRING_LEN are only meaningful for T_STRING objects, so
+    // reject anything else before touching the underlying buffer.
+    Check_Type(name, T_STRING);
+
+    const uint8_t *source = (const uint8_t *) RSTRING_PTR(name);
+    size_t length = (size_t) RSTRING_LEN(name);
+
+    const pm_encoding_t *encoding = pm_encoding_find(source, source + length);
+
+    // Format the Ruby object itself with PRIsVALUE rather than handing the raw
+    // byte pointer to %s: the buffer is a uint8_t * (not a char *) and a Ruby
+    // string may contain embedded NUL bytes that would truncate the message.
+    if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %" PRIsVALUE, name); }
+
+    return encoding;
+}
+
+/**
+ * call-seq: Debug::Encoding._width(name, source) -> Integer
+ *
+ * Looks up the encoding with the given name and returns the width of the
+ * first character in the source string if it is valid in that encoding. If it
+ * is not, this function returns 0.
+ *
+ * Raises ArgumentError if the encoding name is unknown and TypeError if source
+ * is not a String.
+ */
+static VALUE
+encoding_char_width(VALUE self, VALUE name, VALUE value) {
+    const pm_encoding_t *encoding = encoding_find(name);
+
+    // RSTRING_PTR/RSTRING_LEN must only be applied to String objects.
+    Check_Type(value, T_STRING);
+
+    return ULONG2NUM(encoding->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)));
+}
+
+/**
+ * call-seq: Debug::Encoding._alnum?(name, source) -> true | false
+ *
+ * Looks up the encoding with the given name and returns true if the first
+ * character in the source string is an alphanumeric character in that
+ * encoding.
+ *
+ * Raises ArgumentError if the encoding name is unknown and TypeError if source
+ * is not a String.
+ */
+static VALUE
+encoding_alnum_char(VALUE self, VALUE name, VALUE value) {
+    const pm_encoding_t *encoding = encoding_find(name);
+
+    // RSTRING_PTR/RSTRING_LEN must only be applied to String objects.
+    Check_Type(value, T_STRING);
+
+    return encoding->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq: Debug::Encoding._alpha?(name, source) -> true | false
+ *
+ * Looks up the encoding with the given name and returns true if the first
+ * character in the source string is an alphabetic character in that encoding.
+ *
+ * Raises ArgumentError if the encoding name is unknown and TypeError if source
+ * is not a String.
+ */
+static VALUE
+encoding_alpha_char(VALUE self, VALUE name, VALUE value) {
+    const pm_encoding_t *encoding = encoding_find(name);
+
+    // RSTRING_PTR/RSTRING_LEN must only be applied to String objects.
+    Check_Type(value, T_STRING);
+
+    return encoding->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq: Debug::Encoding._upper?(name, source) -> true | false
+ *
+ * Looks up the encoding with the given name and returns true if the first
+ * character in the source string is an uppercase character in that encoding.
+ *
+ * Raises ArgumentError if the encoding name is unknown and TypeError if source
+ * is not a String.
+ */
+static VALUE
+encoding_isupper_char(VALUE self, VALUE name, VALUE value) {
+    const pm_encoding_t *encoding = encoding_find(name);
+
+    // RSTRING_PTR/RSTRING_LEN must only be applied to String objects.
+    Check_Type(value, T_STRING);
+
+    return encoding->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse;
+}
+
/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
@@ -1182,6 +1258,15 @@ Init_prism(void) {
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2);
+ // Next, define the functions that are exposed through the private
+ // Debug::Encoding class.
+ rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject);
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0);
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2);
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2);
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2);
+ rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2);
+
// Next, initialize the other APIs.
Init_prism_api_node();
Init_prism_pack();