summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Takashi Kokubun <[email protected]> 2025-02-14 17:49:53 -0800
committer: Takashi Kokubun <[email protected]> 2025-04-18 21:52:58 +0900
commit: 562c35a560e2f99b9ab6b4ff352e4cf7a73f050e (patch)
tree: 1700951076720a166a4e136fa610cffa1e9ba8b8
parent: 53bee25068d11877b034af53797cf6e5d3d8e2be (diff)
Resurrect asm comment support
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/13131
-rw-r--r-- zjit.c 12
-rw-r--r-- zjit/src/asm/mod.rs 32
-rw-r--r-- zjit/src/backend/arm64/mod.rs 5
-rw-r--r-- zjit/src/backend/mod.rs 6
-rw-r--r-- zjit/src/backend/x86_64/mod.rs 5
-rw-r--r-- zjit/src/codegen.rs 19
-rw-r--r-- zjit/src/cruby.rs 38
-rw-r--r-- zjit/src/cruby_bindings.inc.rs 2
-rw-r--r-- zjit/src/disasm.rs 15
-rw-r--r-- zjit/src/lib.rs 3
-rw-r--r-- zjit/src/state.rs 2
11 files changed, 110 insertions, 29 deletions
diff --git a/zjit.c b/zjit.c
index 925924b328..8cb4776351 100644
--- a/zjit.c
+++ b/zjit.c
@@ -156,6 +156,18 @@ rb_zjit_reserve_addr_space(uint32_t mem_size)
#endif
}
+unsigned long
+rb_RSTRING_LEN(VALUE str)
+{
+ return RSTRING_LEN(str);
+}
+
+char *
+rb_RSTRING_PTR(VALUE str)
+{
+ return RSTRING_PTR(str);
+}
+
void
rb_zjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception)
{
diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs
index 20ebeb0d95..ec2555fc2f 100644
--- a/zjit/src/asm/mod.rs
+++ b/zjit/src/asm/mod.rs
@@ -1,3 +1,4 @@
+use std::collections::BTreeMap;
//use std::fmt;
use std::rc::Rc;
use std::cell::RefCell;
@@ -34,6 +35,12 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,
+ // A switch for keeping comments. They take up memory.
+ keep_comments: bool,
+
+ // Comments for assembly instructions, if that feature is enabled
+ asm_comments: BTreeMap<usize, Vec<String>>,
+
// Set if the CodeBlock is unable to output some instructions,
// for example, when there is not enough space or when a jump
// target is too far away.
@@ -42,14 +49,37 @@ pub struct CodeBlock {
impl CodeBlock {
/// Make a new CodeBlock
- pub fn new(mem_block: Rc<RefCell<VirtualMem>>) -> Self {
+ pub fn new(mem_block: Rc<RefCell<VirtualMem>>, keep_comments: bool) -> Self {
Self {
mem_block,
write_pos: 0,
+ keep_comments,
+ asm_comments: BTreeMap::new(),
dropped_bytes: false,
}
}
+ /// Add an assembly comment if the feature is on.
+ pub fn add_comment(&mut self, comment: &str) {
+ if !self.keep_comments {
+ return;
+ }
+
+ let cur_ptr = self.get_write_ptr().raw_addr(self);
+
+ // If there's no current list of comments for this address, add one.
+ let this_line_comments = self.asm_comments.entry(cur_ptr).or_default();
+
+ // Unless this comment is the same as the last one at this same address, add it.
+ if this_line_comments.last().map(String::as_str) != Some(comment) {
+ this_line_comments.push(comment.to_string());
+ }
+ }
+
+ pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> {
+ self.asm_comments.get(&pos)
+ }
+
pub fn get_write_pos(&self) -> usize {
self.write_pos
}
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 2fd6818ab4..2757e8de32 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -935,9 +935,8 @@ impl Assembler
let mut insn_gc_offsets: Vec<u32> = Vec::new();
match insn {
- Insn::Comment(_text) => {
- //cb.add_comment(text);
- unimplemented!("comments are not supported yet");
+ Insn::Comment(text) => {
+ cb.add_comment(text);
},
Insn::Label(_target) => {
//cb.write_label(target.unwrap_label_idx());
diff --git a/zjit/src/backend/mod.rs b/zjit/src/backend/mod.rs
index c5d8bcc5e2..20756a09d8 100644
--- a/zjit/src/backend/mod.rs
+++ b/zjit/src/backend/mod.rs
@@ -1657,7 +1657,7 @@ impl Assembler
#[cfg(feature = "disasm")]
if get_option!(dump_disasm) {
let end_addr = cb.get_write_ptr();
- let disasm = crate::disasm::disasm_addr_range(start_addr.raw_ptr(cb) as usize, end_addr.raw_ptr(cb) as usize);
+ let disasm = crate::disasm::disasm_addr_range(cb, start_addr.raw_ptr(cb) as usize, end_addr.raw_ptr(cb) as usize);
println!("{}", disasm);
}
ret
@@ -2153,16 +2153,14 @@ impl Assembler {
/// Macro to use format! for Insn::Comment, which skips a format! call
/// when not dumping disassembly.
-/*
macro_rules! asm_comment {
($asm:expr, $($fmt:tt)*) => {
- if $crate::options::get_option_ref!(dump_disasm).is_some() {
+ if $crate::options::get_option!(dump_disasm) {
$asm.push_insn(Insn::Comment(format!($($fmt)*)));
}
};
}
pub(crate) use asm_comment;
-*/
#[cfg(test)]
mod tests {
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 7beb5166a8..4c3aeacd2f 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -477,9 +477,8 @@ impl Assembler
let mut insn_gc_offsets: Vec<u32> = Vec::new();
match insn {
- Insn::Comment(_text) => {
- unimplemented!("comments are not supported yet");
- //cb.add_comment(text);
+ Insn::Comment(text) => {
+ cb.add_comment(text);
},
// Write the label at the current position
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 9b40536187..d87d6e4126 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -10,10 +10,10 @@ use crate::{
use crate::get_option;
/// Compile SSA IR into machine code
-pub fn gen_function(cb: &mut CodeBlock, function: &Function) -> Option<CodePtr> {
+pub fn gen_function(cb: &mut CodeBlock, function: &Function, iseq: IseqPtr) -> Option<CodePtr> {
// Set up special registers
let mut asm = Assembler::new();
- gen_entry_prologue(&mut asm);
+ gen_entry_prologue(&mut asm, iseq);
// Compile each instruction in the IR
for insn in function.insns.iter() {
@@ -28,22 +28,15 @@ pub fn gen_function(cb: &mut CodeBlock, function: &Function) -> Option<CodePtr>
}
// Generate code if everything can be compiled
- let start_ptr = cb.get_write_ptr();
- asm.compile_with_regs(cb, Assembler::get_alloc_regs()); // TODO: resurrect cache busting for arm64
+ let start_ptr = asm.compile(cb).map(|(start_ptr, _)| start_ptr);
cb.mark_all_executable();
- #[cfg(feature = "disasm")]
- if get_option!(dump_disasm) {
- let end_ptr = cb.get_write_ptr();
- let disasm = crate::disasm::disasm_addr_range(start_ptr.raw_ptr(cb) as usize, end_ptr.raw_ptr(cb) as usize);
- println!("{}", disasm);
- }
-
- Some(start_ptr)
+ start_ptr
}
/// Compile an interpreter entry block to be inserted into an ISEQ
-fn gen_entry_prologue(asm: &mut Assembler) {
+fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
+ asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
asm.frame_setup();
// Save the registers we'll use for CFP, EP, SP
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index 5e87b3d3a1..7707a143e3 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -630,6 +630,44 @@ pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
}
}
+// Location is the ISEQ label, an `@`, the ISEQ path, a colon, and the line number
+// (i.e. `label@path:lineno`); a nil label or path is rendered as "None".
+// Filenames are sometimes internal strings supplied to eval, so be careful with them.
+pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String {
+ let iseq_label = unsafe { rb_iseq_label(iseq) };
+ let iseq_path = unsafe { rb_iseq_path(iseq) };
+ let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) };
+
+ let mut s = if iseq_label == Qnil {
+ "None".to_string()
+ } else {
+ ruby_str_to_rust(iseq_label)
+ };
+ s.push_str("@");
+ if iseq_path == Qnil {
+ s.push_str("None");
+ } else {
+ s.push_str(&ruby_str_to_rust(iseq_path));
+ }
+ s.push_str(":");
+ s.push_str(&iseq_lineno.to_string());
+ s
+}
+
+
+// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
+// This should work fine on ASCII strings and anything else
+// that is considered legal UTF-8, including embedded nulls.
+// Bytes that are not valid UTF-8 yield an empty string.
+fn ruby_str_to_rust(v: VALUE) -> String {
+ let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
+ let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
+ let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) };
+ match String::from_utf8(str_slice.to_vec()) {
+ Ok(utf8) => utf8,
+ Err(_) => String::new(),
+ }
+}
+
/// A location in Rust code for integrating with debugging facilities defined in C.
/// Use the [src_loc!] macro to create an instance.
pub struct SourceLocation {
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 9c05c0eb54..ad16f1711b 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -908,6 +908,8 @@ extern "C" {
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
pub fn rb_zjit_get_page_size() -> u32;
pub fn rb_zjit_reserve_addr_space(mem_size: u32) -> *mut u8;
+ pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong;
+ pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char;
pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
diff --git a/zjit/src/disasm.rs b/zjit/src/disasm.rs
index 88715f1968..5c7a7be704 100644
--- a/zjit/src/disasm.rs
+++ b/zjit/src/disasm.rs
@@ -1,5 +1,9 @@
-#[cfg(feature = "disasm")]
-pub fn disasm_addr_range(start_addr: usize, end_addr: usize) -> String {
+use crate::asm::CodeBlock;
+
+pub const BOLD_BEGIN: &str = "\x1b[1m";
+pub const BOLD_END: &str = "\x1b[22m";
+
+pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String {
use std::fmt::Write;
let mut out = String::from("");
@@ -34,7 +38,12 @@ pub fn disasm_addr_range(start_addr: usize, end_addr: usize) -> String {
// For each instruction in this block
for insn in insns.as_ref() {
- // TODO: support comments
+ // Comments for this block
+ if let Some(comment_list) = cb.comments_at(insn.address() as usize) {
+ for comment in comment_list {
+ writeln!(&mut out, " {BOLD_BEGIN}# {comment}{BOLD_END}").unwrap();
+ }
+ }
writeln!(&mut out, " {insn}").unwrap();
}
diff --git a/zjit/src/lib.rs b/zjit/src/lib.rs
index 6782cc584d..6e2ad693c7 100644
--- a/zjit/src/lib.rs
+++ b/zjit/src/lib.rs
@@ -10,6 +10,7 @@ mod cast;
mod virtualmem;
mod asm;
mod backend;
+#[cfg(feature = "disasm")]
mod disasm;
mod options;
@@ -93,7 +94,7 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, _ec: EcPtr) -> *co
// Compile SSA IR into machine code
let cb = ZJITState::get_code_block();
- match gen_function(cb, &ssa) {
+ match gen_function(cb, &ssa, iseq) {
Some(start_ptr) => start_ptr.raw_ptr(cb),
// Compilation failed, continue executing in the interpreter only
diff --git a/zjit/src/state.rs b/zjit/src/state.rs
index 538f3c6989..e5606ea36f 100644
--- a/zjit/src/state.rs
+++ b/zjit/src/state.rs
@@ -50,7 +50,7 @@ impl ZJITState {
);
let mem_block = Rc::new(RefCell::new(mem_block));
- CodeBlock::new(mem_block.clone())
+ CodeBlock::new(mem_block.clone(), options.dump_disasm)
};
#[cfg(test)]
let cb = CodeBlock::new_dummy();