diff options
author | Takashi Kokubun <[email protected]> | 2025-02-14 17:49:53 -0800 |
---|---|---|
committer | Takashi Kokubun <[email protected]> | 2025-04-18 21:52:58 +0900 |
commit | 562c35a560e2f99b9ab6b4ff352e4cf7a73f050e (patch) | |
tree | 1700951076720a166a4e136fa610cffa1e9ba8b8 | |
parent | 53bee25068d11877b034af53797cf6e5d3d8e2be (diff) |
Resurrect asm comment support
Notes:
Merged: https://github.com/ruby/ruby/pull/13131
-rw-r--r-- | zjit.c | 12 | ||||
-rw-r--r-- | zjit/src/asm/mod.rs | 32 | ||||
-rw-r--r-- | zjit/src/backend/arm64/mod.rs | 5 | ||||
-rw-r--r-- | zjit/src/backend/mod.rs | 6 | ||||
-rw-r--r-- | zjit/src/backend/x86_64/mod.rs | 5 | ||||
-rw-r--r-- | zjit/src/codegen.rs | 19 | ||||
-rw-r--r-- | zjit/src/cruby.rs | 38 | ||||
-rw-r--r-- | zjit/src/cruby_bindings.inc.rs | 2 | ||||
-rw-r--r-- | zjit/src/disasm.rs | 15 | ||||
-rw-r--r-- | zjit/src/lib.rs | 3 | ||||
-rw-r--r-- | zjit/src/state.rs | 2 |
11 files changed, 110 insertions, 29 deletions
@@ -156,6 +156,18 @@ rb_zjit_reserve_addr_space(uint32_t mem_size) #endif } +unsigned long +rb_RSTRING_LEN(VALUE str) +{ + return RSTRING_LEN(str); +} + +char * +rb_RSTRING_PTR(VALUE str) +{ + return RSTRING_PTR(str); +} + void rb_zjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception) { diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs index 20ebeb0d95..ec2555fc2f 100644 --- a/zjit/src/asm/mod.rs +++ b/zjit/src/asm/mod.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; //use std::fmt; use std::rc::Rc; use std::cell::RefCell; @@ -34,6 +35,12 @@ pub struct CodeBlock { // Current writing position write_pos: usize, + // A switch for keeping comments. They take up memory. + keep_comments: bool, + + // Comments for assembly instructions, if that feature is enabled + asm_comments: BTreeMap<usize, Vec<String>>, + // Set if the CodeBlock is unable to output some instructions, // for example, when there is not enough space or when a jump // target is too far away. @@ -42,14 +49,37 @@ pub struct CodeBlock { impl CodeBlock { /// Make a new CodeBlock - pub fn new(mem_block: Rc<RefCell<VirtualMem>>) -> Self { + pub fn new(mem_block: Rc<RefCell<VirtualMem>>, keep_comments: bool) -> Self { Self { mem_block, write_pos: 0, + keep_comments, + asm_comments: BTreeMap::new(), dropped_bytes: false, } } + /// Add an assembly comment if the feature is on. + pub fn add_comment(&mut self, comment: &str) { + if !self.keep_comments { + return; + } + + let cur_ptr = self.get_write_ptr().raw_addr(self); + + // If there's no current list of comments for this line number, add one. + let this_line_comments = self.asm_comments.entry(cur_ptr).or_default(); + + // Unless this comment is the same as the last one at this same line, add it. 
+ if this_line_comments.last().map(String::as_str) != Some(comment) { + this_line_comments.push(comment.to_string()); + } + } + + pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> { + self.asm_comments.get(&pos) + } + pub fn get_write_pos(&self) -> usize { self.write_pos } diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 2fd6818ab4..2757e8de32 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -935,9 +935,8 @@ impl Assembler let mut insn_gc_offsets: Vec<u32> = Vec::new(); match insn { - Insn::Comment(_text) => { - //cb.add_comment(text); - unimplemented!("comments are not supported yet"); + Insn::Comment(text) => { + cb.add_comment(text); }, Insn::Label(_target) => { //cb.write_label(target.unwrap_label_idx()); diff --git a/zjit/src/backend/mod.rs b/zjit/src/backend/mod.rs index c5d8bcc5e2..20756a09d8 100644 --- a/zjit/src/backend/mod.rs +++ b/zjit/src/backend/mod.rs @@ -1657,7 +1657,7 @@ impl Assembler #[cfg(feature = "disasm")] if get_option!(dump_disasm) { let end_addr = cb.get_write_ptr(); - let disasm = crate::disasm::disasm_addr_range(start_addr.raw_ptr(cb) as usize, end_addr.raw_ptr(cb) as usize); + let disasm = crate::disasm::disasm_addr_range(cb, start_addr.raw_ptr(cb) as usize, end_addr.raw_ptr(cb) as usize); println!("{}", disasm); } ret @@ -2153,16 +2153,14 @@ impl Assembler { /// Macro to use format! for Insn::Comment, which skips a format! call /// when not dumping disassembly. -/* macro_rules! 
asm_comment { ($asm:expr, $($fmt:tt)*) => { - if $crate::options::get_option_ref!(dump_disasm).is_some() { + if $crate::options::get_option!(dump_disasm) { $asm.push_insn(Insn::Comment(format!($($fmt)*))); } }; } pub(crate) use asm_comment; -*/ #[cfg(test)] mod tests { diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 7beb5166a8..4c3aeacd2f 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -477,9 +477,8 @@ impl Assembler let mut insn_gc_offsets: Vec<u32> = Vec::new(); match insn { - Insn::Comment(_text) => { - unimplemented!("comments are not supported yet"); - //cb.add_comment(text); + Insn::Comment(text) => { + cb.add_comment(text); }, // Write the label at the current position diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 9b40536187..d87d6e4126 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -10,10 +10,10 @@ use crate::{ use crate::get_option; /// Compile SSA IR into machine code -pub fn gen_function(cb: &mut CodeBlock, function: &Function) -> Option<CodePtr> { +pub fn gen_function(cb: &mut CodeBlock, function: &Function, iseq: IseqPtr) -> Option<CodePtr> { // Set up special registers let mut asm = Assembler::new(); - gen_entry_prologue(&mut asm); + gen_entry_prologue(&mut asm, iseq); // Compile each instruction in the IR for insn in function.insns.iter() { @@ -28,22 +28,15 @@ pub fn gen_function(cb: &mut CodeBlock, function: &Function) -> Option<CodePtr> } // Generate code if everything can be compiled - let start_ptr = cb.get_write_ptr(); - asm.compile_with_regs(cb, Assembler::get_alloc_regs()); // TODO: resurrect cache busting for arm64 + let start_ptr = asm.compile(cb).map(|(start_ptr, _)| start_ptr); cb.mark_all_executable(); - #[cfg(feature = "disasm")] - if get_option!(dump_disasm) { - let end_ptr = cb.get_write_ptr(); - let disasm = crate::disasm::disasm_addr_range(start_ptr.raw_ptr(cb) as usize, end_ptr.raw_ptr(cb) as usize); - println!("{}", disasm); - } - - 
Some(start_ptr) + start_ptr } /// Compile an interpreter entry block to be inserted into an ISEQ -fn gen_entry_prologue(asm: &mut Assembler) { +fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { + asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); asm.frame_setup(); // Save the registers we'll use for CFP, EP, SP diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 5e87b3d3a1..7707a143e3 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -630,6 +630,44 @@ pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> { } } +// Location is the file defining the method, colon, method name. +// Filenames are sometimes internal strings supplied to eval, +// so be careful with them. +pub fn iseq_get_location(iseq: IseqPtr, pos: u16) -> String { + let iseq_label = unsafe { rb_iseq_label(iseq) }; + let iseq_path = unsafe { rb_iseq_path(iseq) }; + let iseq_lineno = unsafe { rb_iseq_line_no(iseq, pos as usize) }; + + let mut s = if iseq_label == Qnil { + "None".to_string() + } else { + ruby_str_to_rust(iseq_label) + }; + s.push_str("@"); + if iseq_path == Qnil { + s.push_str("None"); + } else { + s.push_str(&ruby_str_to_rust(iseq_path)); + } + s.push_str(":"); + s.push_str(&iseq_lineno.to_string()); + s +} + + +// Convert a CRuby UTF-8-encoded RSTRING into a Rust string. +// This should work fine on ASCII strings and anything else +// that is considered legal UTF-8, including embedded nulls. +fn ruby_str_to_rust(v: VALUE) -> String { + let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8; + let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap(); + let str_slice: &[u8] = unsafe { std::slice::from_raw_parts(str_ptr, str_len) }; + match String::from_utf8(str_slice.to_vec()) { + Ok(utf8) => utf8, + Err(_) => String::new(), + } +} + /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. 
pub struct SourceLocation { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 9c05c0eb54..ad16f1711b 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -908,6 +908,8 @@ extern "C" { pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void); pub fn rb_zjit_get_page_size() -> u32; pub fn rb_zjit_reserve_addr_space(mem_size: u32) -> *mut u8; + pub fn rb_RSTRING_LEN(str_: VALUE) -> ::std::os::raw::c_ulong; + pub fn rb_RSTRING_PTR(str_: VALUE) -> *mut ::std::os::raw::c_char; pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; diff --git a/zjit/src/disasm.rs b/zjit/src/disasm.rs index 88715f1968..5c7a7be704 100644 --- a/zjit/src/disasm.rs +++ b/zjit/src/disasm.rs @@ -1,5 +1,9 @@ -#[cfg(feature = "disasm")] -pub fn disasm_addr_range(start_addr: usize, end_addr: usize) -> String { +use crate::asm::CodeBlock; + +pub const BOLD_BEGIN: &str = "\x1b[1m"; +pub const BOLD_END: &str = "\x1b[22m"; + +pub fn disasm_addr_range(cb: &CodeBlock, start_addr: usize, end_addr: usize) -> String { use std::fmt::Write; let mut out = String::from(""); @@ -34,7 +38,12 @@ pub fn disasm_addr_range(start_addr: usize, end_addr: usize) -> String { // For each instruction in this block for insn in insns.as_ref() { - // TODO: support comments + // Comments for this block + if let Some(comment_list) = cb.comments_at(insn.address() as usize) { + for comment in comment_list { + writeln!(&mut out, " {BOLD_BEGIN}# {comment}{BOLD_END}").unwrap(); + } + } writeln!(&mut out, " {insn}").unwrap(); } diff --git a/zjit/src/lib.rs b/zjit/src/lib.rs index 6782cc584d..6e2ad693c7 100644 --- a/zjit/src/lib.rs +++ b/zjit/src/lib.rs @@ -10,6 +10,7 @@ mod cast; mod virtualmem; mod asm; mod backend; +#[cfg(feature = 
"disasm")] mod disasm; mod options; @@ -93,7 +94,7 @@ pub extern "C" fn rb_zjit_iseq_gen_entry_point(iseq: IseqPtr, _ec: EcPtr) -> *co // Compile SSA IR into machine code let cb = ZJITState::get_code_block(); - match gen_function(cb, &ssa) { + match gen_function(cb, &ssa, iseq) { Some(start_ptr) => start_ptr.raw_ptr(cb), // Compilation failed, continue executing in the interpreter only diff --git a/zjit/src/state.rs b/zjit/src/state.rs index 538f3c6989..e5606ea36f 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -50,7 +50,7 @@ impl ZJITState { ); let mem_block = Rc::new(RefCell::new(mem_block)); - CodeBlock::new(mem_block.clone()) + CodeBlock::new(mem_block.clone(), options.dump_disasm) }; #[cfg(test)] let cb = CodeBlock::new_dummy(); |