Diffstat (limited to 'yjit/src')
-rw-r--r--  yjit/src/codegen.rs             15
-rw-r--r--  yjit/src/core.rs                68
-rw-r--r--  yjit/src/cruby_bindings.inc.rs   1
-rw-r--r--  yjit/src/stats.rs               23
4 files changed, 96 insertions(+), 11 deletions(-)
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 5d26e58fa7..8847affd2b 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -1810,7 +1810,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
// Gets the EP of the ISeq of the containing method, or "local level".
// Equivalent of GET_LEP() macro.
-fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd {
+fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
// Equivalent of get_lvar_level() in compile.c
fn get_lvar_level(iseq: IseqPtr) -> u32 {
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
@@ -6910,6 +6910,12 @@ fn gen_send_iseq(
// Create a context for the callee
let mut callee_ctx = Context::default();
+ // If the callee has :inline_block annotation and the callsite has a block ISEQ,
+ // duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
+ if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
+ callee_ctx.set_inline_block(iseq);
+ }
+
// Set the argument types in the callee's context
for arg_idx in 0..argc {
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
@@ -7904,6 +7910,13 @@ fn gen_invokeblock_specialized(
Counter::guard_invokeblock_tag_changed,
);
+ // If the current ISEQ is annotated to be inlined but it's not being inlined here,
+ // generate a dynamic dispatch to avoid making this yield megamorphic.
+ if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
+ gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined);
+ return None;
+ }
+
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
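The two codegen.rs hunks above gate inlining on the BUILTIN_ATTR_INLINE_BLOCK bit: gen_send_iseq records the caller's block ISEQ in the callee Context only when the callee carries the :inline_block annotation and the callsite passes a block ISEQ, while gen_invokeblock_specialized falls back to dynamic dispatch when an annotated ISEQ was not inlined at this callsite. A minimal, self-contained sketch of that gating condition follows; the types here are hypothetical stand-ins for YJIT's IseqPtr, BlockHandler, and Context, not the real definitions.

// Hypothetical stand-ins for YJIT's types, only to illustrate the gating check.
type IseqPtr = usize;

enum BlockHandler {
    None,
    BlockISeq(IseqPtr),
}

const BUILTIN_ATTR_INLINE_BLOCK: u32 = 4;

#[derive(Default)]
struct Context {
    inline_block: u64, // 0 means "no block is being inlined"
}

impl Context {
    fn set_inline_block(&mut self, iseq: IseqPtr) {
        self.inline_block = iseq as u64;
    }
}

// Record the caller's block ISEQ only when the callee carries the
// :inline_block annotation and the callsite actually passes a block ISEQ.
fn maybe_set_inline_block(callee_ctx: &mut Context, block: Option<BlockHandler>, builtin_attrs: u32) {
    if let (Some(BlockHandler::BlockISeq(iseq)), true) =
        (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0)
    {
        callee_ctx.set_inline_block(iseq);
    }
}

fn main() {
    let mut callee_ctx = Context::default();
    maybe_set_inline_block(&mut callee_ctx, Some(BlockHandler::BlockISeq(0x1000)), BUILTIN_ATTR_INLINE_BLOCK);
    // The callee context is now specific to that block ISEQ.
    assert_ne!(callee_ctx.inline_block, 0);
}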
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 27c3541b59..9929ecb9a6 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -480,6 +480,13 @@ pub struct Context {
// Stack slot type/local_idx we track
// 8 temp types * 4 bits, total 32 bits
temp_payload: u32,
+
+ /// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
+ /// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
+ /// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
+ /// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
+ /// could allow this to consume no bytes, so we're leaving this as is.
+ inline_block: u64,
}
/// Tuple of (iseq, idx) used to identify basic blocks
@@ -1400,14 +1407,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
/// Count the number of block versions matching a given blockid
-fn get_num_versions(blockid: BlockId) -> usize {
+/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
+fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
Some(payload) => {
payload
.version_map
.get(insn_idx)
- .map(|versions| versions.len())
+ .map(|versions| {
+ versions.iter().filter(|&&version|
+ unsafe { version.as_ref() }.ctx.inline() == inlined
+ ).count()
+ })
.unwrap_or(0)
}
None => 0,
@@ -1465,6 +1477,9 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
+/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
+const MAX_INLINE_VERSIONS: usize = 1000;
+
/// Produce a generic context when the block version limit is hit for a blockid
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
// Guard chains implement limits separately, do nothing
@@ -1472,21 +1487,39 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
+ let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
+ let max_versions = if ctx.inline() {
+ MAX_INLINE_VERSIONS
+ } else {
+ get_option!(max_versions)
+ };
+
// If this block version we're about to add will hit the version limit
- if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ if next_versions >= max_versions {
// Produce a generic context that stores no type information,
// but still respects the stack_size and sp_offset constraints.
// This new context will then match all future requests.
let generic_ctx = ctx.get_generic_ctx();
- debug_assert_ne!(
- TypeDiff::Incompatible,
- ctx.diff(&generic_ctx),
- "should substitute a compatible context",
- );
+ if cfg!(debug_assertions) {
+ let mut ctx = ctx.clone();
+ if ctx.inline() {
+ // Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
+ // to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
+ ctx.inline_block = 0;
+ assert!(generic_ctx.inline_block == 0);
+ }
+
+ assert_ne!(
+ TypeDiff::Incompatible,
+ ctx.diff(&generic_ctx),
+ "should substitute a compatible context",
+ );
+ }
return generic_ctx;
}
+ incr_counter_to!(max_inline_versions, next_versions);
return *ctx;
}
@@ -2020,6 +2053,16 @@ impl Context {
self.local_types = 0;
}
+ /// Return true if the code is inlined by the caller
+ pub fn inline(&self) -> bool {
+ self.inline_block != 0
+ }
+
+ /// Set a block ISEQ given to the Block of this Context
+ pub fn set_inline_block(&mut self, iseq: IseqPtr) {
+ self.inline_block = iseq as u64
+ }
+
/// Compute a difference score for two context objects
pub fn diff(&self, dst: &Context) -> TypeDiff {
// Self is the source context (at the end of the predecessor)
@@ -2065,6 +2108,13 @@ impl Context {
TypeDiff::Incompatible => return TypeDiff::Incompatible,
};
+ // Check the block to inline
+ if src.inline_block != dst.inline_block {
+ // find_block_version should not find existing blocks with different
+ // inline_block so that their yield will not be megamorphic.
+ return TypeDiff::Incompatible;
+ }
+
// For each local type we track
for i in 0.. MAX_LOCAL_TYPES {
let t_src = src.get_local_type(i);
@@ -3456,7 +3506,7 @@ mod tests {
#[test]
fn context_size() {
- assert_eq!(mem::size_of::<Context>(), 15);
+ assert_eq!(mem::size_of::<Context>(), 23);
}
#[test]
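The core.rs changes make inline_block part of what distinguishes block versions: Context::diff reports TypeDiff::Incompatible whenever two contexts disagree on the block being inlined, so each caller block ISEQ gets its own compiled version of the callee (keeping its `yield` monomorphic), and limit_block_versions counts inlined and non-inlined versions separately against MAX_INLINE_VERSIONS and --yjit-max-versions. A stripped-down sketch of that matching rule, using a hypothetical Context rather than YJIT's real one:

// Hypothetical, simplified Context: only the field this patch adds.
#[derive(Clone, Copy, Default)]
struct Context {
    inline_block: u64, // caller's block ISEQ, 0 if not inlined
}

#[derive(Debug, PartialEq)]
enum TypeDiff {
    Compatible(usize),
    Incompatible,
}

impl Context {
    fn inline(&self) -> bool {
        self.inline_block != 0
    }

    // Mirrors the new check in Context::diff: versions built for different
    // block ISEQs (or for "no inlining") never match each other.
    fn diff(&self, dst: &Context) -> TypeDiff {
        if self.inline_block != dst.inline_block {
            return TypeDiff::Incompatible;
        }
        TypeDiff::Compatible(0)
    }
}

fn main() {
    let not_inlined = Context::default();
    let caller_a = Context { inline_block: 0x1000 };
    let caller_b = Context { inline_block: 0x2000 };
    assert!(!not_inlined.inline() && caller_a.inline());
    assert_eq!(caller_a.diff(&caller_b), TypeDiff::Incompatible);
    assert_eq!(caller_a.diff(&not_inlined), TypeDiff::Incompatible);
    assert_eq!(caller_a.diff(&caller_a), TypeDiff::Compatible(0));
}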
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index d67653890c..e6dba2b59d 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -449,6 +449,7 @@ pub struct iseq_inline_cvar_cache_entry {
}
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
+pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
pub type rb_builtin_attr = u32;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 7df01448a4..d8dd4b9389 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -245,7 +245,7 @@ macro_rules! make_counters {
/// The list of counters that are available without --yjit-stats.
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
-pub const DEFAULT_COUNTERS: [Counter; 8] = [
+pub const DEFAULT_COUNTERS: [Counter; 9] = [
Counter::code_gc_count,
Counter::compiled_iseq_entry,
Counter::cold_iseq_entry,
@@ -254,6 +254,7 @@ pub const DEFAULT_COUNTERS: [Counter; 8] = [
Counter::compiled_block_count,
Counter::compiled_branch_count,
Counter::compile_time_ns,
+ Counter::max_inline_versions,
];
/// Macro to increase a counter by name and count
@@ -269,6 +270,24 @@ macro_rules! incr_counter_by {
}
pub(crate) use incr_counter_by;
+/// Macro to increase a counter if the given value is larger
+macro_rules! incr_counter_to {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident, $count:expr) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe {
+ $crate::stats::COUNTERS.$counter_name = u64::max(
+ $crate::stats::COUNTERS.$counter_name,
+ $count as u64,
+ )
+ }
+ }
+ };
+}
+pub(crate) use incr_counter_to;
+
/// Macro to increment a counter by name
macro_rules! incr_counter {
// Unsafe is ok here because options are initialized
@@ -395,6 +414,7 @@ make_counters! {
invokeblock_iseq_arg0_args_splat,
invokeblock_iseq_arg0_not_array,
invokeblock_iseq_arg0_wrong_len,
+ invokeblock_iseq_not_inlined,
invokeblock_ifunc_args_splat,
invokeblock_ifunc_kw_splat,
invokeblock_proc,
@@ -518,6 +538,7 @@ make_counters! {
defer_empty_count,
branch_insn_count,
branch_known_count,
+ max_inline_versions,
freed_iseq_count,
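The new incr_counter_to! macro keeps a high-water mark rather than a running total: max_inline_versions ends up holding the largest inline version count passed to it. A rough sketch of that semantics, using an atomic for a self-contained safe example instead of YJIT's unsafe write to the COUNTERS struct:

use std::sync::atomic::{AtomicU64, Ordering};

// Stand-in for COUNTERS.max_inline_versions.
static MAX_INLINE_VERSIONS_COUNTER: AtomicU64 = AtomicU64::new(0);

// Raise the counter to `count` if it is not already at least that large.
fn incr_counter_to(count: u64) {
    MAX_INLINE_VERSIONS_COUNTER.fetch_max(count, Ordering::Relaxed);
}

fn main() {
    for next_versions in [3u64, 7, 5] {
        incr_counter_to(next_versions);
    }
    // The counter records the peak value it was given, not a sum.
    assert_eq!(MAX_INLINE_VERSIONS_COUNTER.load(Ordering::Relaxed), 7);
}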