path: root/yjit_codegen.c
Diffstat (limited to 'yjit_codegen.c')
-rw-r--r--  yjit_codegen.c  318
1 file changed, 274 insertions, 44 deletions
diff --git a/yjit_codegen.c b/yjit_codegen.c
index abb6e6d3dd..2737791b53 100644
--- a/yjit_codegen.c
+++ b/yjit_codegen.c
@@ -1,17 +1,20 @@
-#include <assert.h>
-#include "insns.inc"
#include "internal.h"
+#include "insns.inc"
#include "vm_core.h"
#include "vm_sync.h"
#include "vm_callinfo.h"
#include "builtin.h"
+#include "gc.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/object.h"
+#include "internal/sanitizers.h"
#include "internal/string.h"
#include "internal/variable.h"
#include "internal/re.h"
#include "insns_info.inc"
+#include "probes.h"
+#include "probes_helper.h"
#include "yjit.h"
#include "yjit_iface.h"
#include "yjit_core.h"
@@ -36,6 +39,25 @@ codeblock_t* ocb = NULL;
// Code for exiting back to the interpreter from the leave insn
static void *leave_exit_code;
+// Code for the full logic of returning from a C method and exiting to the interpreter
+static uint32_t outline_full_cfunc_return_pos;
+
+// For implementing global code invalidation
+struct codepage_patch {
+ uint32_t mainline_patch_pos;
+ uint32_t outline_target_pos;
+};
+
+typedef rb_darray(struct codepage_patch) patch_array_t;
+
+static patch_array_t global_inval_patches = NULL;
+
+// This counter tracks the number of bytes, counting from the beginning of
+// the code page, that must not be changed. After patching for global
+// invalidation, nothing should modify the invalidated code region anymore.
+uint32_t yjit_codepage_frozen_bytes = 0;
+
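One way to read the yjit_codepage_frozen_bytes invariant in code: any pass that rewinds the write position to modify earlier output should stay above the watermark. A minimal sketch of such a guard, assuming the cb_set_pos() helper used elsewhere in this diff (the checked wrapper itself is hypothetical):

    // Hypothetical wrapper: refuse to rewind the write position into the
    // frozen region that a previous invalidation pass has already patched.
    static void
    cb_set_pos_checked(codeblock_t *cb, uint32_t pos)
    {
        RUBY_ASSERT_ALWAYS(pos >= yjit_codepage_frozen_bytes);
        cb_set_pos(cb, pos);
    }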
// Print the current source location for debugging purposes
RBIMPL_ATTR_MAYBE_UNUSED()
static void
@@ -156,6 +178,28 @@ jit_save_sp(jitstate_t* jit, ctx_t* ctx)
}
}
+// jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
+// could:
+// - Perform GC allocation
+// - Take the VM lock through RB_VM_LOCK_ENTER()
+// - Perform a Ruby method call
+static void
+jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
+{
+ jit->record_boundary_patch_point = true;
+ jit_save_pc(jit, scratch_reg);
+ jit_save_sp(jit, ctx);
+}
+
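A typical call site for this helper, mirroring the gen_duparray() hunk further down in this diff: save the PC and SP (and flag the instruction boundary) first, then call the runtime routine that may allocate or raise:

    // Inside a codegen function such as gen_duparray():
    jit_prepare_routine_call(jit, ctx, REG0);  // save PC/SP, flag the boundary

    // call rb_ary_resurrect(VALUE ary);
    jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
    call_ptr(cb, REG0, (void *)rb_ary_resurrect);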
+// Record the current codeblock write position so it can later be rewritten
+// into a jump to the outlined block. Used to implement global code invalidation.
+static void
+record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
+{
+ struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
+ if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
+}
+
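The recorded pair is consumed by yjit_tracing_invalidate_all() later in this diff; extracted into a standalone sketch, applying a single patch amounts to:

    // Rewrite the recorded mainline position into a jump to the outlined
    // target (an exit stub or the full c_return landing code), then restore
    // the write position so normal codegen can continue.
    static void
    apply_one_patch(struct codepage_patch patch)
    {
        const uint32_t saved_pos = cb->write_pos;
        cb_set_pos(cb, patch.mainline_patch_pos);
        jmp_ptr(cb, cb_get_ptr(ocb, patch.outline_target_pos));
        cb_set_pos(cb, saved_pos);
    }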
static bool jit_guard_known_klass(jitstate_t *jit, ctx_t* ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
#if RUBY_DEBUG
@@ -290,15 +334,13 @@ _counted_side_exit(uint8_t *existing_side_exit, int64_t *counter)
// Generate an exit to return to the interpreter
-static uint8_t *
-yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
+static uint32_t
+yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
{
- uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
+ const uint32_t code_pos = cb->write_pos;
ADD_COMMENT(cb, "exit to interpreter");
- VALUE *exit_pc = jit->pc;
-
// Generate the code to exit to the interpreter
// Write the adjusted SP back into the CFP
if (ctx->sp_offset != 0) {
@@ -329,7 +371,7 @@ yjit_gen_exit(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
mov(cb, RAX, imm_opnd(Qundef));
ret(cb);
- return code_ptr;
+ return code_pos;
}
// Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
@@ -363,7 +405,8 @@ yjit_gen_leave_exit(codeblock_t *cb)
static uint8_t *
yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
{
- return yjit_gen_exit(jit, ctx, ocb);
+ uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
+ return cb_get_ptr(ocb, pos);
}
// Generate a runtime guard that ensures the PC is at the start of the iseq,
@@ -399,6 +442,64 @@ yjit_pc_guard(const rb_iseq_t *iseq)
cb_link_labels(cb);
}
+// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
+// the way the interpreter does. When tracing for c_return is enabled, we patch the code
+// after the C method return to call into this function to fire the event.
+static void
+full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
+{
+ rb_control_frame_t *cfp = ec->cfp;
+ RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
+ const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
+
+ RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
+ RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
+
+ // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
+
+ // Pop the C func's frame and fire the c_return TracePoint event
+ // Note that this is the same order as vm_call_cfunc_with_frame().
+ rb_vm_pop_frame(ec);
+ EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
+ // Note: this deviates from the interpreter in that users need to enable
+ // a c_return TracePoint for this DTrace hook to work. A reasonable change
+ // since the Ruby return event works this way as well.
+ RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
+
+ // Push the return value onto the caller's stack. We know that it's a frame that
+ // uses cfp->sp because we are patching a call done with gen_send_cfunc().
+ ec->cfp->sp[0] = return_value;
+ ec->cfp->sp++;
+}
+
+// Landing code for when c_return tracing is enabled. See full_cfunc_return().
+static void
+gen_full_cfunc_return(void)
+{
+ codeblock_t *cb = ocb;
+ outline_full_cfunc_return_pos = ocb->write_pos;
+
+ // This chunk of code expects REG_EC to be filled properly and
+ // RAX to contain the return value of the C method.
+
+ // Call full_cfunc_return()
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], RAX);
+ call_ptr(cb, REG0, (void *)full_cfunc_return);
+
+ // Count the exit
+ GEN_COUNTER_INC(cb, traced_cfunc_return);
+
+ // Return to the interpreter
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, imm_opnd(Qundef));
+ ret(cb);
+}
+
/*
Compile an interpreter entry block to be inserted into an iseq
Returns `NULL` if compilation fails.
@@ -473,6 +574,13 @@ jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
+ // We are at the end of the current instruction. Record the boundary.
+ if (jit->record_boundary_patch_point) {
+ uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, ocb);
+ record_global_inval_patch(cb, exit_pos);
+ jit->record_boundary_patch_point = false;
+ }
+
// Generate the jump instruction
gen_direct_jump(
jit->block,
@@ -536,6 +644,14 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
jit.pc = pc;
jit.opcode = opcode;
+ // If the previous instruction requested to record the boundary
+ if (jit.record_boundary_patch_point) {
+ // Generate an exit to this instruction and record it
+ uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
+ record_global_inval_patch(cb, exit_pos);
+ jit.record_boundary_patch_point = false;
+ }
+
// Verify our existing assumption (DEBUG)
if (jit_at_current_insn(&jit)) {
verify_ctx(&jit, ctx);
@@ -546,7 +662,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
if (!gen_fn) {
// If we reach an unknown instruction,
// exit to the interpreter and stop compiling
- yjit_gen_exit(&jit, ctx, cb);
+ yjit_gen_exit(jit.pc, ctx, cb);
break;
}
@@ -576,7 +692,7 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
// TODO: if the codegen function makes changes to ctx and then returns YJIT_CANT_COMPILE,
// the exit this generates would be wrong. We could save a copy of the entry context
// and assert that ctx is the same here.
- yjit_gen_exit(&jit, ctx, cb);
+ yjit_gen_exit(jit.pc, ctx, cb);
break;
}
@@ -596,6 +712,10 @@ yjit_gen_block(block_t *block, rb_execution_context_t *ec)
// Store the index of the last instruction in the block
block->end_idx = insn_idx;
+ // We currently can't handle cases where the request is for a block that
+ // doesn't go to the next instruction.
+ RUBY_ASSERT(!jit.record_boundary_patch_point);
+
if (YJIT_DUMP_MODE >= 2) {
// Dump list of compiled instructions
fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
@@ -735,8 +855,7 @@ gen_newarray(jitstate_t* jit, ctx_t* ctx)
rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@@ -760,8 +879,7 @@ gen_duparray(jitstate_t* jit, ctx_t* ctx)
VALUE ary = jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
// call rb_ary_resurrect(VALUE ary);
jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
@@ -783,8 +901,7 @@ gen_splatarray(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
@@ -908,8 +1025,7 @@ gen_newhash(jitstate_t* jit, ctx_t* ctx)
if (n == 0) {
// Save the PC and SP because we are allocating
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
// val = rb_hash_new();
call_ptr(cb, REG0, (void *)rb_hash_new);
@@ -1559,8 +1675,7 @@ gen_setinstancevariable(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
@@ -1611,8 +1726,7 @@ gen_defined(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
// Get the operands from the stack
x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
@@ -1706,8 +1820,7 @@ gen_concatstrings(jitstate_t* jit, ctx_t* ctx)
rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
// Save the PC and SP because we are allocating
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
@@ -1975,15 +2088,13 @@ gen_opt_aref(jitstate_t *jit, ctx_t *ctx)
// Call VALUE rb_hash_aref(VALUE hash, VALUE key).
{
- // Write incremented pc to cfp->pc as the routine can raise and allocate
- jit_save_pc(jit, REG0);
-
// About to change REG_SP which these operands depend on. Yikes.
mov(cb, C_ARG_REGS[0], recv_opnd);
mov(cb, C_ARG_REGS[1], idx_opnd);
+ // Write incremented pc to cfp->pc as the routine can raise and allocate
// Write sp to cfp->sp since rb_hash_aref might need to call #hash on the key
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
call_ptr(cb, REG0, (void *)rb_hash_aref);
@@ -2009,8 +2120,7 @@ gen_opt_aset(jitstate_t *jit, ctx_t *ctx)
{
// Save the PC and SP because the callee may allocate
// Note that this modifies REG_SP, which is why we do it first
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
uint8_t* side_exit = yjit_side_exit(jit, ctx);
@@ -2177,8 +2287,7 @@ gen_opt_mod(jitstate_t* jit, ctx_t* ctx)
{
// Save the PC and SP because the callee may allocate bignums
// Note that this modifies REG_SP, which is why we do it first
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
uint8_t* side_exit = yjit_side_exit(jit, ctx);
@@ -2691,6 +2800,25 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
return YJIT_CANT_COMPILE;
}
+ // Don't JIT if tracing c_call or c_return
+ {
+ rb_event_flag_t tracing_events;
+ if (rb_multi_ractor_p()) {
+ tracing_events = ruby_vm_event_enabled_global_flags;
+ }
+ else {
+ // We could always use ruby_vm_event_enabled_global_flags,
+ // but since events are never removed from it, doing so would mean
+ // we don't compile even after tracing is disabled.
+ tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
+ }
+
+ if (tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN)) {
+ GEN_COUNTER_INC(cb, send_cfunc_tracing);
+ return YJIT_CANT_COMPILE;
+ }
+ }
+
// Delegate to codegen for C methods if we have it.
{
method_codegen_t known_cfunc_codegen;
@@ -2842,6 +2970,9 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
// Invalidation logic is in rb_yjit_method_lookup_change()
call_ptr(cb, REG0, (void*)cfunc->func);
+ // Record code position for TracePoint patching. See full_cfunc_return().
+ record_global_inval_patch(cb, outline_full_cfunc_return_pos);
+
// Push the return value on the Ruby stack
x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
mov(cb, stack_ret, RAX);
@@ -2856,7 +2987,7 @@ gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const
// cfunc calls may corrupt types
ctx_clear_local_types(ctx);
- // Note: gen_oswb_iseq() jumps to the next instruction with ctx->sp_offset == 0
+ // Note: gen_send_iseq() jumps to the next instruction with ctx->sp_offset == 0
// after the call, while this does not. This difference prevents
// the two call types from sharing the same successor.
@@ -3480,8 +3611,7 @@ gen_getglobal(jitstate_t* jit, ctx_t* ctx)
ID gid = jit_get_arg(jit, 0);
// Save the PC and SP because we might make a Ruby call for warning
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@@ -3500,8 +3630,7 @@ gen_setglobal(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because we might make a Ruby call for
// Kernel#trace_var
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
mov(cb, C_ARG_REGS[0], imm_opnd(gid));
@@ -3519,8 +3648,7 @@ gen_tostring(jitstate_t* jit, ctx_t* ctx)
{
// Save the PC and SP because we might make a Ruby call for
// #to_s when converting the operand to a String
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t str = ctx_stack_pop(ctx, 1);
x86opnd_t val = ctx_stack_pop(ctx, 1);
@@ -3545,8 +3673,7 @@ gen_toregexp(jitstate_t* jit, ctx_t* ctx)
// Save the PC and SP because this allocates an object and could
// raise an exception.
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
ctx_stack_pop(ctx, cnt);
@@ -3678,8 +3805,7 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
}
// If the calls don't allocate, do they need an up-to-date PC and SP?
- jit_save_pc(jit, REG0);
- jit_save_sp(jit, ctx);
+ jit_prepare_routine_call(jit, ctx, REG0);
if (bf->argc > 0) {
// Load environment pointer EP from CFP
@@ -3706,6 +3832,107 @@ gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx)
return YJIT_KEEP_COMPILING;
}
+static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
+static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
+
+// Invalidate all generated code and patch C method return code to contain
+// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
+// returns, all other ractors are paused inside RB_VM_LOCK_ENTER(), which
+// means they are inside a C routine. If any generated code is on the stack,
+// it is waiting for a return from a C routine. For every routine call, we
+// patch in an exit after the body of the containing VM instruction. This makes
+// it so all the invalidated code exits as soon as execution logically reaches
+// the next VM instruction.
+// The c_return event needs special handling as our codegen never outputs code
+// that contains tracing logic. If we let the normal output code run until the
+// start of the next VM instruction by relying on the patching scheme above, we
+// would fail to fire the c_return event. To handle it, we patch in the full
+// logic at the return address. See full_cfunc_return().
+// In addition to patching, we prevent future entries into invalidated code by
+// removing all live blocks from their iseq.
+void
+yjit_tracing_invalidate_all(void)
+{
+ if (!rb_yjit_enabled_p()) return;
+
+ // Stop other ractors since we are going to patch machine code.
+ RB_VM_LOCK_ENTER();
+ rb_vm_barrier();
+
+ // Make it so all live block versions are no longer valid branch targets
+ rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
+
+ // Apply patches
+ const uint32_t old_pos = cb->write_pos;
+ rb_darray_for(global_inval_patches, patch_idx) {
+ struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
+ cb_set_pos(cb, patch.mainline_patch_pos);
+ uint8_t *jump_target = cb_get_ptr(ocb, patch.outline_target_pos);
+ jmp_ptr(cb, jump_target);
+ }
+ cb_set_pos(cb, old_pos);
+
+ // Freeze the invalidated part of the codepage. We only want to wait for
+ // running instances of the code to exit from now on, so we shouldn't
+ // change the code. There could be other ractors sleeping in
+ // branch_stub_hit(), for example. We could harden this by changing memory
+ // protection on the frozen range.
+ RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
+ yjit_codepage_frozen_bytes = old_pos;
+
+ RB_VM_LOCK_LEAVE();
+}
+
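For context, a hedged sketch of a caller: when the VM turns on trace events that the generated code was compiled to ignore (e.g. a c_call/c_return TracePoint), it must invalidate everything. The enable_new_trace_events() name is hypothetical; only yjit_tracing_invalidate_all() and ruby_vm_event_enabled_global_flags appear in this diff:

    // Hypothetical VM-side hook (illustration only): after enabling new
    // global trace events, discard all YJIT output, since it was compiled
    // under the assumption that those events never fire.
    static void
    enable_new_trace_events(rb_event_flag_t new_events)
    {
        ruby_vm_event_enabled_global_flags |= new_events;
        yjit_tracing_invalidate_all();
    }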
+static int
+tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
+{
+ VALUE v = (VALUE)vstart;
+ for (; v != (VALUE)vend; v += stride) {
+ void *ptr = asan_poisoned_object_p(v);
+ asan_unpoison_object(v, false);
+
+ if (rb_obj_is_iseq(v)) {
+ rb_iseq_t *iseq = (rb_iseq_t *)v;
+ invalidate_all_blocks_for_tracing(iseq);
+ }
+
+ asan_poison_object_if(ptr, v);
+ }
+ return 0;
+}
+
+static void
+invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
+{
+ struct rb_iseq_constant_body *body = iseq->body;
+ if (!body) return; // iseq yet to be initialized
+
+ ASSERT_vm_locking();
+
+ // Empty all blocks on the iseq so we don't compile new blocks that jump to the
+ // invalidated region.
+ // TODO Leaking the blocks for now since we might have situations where
+ // a different ractor is waiting in branch_stub_hit(). If we freed the block,
+ // that ractor could wake up with a dangling block.
+ rb_darray_for(body->yjit_blocks, version_array_idx) {
+ rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
+ rb_darray_for(version_array, version_idx) {
+ // Stop listening for invalidation events like basic operation redefinition.
+ block_t *block = rb_darray_get(version_array, version_idx);
+ yjit_unlink_method_lookup_dependency(block);
+ yjit_block_assumptions_free(block);
+ }
+ rb_darray_free(version_array);
+ }
+ rb_darray_free(body->yjit_blocks);
+ body->yjit_blocks = NULL;
+
+#if USE_MJIT
+ // Reset output code entry point
+ body->jit_func = NULL;
+#endif
+}
+
static void
yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
{
@@ -3749,6 +3976,9 @@ yjit_init_codegen(void)
// Generate the interpreter exit code for leave
leave_exit_code = yjit_gen_leave_exit(cb);
+ // Generate full exit code for C func
+ gen_full_cfunc_return();
+
// Map YARV opcodes to the corresponding codegen functions
yjit_reg_op(BIN(nop), gen_nop);
yjit_reg_op(BIN(dup), gen_dup);