summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/yjit/yjit.md28
-rwxr-xr-x[-rw-r--r--]misc/yjit_perf.py32
2 files changed, 47 insertions, 13 deletions
diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md
index 8aab1aed22..4508bce25f 100644
--- a/doc/yjit/yjit.md
+++ b/doc/yjit/yjit.md
@@ -480,13 +480,8 @@ perf script --fields +pid > /tmp/test.perf
You can also profile the number of cycles consumed by code generated by each YJIT function.
```bash
-# Build perf from source for Python support
-# [Optional] libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev
-sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev
-git clone https://github.com/torvalds/linux
-cd linux/tools/perf
-make
-make install
+# Install perf
+sudo apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r`
# [Optional] Allow running perf without sudo
echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
@@ -497,5 +492,24 @@ cd ../yjit-bench
PERF=record ruby --yjit-perf=codegen -Iharness-perf benchmarks/lobsters/benchmark.rb
# Aggregate results
+perf script > /tmp/perf.txt
+../ruby/misc/yjit_perf.py /tmp/perf.txt
+```
+
+#### Building perf with Python support
+
+The above instructions work fine for most people, but if you build perf
+from source with Python support, you can also use the handy `perf script -s` interface.
+
+```bash
+# Build perf from source for Python support
+sudo apt-get install libpython3-dev python3-pip flex libtraceevent-dev \
+ libelf-dev libunwind-dev libaudit-dev libslang2-dev libdw-dev
+git clone --depth=1 https://github.com/torvalds/linux
+cd linux/tools/perf
+make
+make install
+
+# Aggregate results
perf script -s ../ruby/misc/yjit_perf.py
```
diff --git a/misc/yjit_perf.py b/misc/yjit_perf.py
index 44c232254e..61434e5eb4 100644..100755
--- a/misc/yjit_perf.py
+++ b/misc/yjit_perf.py
@@ -1,12 +1,9 @@
+#!/usr/bin/env python3
import os
import sys
from collections import Counter, defaultdict
import os.path
-sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
-from perf_trace_context import *
-from EventClass import *
-
# Aggregating cycles per symbol and dso
total_cycles = 0
category_cycles = Counter()
@@ -57,11 +54,10 @@ def categorize_symbol(dso, symbol):
def process_event(event):
global total_cycles, category_cycles, detailed_category_cycles, categories
- sample = event["sample"]
full_dso = event.get("dso", "Unknown_dso")
dso = os.path.basename(full_dso)
symbol = event.get("symbol", "[unknown]")
- cycles = sample["period"]
+ cycles = event["sample"]["period"]
total_cycles += cycles
category = categorize_symbol(dso, symbol)
@@ -94,3 +90,27 @@ def trace_end():
for (dso, symbol), cycles in symbols.most_common():
symbol_ratio = (cycles / category_total) * 100
print("{:<20} {:<50} {:>20.2f}% {:>15}".format(dso, truncate_symbol(symbol), symbol_ratio, cycles))
+
+# There are two ways to use this script:
+# 1) perf script -s misc/yjit_perf.py -- native interface
+# 2) perf script > perf.txt && misc/yjit_perf.py perf.txt -- hack, which doesn't require perf with Python support
+#
+# In both cases, __name__ is "__main__". The following code implements (2) when len(sys.argv) is 2, i.e. when a filename argument is given.
+# Text-file mode: parse the output of `perf script` line by line and feed each
+# sample to process_event(), then print the report with trace_end().
+# The len(sys.argv) == 2 guard is what selects this mode, so no separate
+# usage check is needed inside the block (it could never be reached).
+if __name__ == "__main__" and len(sys.argv) == 2:
+    with open(sys.argv[1], "r") as file:
+        for line in file:
+            # [Example]
+            # ruby 78207 3482.848465: 1212775 cpu_core/cycles:P/: 5c0333f682e1 [JIT] getlocal_WC_0+0x0 (/tmp/perf-78207.map)
+            row = line.split(maxsplit=6)
+            if len(row) < 7:
+                # Blank or truncated line: there is no symbol/dso field to parse.
+                continue
+
+            period = row[3] # "1212775"
+            # Split on the LAST " (" so that a symbol which itself contains
+            # " (" does not break the two-way unpacking.
+            symbol, separator, dso = row[6].rpartition(" (") # "[JIT] getlocal_WC_0+0x0", "/tmp/perf-78207.map)\n"
+            if not separator:
+                # No "(dso)" suffix on this line; it is not a sample row.
+                continue
+            symbol = symbol.split("+")[0] # "[JIT] getlocal_WC_0"
+            dso = dso.split(")")[0] # "/tmp/perf-78207.map"
+
+            process_event({"dso": dso, "symbol": symbol, "sample": {"period": int(period)}})
+    trace_end()