Skip to content

Commit b97b769

Browse files
committed
GH-126910: Add gdb support for unwinding JIT frames
1 parent 2b6a137 commit b97b769

27 files changed

Lines changed: 1559 additions & 750 deletions

Doc/c-api/perfmaps.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Note that holding an :term:`attached thread state` is not required for these API
3131
or ``-2`` on failure to create a lock. Check ``errno`` for more information
3232
about the cause of a failure.
3333

34-
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name)
34+
.. c:function:: int PyUnstable_WritePerfMapEntry(const void *code_addr, size_t code_size, const char *entry_name)
3535
3636
Write one single entry to the ``/tmp/perf-$pid.map`` file. This function is
3737
thread safe. Here is what an example entry looks like::

Include/cpython/ceval.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ typedef struct {
3838
PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void);
3939
PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
4040
const void *code_addr,
41-
unsigned int code_size,
41+
size_t code_size,
4242
const char *entry_name);
4343
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
4444
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);

Include/internal/pycore_ceval.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ typedef struct {
9494
void* (*init_state)(void);
9595
// Callback to register every trampoline being created
9696
void (*write_state)(void* state, const void *code_addr,
97-
unsigned int code_size, PyCodeObject* code);
97+
size_t code_size, PyCodeObject* code);
9898
// Callback to free the trampoline state
9999
int (*free_state)(void* state);
100100
} _PyPerf_Callbacks;
@@ -108,6 +108,10 @@ extern PyStatus _PyPerfTrampoline_AfterFork_Child(void);
108108
#ifdef PY_HAVE_PERF_TRAMPOLINE
109109
extern _PyPerf_Callbacks _Py_perfmap_callbacks;
110110
extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks;
111+
extern void _PyPerfJit_WriteNamedCode(const void *code_addr,
112+
size_t code_size,
113+
const char *entry,
114+
const char *filename);
111115
#endif
112116

113117
static inline PyObject*

Include/internal/pycore_interp_structs.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ struct code_arena_st;
6969
struct trampoline_api_st {
7070
void* (*init_state)(void);
7171
void (*write_state)(void* state, const void *code_addr,
72-
unsigned int code_size, PyCodeObject* code);
72+
size_t code_size, PyCodeObject* code);
7373
int (*free_state)(void* state);
7474
void *state;
7575
Py_ssize_t code_padding;

Include/internal/pycore_jit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ typedef _Py_CODEUNIT *(*jit_func)(
2323
_PyStackRef _tos_cache0, _PyStackRef _tos_cache1, _PyStackRef _tos_cache2
2424
);
2525

26-
_Py_CODEUNIT *_PyJIT(
26+
_Py_CODEUNIT *_PyJIT_Entry(
2727
_PyExecutorObject *executor, _PyInterpreterFrame *frame,
2828
_PyStackRef *stack_pointer, PyThreadState *tstate
2929
);
Include/internal/pycore_jit_unwind.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#ifndef Py_INTERNAL_JIT_UNWIND_H
2+
#define Py_INTERNAL_JIT_UNWIND_H
3+
4+
#ifndef Py_BUILD_CORE
5+
# error "this header requires Py_BUILD_CORE define"
6+
#endif
7+
8+
#include <stddef.h>
9+
#include <stdint.h>
10+
11+
#if defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))
12+
13+
/* DWARF exception-handling pointer encodings shared by JIT unwind users. */
14+
enum {
15+
DWRF_EH_PE_absptr = 0x00,
16+
DWRF_EH_PE_omit = 0xff,
17+
18+
/* Data type encodings */
19+
DWRF_EH_PE_uleb128 = 0x01,
20+
DWRF_EH_PE_udata2 = 0x02,
21+
DWRF_EH_PE_udata4 = 0x03,
22+
DWRF_EH_PE_udata8 = 0x04,
23+
DWRF_EH_PE_sleb128 = 0x09,
24+
DWRF_EH_PE_sdata2 = 0x0a,
25+
DWRF_EH_PE_sdata4 = 0x0b,
26+
DWRF_EH_PE_sdata8 = 0x0c,
27+
DWRF_EH_PE_signed = 0x08,
28+
29+
/* Reference type encodings */
30+
DWRF_EH_PE_pcrel = 0x10,
31+
DWRF_EH_PE_textrel = 0x20,
32+
DWRF_EH_PE_datarel = 0x30,
33+
DWRF_EH_PE_funcrel = 0x40,
34+
DWRF_EH_PE_aligned = 0x50,
35+
DWRF_EH_PE_indirect = 0x80
36+
};
37+
38+
/* Return the size of the generated .eh_frame data for the given encoding. */
39+
size_t _PyJitUnwind_EhFrameSize(int absolute_addr);
40+
41+
/*
42+
* Build DWARF .eh_frame data for JIT code; returns size written or 0 on error.
43+
* absolute_addr selects the FDE address encoding:
44+
* - 0: PC-relative offsets (perf jitdump synthesized DSO).
45+
* - nonzero: absolute addresses (GDB JIT in-memory ELF).
46+
*/
47+
size_t _PyJitUnwind_BuildEhFrame(uint8_t *buffer, size_t buffer_size,
48+
const void *code_addr, size_t code_size,
49+
int absolute_addr);
50+
51+
void *_PyJitUnwind_GdbRegisterCode(const void *code_addr,
52+
size_t code_size,
53+
const char *entry,
54+
const char *filename);
55+
56+
void _PyJitUnwind_GdbUnregisterCode(void *handle);
57+
58+
#endif // defined(PY_HAVE_PERF_TRAMPOLINE) || (defined(__linux__) && defined(__ELF__))
59+
60+
#endif // Py_INTERNAL_JIT_UNWIND_H

Include/internal/pycore_optimizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ typedef struct _PyExecutorObject {
198198
uint32_t code_size;
199199
size_t jit_size;
200200
void *jit_code;
201+
void *jit_gdb_handle;
201202
_PyExitData exits[1];
202203
} _PyExecutorObject;
203204

Lib/test/test_gdb/gdb_jit_sample.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Sample script for use by test_gdb.test_jit
2+
3+
import _testinternalcapi
4+
import operator
5+
6+
7+
WARMUP_ITERATIONS = _testinternalcapi.TIER2_THRESHOLD + 10
8+
9+
10+
def jit_bt_hot(depth, warming_up_caller=False):
11+
if depth == 0:
12+
if not warming_up_caller:
13+
id(42)
14+
return
15+
16+
for iteration in range(WARMUP_ITERATIONS):
17+
operator.call(
18+
jit_bt_hot,
19+
depth - 1,
20+
warming_up_caller or iteration + 1 != WARMUP_ITERATIONS,
21+
)
22+
23+
24+
# Warm the shared shim once without hitting builtin_id so the real run uses
25+
# the steady-state shim path when GDB breaks inside id(42).
26+
jit_bt_hot(1, warming_up_caller=True)
27+
jit_bt_hot(1)

Lib/test/test_gdb/test_jit.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
import os
2+
import platform
3+
import re
4+
import sys
5+
import unittest
6+
7+
from .util import setup_module, DebuggerTests
8+
9+
10+
JIT_SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), "gdb_jit_sample.py")
11+
# In batch GDB, break in builtin_id() while it is running under JIT,
12+
# then repeatedly "finish" until the selected frame is the JIT executor.
13+
# That gives a deterministic backtrace starting with py::jit:executor.
14+
#
15+
# builtin_id() sits only a few helper frames above the JIT entry on this path.
16+
# This bound is just a generous upper limit so the test fails clearly if the
17+
# expected stack shape changes.
18+
MAX_FINISH_STEPS = 20
19+
# After landing on the JIT entry frame, single-step a bounded number of
20+
# instructions further into the blob so the backtrace is taken from JIT code
21+
# itself rather than the immediate helper-return site. The exact number of
22+
# steps is not significant: each step is cross-checked against the selected
23+
# frame's symbol so the test fails loudly if stepping escapes the registered
24+
# JIT region, instead of asserting against a misleading backtrace.
25+
MAX_JIT_ENTRY_STEPS = 4
26+
EVAL_FRAME_RE = r"(_PyEval_EvalFrameDefault|_PyEval_Vector)"
27+
JIT_EXECUTOR_FRAME = "py::jit:executor"
28+
JIT_ENTRY_SYMBOL = "_PyJIT_Entry"
29+
BACKTRACE_FRAME_RE = re.compile(r"^#\d+\s+.*$", re.MULTILINE)
30+
31+
FINISH_TO_JIT_EXECUTOR = (
32+
"python exec(\"import gdb\\n"
33+
f"target = {JIT_EXECUTOR_FRAME!r}\\n"
34+
f"for _ in range({MAX_FINISH_STEPS}):\\n"
35+
" frame = gdb.selected_frame()\\n"
36+
" if frame is not None and frame.name() == target:\\n"
37+
" break\\n"
38+
" gdb.execute('finish')\\n"
39+
"else:\\n"
40+
" raise RuntimeError('did not reach %s' % target)\\n\")"
41+
)
42+
STEP_INSIDE_JIT_EXECUTOR = (
43+
"python exec(\"import gdb\\n"
44+
f"target = {JIT_EXECUTOR_FRAME!r}\\n"
45+
f"for _ in range({MAX_JIT_ENTRY_STEPS}):\\n"
46+
" frame = gdb.selected_frame()\\n"
47+
" if frame is None or frame.name() != target:\\n"
48+
" raise RuntimeError('left JIT region during stepping: '\\n"
49+
" + repr(frame and frame.name()))\\n"
50+
" gdb.execute('si')\\n"
51+
"frame = gdb.selected_frame()\\n"
52+
"if frame is None or frame.name() != target:\\n"
53+
" raise RuntimeError('stepped out of JIT region after si')\\n\")"
54+
)
55+
56+
57+
def setUpModule():
58+
setup_module()
59+
60+
61+
# The GDB JIT interface registration is gated on __linux__ && __ELF__ in
62+
# Python/jit_unwind.c, and the synthetic EH-frame is only implemented for
63+
# x86_64 and AArch64 (a #error fires otherwise). Skip cleanly on other
64+
# platforms or architectures instead of producing timeouts / empty backtraces.
65+
# is_enabled() implies is_available() and also implies that the runtime has
66+
# JIT execution active; interpreter-only tier 2 builds don't hit this path.
67+
@unittest.skipUnless(sys.platform == "linux",
68+
"GDB JIT interface is only implemented for Linux + ELF")
69+
@unittest.skipUnless(platform.machine() in ("x86_64", "aarch64"),
70+
"GDB JIT CFI emitter only supports x86_64 and AArch64")
71+
@unittest.skipUnless(hasattr(sys, "_jit") and sys._jit.is_enabled(),
72+
"requires a JIT-enabled build with JIT execution active")
73+
class JitBacktraceTests(DebuggerTests):
74+
def get_stack_trace(self, **kwargs):
75+
# These tests validate the JIT-relevant part of the backtrace via
76+
# _assert_jit_backtrace_shape, so an unrelated "?? ()" frame below
77+
# the JIT/eval segment (e.g. libc without debug info) is tolerable.
78+
kwargs.setdefault("skip_on_truncation", False)
79+
return super().get_stack_trace(**kwargs)
80+
81+
def _extract_backtrace_frames(self, gdb_output):
82+
frames = BACKTRACE_FRAME_RE.findall(gdb_output)
83+
self.assertGreater(
84+
len(frames), 0,
85+
f"expected at least one GDB backtrace frame in output:\n{gdb_output}",
86+
)
87+
return frames
88+
89+
def _assert_jit_backtrace_shape(self, gdb_output, *, anchor_at_top):
90+
# Shape assertions applied to every JIT backtrace we produce:
91+
# 1. The synthetic JIT symbol appears exactly once. A second
92+
# py::jit:executor frame would mean the unwinder is
93+
# materializing two native frames for a single logical JIT
94+
# region, or failing to unwind out of the region entirely.
95+
# 2. The unwinder must climb back out of the JIT region into
96+
# the eval loop. Some platforms materialize a real
97+
# _PyJIT_Entry frame between the synthetic executor frame
98+
# and _PyEval_*, while others unwind directly from the
99+
# executor into _PyEval_*. Accept both shapes.
100+
# 3. For tests that assert a specific entry PC, the JIT frame
101+
# is also at #0.
102+
frames = self._extract_backtrace_frames(gdb_output)
103+
backtrace = "\n".join(frames)
104+
105+
jit_frames = [frame for frame in frames if JIT_EXECUTOR_FRAME in frame]
106+
jit_count = len(jit_frames)
107+
self.assertEqual(
108+
jit_count, 1,
109+
f"expected exactly 1 {JIT_EXECUTOR_FRAME} frame, got {jit_count}\n"
110+
f"backtrace:\n{backtrace}",
111+
)
112+
eval_frames = [frame for frame in frames if re.search(EVAL_FRAME_RE, frame)]
113+
eval_count = len(eval_frames)
114+
self.assertGreaterEqual(
115+
eval_count, 1,
116+
f"expected at least one _PyEval_* frame, got {eval_count}\n"
117+
f"backtrace:\n{backtrace}",
118+
)
119+
jit_frame_index = next(
120+
i for i, frame in enumerate(frames) if JIT_EXECUTOR_FRAME in frame
121+
)
122+
frames_after_jit = frames[jit_frame_index + 1:]
123+
first_eval_offset = next(
124+
(
125+
i for i, frame in enumerate(frames_after_jit)
126+
if re.search(EVAL_FRAME_RE, frame)
127+
),
128+
None,
129+
)
130+
self.assertIsNotNone(
131+
first_eval_offset,
132+
f"expected an eval frame after the JIT frame\n"
133+
f"backtrace:\n{backtrace}",
134+
)
135+
between_jit_and_eval = frames_after_jit[:first_eval_offset]
136+
jit_entry_frames = [
137+
frame for frame in between_jit_and_eval
138+
if JIT_ENTRY_SYMBOL in frame
139+
]
140+
self.assertLessEqual(
141+
len(jit_entry_frames), 1,
142+
f"expected at most one {JIT_ENTRY_SYMBOL} frame between the "
143+
f"executor and eval frames\nbacktrace:\n{backtrace}",
144+
)
145+
unexpected_between = [
146+
frame for frame in between_jit_and_eval
147+
if JIT_ENTRY_SYMBOL not in frame
148+
]
149+
self.assertFalse(
150+
unexpected_between,
151+
"expected only an optional _PyJIT_Entry frame between the "
152+
"executor and eval frames\n"
153+
f"backtrace:\n{backtrace}",
154+
)
155+
relevant_end = max(
156+
i
157+
for i, frame in enumerate(frames)
158+
if (
159+
JIT_EXECUTOR_FRAME in frame
160+
or JIT_ENTRY_SYMBOL in frame
161+
or re.search(EVAL_FRAME_RE, frame)
162+
)
163+
)
164+
truncated_frames = [
165+
frame for frame in frames[: relevant_end + 1]
166+
if " ?? ()" in frame
167+
]
168+
self.assertFalse(
169+
truncated_frames,
170+
"unexpected truncated frame before the validated JIT/eval segment\n"
171+
f"backtrace:\n{backtrace}",
172+
)
173+
if anchor_at_top:
174+
self.assertRegex(
175+
frames[0],
176+
re.compile(rf"^#0\s+{re.escape(JIT_EXECUTOR_FRAME)}"),
177+
)
178+
179+
def test_bt_unwinds_through_jit_frames(self):
180+
gdb_output = self.get_stack_trace(
181+
script=JIT_SAMPLE_SCRIPT,
182+
cmds_after_breakpoint=["bt"],
183+
PYTHON_JIT="1",
184+
)
185+
# The executor should appear as a named JIT frame and unwind back into
186+
# the eval loop.
187+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)
188+
189+
def test_bt_handoff_from_jit_entry_to_executor(self):
190+
gdb_output = self.get_stack_trace(
191+
script=JIT_SAMPLE_SCRIPT,
192+
breakpoint=JIT_ENTRY_SYMBOL,
193+
cmds_after_breakpoint=[
194+
"delete 1",
195+
"tbreak builtin_id",
196+
"continue",
197+
"bt",
198+
],
199+
PYTHON_JIT="1",
200+
)
201+
# If we stop first in the shim and then continue into the real JIT
202+
# workload, the final backtrace should match the architecture's
203+
# executor unwind contract.
204+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)
205+
206+
def test_bt_unwinds_from_inside_jit_executor(self):
207+
gdb_output = self.get_stack_trace(
208+
script=JIT_SAMPLE_SCRIPT,
209+
cmds_after_breakpoint=[
210+
FINISH_TO_JIT_EXECUTOR,
211+
STEP_INSIDE_JIT_EXECUTOR,
212+
"bt",
213+
],
214+
PYTHON_JIT="1",
215+
)
216+
# Once the selected PC is inside the JIT executor, we require that GDB
217+
# identifies the JIT frame at #0 and keeps unwinding into _PyEval_*.
218+
self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=True)

0 commit comments

Comments
 (0)