|
| 1 | +import os |
| 2 | +import platform |
| 3 | +import re |
| 4 | +import sys |
| 5 | +import unittest |
| 6 | + |
| 7 | +from .util import setup_module, DebuggerTests |
| 8 | + |
| 9 | + |
JIT_SAMPLE_SCRIPT = os.path.join(os.path.dirname(__file__), "gdb_jit_sample.py")
# In batch GDB, break in builtin_id() while it is running under JIT,
# then repeatedly "finish" until the selected frame is the JIT executor.
# That gives a deterministic backtrace starting with py::jit:executor.
#
# builtin_id() sits only a few helper frames above the JIT entry on this path.
# This bound is just a generous upper limit so the test fails clearly if the
# expected stack shape changes.
MAX_FINISH_STEPS = 20
# After landing on the JIT entry frame, single-step a bounded number of
# instructions further into the blob so the backtrace is taken from JIT code
# itself rather than the immediate helper-return site. The exact number of
# steps is not significant: each step is cross-checked against the selected
# frame's symbol so the test fails loudly if stepping escapes the registered
# JIT region, instead of asserting against a misleading backtrace.
MAX_JIT_ENTRY_STEPS = 4
# Pattern (used with re.search) identifying an eval-loop frame in a
# backtrace line.
EVAL_FRAME_RE = r"(_PyEval_EvalFrameDefault|_PyEval_Vector)"
# Symbol name of the JIT executor frame as it appears in the backtrace.
JIT_EXECUTOR_FRAME = "py::jit:executor"
# Symbol that may appear between the executor frame and the eval loop.
JIT_ENTRY_SYMBOL = "_PyJIT_Entry"
# Matches each "#N ..." frame line of a GDB backtrace.
BACKTRACE_FRAME_RE = re.compile(r"^#\d+\s+.*$", re.MULTILINE)

# The two commands below are single-line GDB "python" commands.  Each one
# wraps a small multi-line script in exec("..."): the "\\n" sequences stay as
# backslash-n in the GDB command line and only become real newlines when the
# string literal is evaluated inside GDB.  The indentation *inside* the
# literals is therefore significant: it is the block structure of the
# embedded script and must nest properly or exec() raises IndentationError.
FINISH_TO_JIT_EXECUTOR = (
    "python exec(\"import gdb\\n"
    f"target = {JIT_EXECUTOR_FRAME!r}\\n"
    f"for _ in range({MAX_FINISH_STEPS}):\\n"
    "    frame = gdb.selected_frame()\\n"
    "    if frame is not None and frame.name() == target:\\n"
    "        break\\n"
    "    gdb.execute('finish')\\n"
    "else:\\n"
    "    raise RuntimeError('did not reach %s' % target)\\n\")"
)
STEP_INSIDE_JIT_EXECUTOR = (
    "python exec(\"import gdb\\n"
    f"target = {JIT_EXECUTOR_FRAME!r}\\n"
    f"for _ in range({MAX_JIT_ENTRY_STEPS}):\\n"
    "    frame = gdb.selected_frame()\\n"
    "    if frame is None or frame.name() != target:\\n"
    "        raise RuntimeError('left JIT region during stepping: '\\n"
    "                           + repr(frame and frame.name()))\\n"
    "    gdb.execute('si')\\n"
    "frame = gdb.selected_frame()\\n"
    "if frame is None or frame.name() != target:\\n"
    "    raise RuntimeError('stepped out of JIT region after si')\\n\")"
)
| 55 | + |
| 56 | + |
def setUpModule():
    """Module-level unittest fixture: delegate to the shared setup_module()."""
    setup_module()
| 59 | + |
| 60 | + |
# The GDB JIT interface registration is gated on __linux__ && __ELF__ in
# Python/jit_unwind.c, and the synthetic EH-frame is only implemented for
# x86_64 and AArch64 (a #error fires otherwise). Skip cleanly on other
# platforms or architectures instead of producing timeouts / empty backtraces.
# is_enabled() implies is_available() and also implies that the runtime has
# JIT execution active; interpreter-only tier 2 builds don't hit this path.
@unittest.skipUnless(sys.platform == "linux",
                     "GDB JIT interface is only implemented for Linux + ELF")
@unittest.skipUnless(platform.machine() in ("x86_64", "aarch64"),
                     "GDB JIT CFI emitter only supports x86_64 and AArch64")
@unittest.skipUnless(hasattr(sys, "_jit") and sys._jit.is_enabled(),
                     "requires a JIT-enabled build with JIT execution active")
class JitBacktraceTests(DebuggerTests):
    """Check the shape of GDB backtraces taken while JIT code is on the stack.

    Every test runs JIT_SAMPLE_SCRIPT under GDB with PYTHON_JIT=1, issues
    "bt", and validates the output with _assert_jit_backtrace_shape().
    """

    def get_stack_trace(self, **kwargs):
        """Call the inherited get_stack_trace() with truncation skips off.

        ``skip_on_truncation`` defaults to False here; a caller may still
        pass it explicitly to override.
        """
        # These tests validate the JIT-relevant part of the backtrace via
        # _assert_jit_backtrace_shape, so an unrelated "?? ()" frame below
        # the JIT/eval segment (e.g. libc without debug info) is tolerable.
        kwargs.setdefault("skip_on_truncation", False)
        return super().get_stack_trace(**kwargs)

    def _extract_backtrace_frames(self, gdb_output):
        """Return the "#N ..." frame lines found in *gdb_output*.

        Fails the test if no frame line is present.
        """
        frames = BACKTRACE_FRAME_RE.findall(gdb_output)
        self.assertGreater(
            len(frames), 0,
            f"expected at least one GDB backtrace frame in output:\n{gdb_output}",
        )
        return frames

    def _assert_jit_backtrace_shape(self, gdb_output, *, anchor_at_top):
        """Assert that *gdb_output* contains a well-formed JIT backtrace.

        *anchor_at_top* additionally requires the JIT executor frame to be
        frame #0.
        """
        # Shape assertions applied to every JIT backtrace we produce:
        #   1. The synthetic JIT symbol appears exactly once. A second
        #      py::jit:executor frame would mean the unwinder is
        #      materializing two native frames for a single logical JIT
        #      region, or failing to unwind out of the region entirely.
        #   2. The unwinder must climb back out of the JIT region into
        #      the eval loop. Some platforms materialize a real
        #      _PyJIT_Entry frame between the synthetic executor frame
        #      and _PyEval_*, while others unwind directly from the
        #      executor into _PyEval_*. Accept both shapes.
        #   3. For tests that assert a specific entry PC, the JIT frame
        #      is also at #0.
        frames = self._extract_backtrace_frames(gdb_output)
        backtrace = "\n".join(frames)

        jit_frames = [frame for frame in frames if JIT_EXECUTOR_FRAME in frame]
        jit_count = len(jit_frames)
        self.assertEqual(
            jit_count, 1,
            f"expected exactly 1 {JIT_EXECUTOR_FRAME} frame, got {jit_count}\n"
            f"backtrace:\n{backtrace}",
        )
        eval_frames = [frame for frame in frames if re.search(EVAL_FRAME_RE, frame)]
        eval_count = len(eval_frames)
        self.assertGreaterEqual(
            eval_count, 1,
            f"expected at least one _PyEval_* frame, got {eval_count}\n"
            f"backtrace:\n{backtrace}",
        )

        # Exactly one JIT frame exists at this point, so the first line equal
        # to it is the JIT frame itself.
        jit_frame_index = frames.index(jit_frames[0])
        frames_after_jit = frames[jit_frame_index + 1:]
        first_eval_offset = next(
            (
                i for i, frame in enumerate(frames_after_jit)
                if re.search(EVAL_FRAME_RE, frame)
            ),
            None,
        )
        self.assertIsNotNone(
            first_eval_offset,
            f"expected an eval frame after the JIT frame\n"
            f"backtrace:\n{backtrace}",
        )

        # Only an optional, single _PyJIT_Entry frame may sit between the
        # executor frame and the first eval frame.
        between_jit_and_eval = frames_after_jit[:first_eval_offset]
        jit_entry_frames = [
            frame for frame in between_jit_and_eval
            if JIT_ENTRY_SYMBOL in frame
        ]
        self.assertLessEqual(
            len(jit_entry_frames), 1,
            f"expected at most one {JIT_ENTRY_SYMBOL} frame between the "
            f"executor and eval frames\nbacktrace:\n{backtrace}",
        )
        unexpected_between = [
            frame for frame in between_jit_and_eval
            if JIT_ENTRY_SYMBOL not in frame
        ]
        self.assertFalse(
            unexpected_between,
            "expected only an optional _PyJIT_Entry frame between the "
            "executor and eval frames\n"
            f"backtrace:\n{backtrace}",
        )

        # Unresolved "?? ()" frames are rejected only up to the last
        # JIT/entry/eval frame; anything below that segment is not checked.
        relevant_end = max(
            i
            for i, frame in enumerate(frames)
            if (
                JIT_EXECUTOR_FRAME in frame
                or JIT_ENTRY_SYMBOL in frame
                or re.search(EVAL_FRAME_RE, frame)
            )
        )
        truncated_frames = [
            frame for frame in frames[: relevant_end + 1]
            if " ?? ()" in frame
        ]
        self.assertFalse(
            truncated_frames,
            "unexpected truncated frame before the validated JIT/eval segment\n"
            f"backtrace:\n{backtrace}",
        )
        if anchor_at_top:
            self.assertRegex(
                frames[0],
                re.compile(rf"^#0\s+{re.escape(JIT_EXECUTOR_FRAME)}"),
            )

    def test_bt_unwinds_through_jit_frames(self):
        """A plain "bt" at the default breakpoint shows the JIT frame."""
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            cmds_after_breakpoint=["bt"],
            PYTHON_JIT="1",
        )
        # The executor should appear as a named JIT frame and unwind back into
        # the eval loop.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)

    def test_bt_handoff_from_jit_entry_to_executor(self):
        """Stop in _PyJIT_Entry first, then continue to builtin_id and "bt"."""
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            breakpoint=JIT_ENTRY_SYMBOL,
            cmds_after_breakpoint=[
                "delete 1",
                "tbreak builtin_id",
                "continue",
                "bt",
            ],
            PYTHON_JIT="1",
        )
        # If we stop first in the shim and then continue into the real JIT
        # workload, the final backtrace should match the architecture's
        # executor unwind contract.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=False)

    def test_bt_unwinds_from_inside_jit_executor(self):
        """Finish up to the executor frame, step inside it, then "bt"."""
        gdb_output = self.get_stack_trace(
            script=JIT_SAMPLE_SCRIPT,
            cmds_after_breakpoint=[
                FINISH_TO_JIT_EXECUTOR,
                STEP_INSIDE_JIT_EXECUTOR,
                "bt",
            ],
            PYTHON_JIT="1",
        )
        # Once the selected PC is inside the JIT executor, we require that GDB
        # identifies the JIT frame at #0 and keeps unwinding into _PyEval_*.
        self._assert_jit_backtrace_shape(gdb_output, anchor_at_top=True)
0 commit comments