Skip to content

Commit f028305

Browse files
committed
fix(evaluation): forward App through to the eval Runner
`_generate_inferences_from_root_agent` now accepts an optional `app` parameter. When provided, the eval Runner is built from a copy of the App whose `plugins` list is the user's plugins followed by the internal eval plugins (`_RequestIntercepterPlugin`, `EnsureRetryOptionsPlugin`). The user's App is never mutated, and the App's `context_cache_config` / `resumability_config` ride along automatically. When `app` is None, the legacy bare-agent path is preserved. `_process_query` (used by the public `generate_responses` entry point) now looks for an `App` instance at `agent.app` first and forwards it to the helper, so projects that wrap their root agent in an `App` get plugin coverage during eval without further changes. The CLI plumbing that hands the App down from `cli_eval` / `LocalEvalService` is in the next commit.
1 parent 17e0c3a commit f028305

2 files changed

Lines changed: 183 additions & 5 deletions

File tree

src/google/adk/evaluation/evaluation_generator.py

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from pydantic import BaseModel
2727

2828
from ..agents.llm_agent import Agent
29+
from ..apps.app import App
2930
from ..artifacts.base_artifact_service import BaseArtifactService
3031
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
3132
from ..events.event import Event
@@ -143,7 +144,15 @@ async def _process_query(
143144
"""Process a query using the agent and evaluation dataset."""
144145
module_path = f"{module_name}"
145146
agent_module = importlib.import_module(module_path)
146-
root_agent = agent_module.agent.root_agent
147+
# Prefer the wrapping `App` when the module exposes one, so that
148+
# `app.plugins`, context-cache, and resumability configs participate
149+
# in eval runs the same way they do for `adk web` / `adk run`.
150+
app_obj = getattr(agent_module.agent, "app", None)
151+
if isinstance(app_obj, App):
152+
root_agent = app_obj.root_agent
153+
else:
154+
app_obj = None
155+
root_agent = agent_module.agent.root_agent
147156

148157
reset_func = getattr(agent_module.agent, "reset_data", None)
149158

@@ -157,6 +166,7 @@ async def _process_query(
157166
user_simulator=user_simulator,
158167
reset_func=reset_func,
159168
initial_session=initial_session,
169+
app=app_obj,
160170
)
161171

162172
@staticmethod
@@ -197,8 +207,17 @@ async def _generate_inferences_from_root_agent(
197207
session_service: Optional[BaseSessionService] = None,
198208
artifact_service: Optional[BaseArtifactService] = None,
199209
memory_service: Optional[BaseMemoryService] = None,
210+
app: Optional[App] = None,
200211
) -> list[Invocation]:
201-
"""Scrapes the root agent in coordination with the user simulator."""
212+
"""Scrapes the root agent in coordination with the user simulator.
213+
214+
If `app` is provided, the eval Runner is built from a copy of the App
215+
with internal eval plugins merged into `app.plugins`, preserving the
216+
App's `context_cache_config`, `resumability_config`, and any other
217+
application-wide configuration. Otherwise the Runner is built from
218+
the bare `root_agent` with only the internal eval plugins, matching
219+
the legacy behavior.
220+
"""
202221

203222
if not session_service:
204223
session_service = InMemorySessionService()
@@ -235,13 +254,39 @@ async def _generate_inferences_from_root_agent(
235254
ensure_retry_options_plugin = EnsureRetryOptionsPlugin(
236255
name="ensure_retry_options"
237256
)
257+
internal_eval_plugins = [
258+
request_intercepter_plugin,
259+
ensure_retry_options_plugin,
260+
]
261+
262+
if app is not None:
263+
# Copy the App so we don't mutate the user's instance, and merge our
264+
# internal eval plugins with the user's. Override `root_agent` so the
265+
# Runner targets the agent the caller actually asked us to evaluate
266+
# (e.g., a sub-agent), while still carrying the App's plugins,
267+
# context_cache_config, and resumability_config.
268+
runner_app = app.model_copy(
269+
update={
270+
"plugins": list(app.plugins) + internal_eval_plugins,
271+
"root_agent": root_agent,
272+
}
273+
)
274+
runner_kwargs: dict[str, Any] = {
275+
"app": runner_app,
276+
"app_name": app_name,
277+
}
278+
else:
279+
runner_kwargs = {
280+
"app_name": app_name,
281+
"agent": root_agent,
282+
"plugins": internal_eval_plugins,
283+
}
284+
238285
async with Runner(
239-
app_name=app_name,
240-
agent=root_agent,
286+
**runner_kwargs,
241287
artifact_service=artifact_service,
242288
session_service=session_service,
243289
memory_service=memory_service,
244-
plugins=[request_intercepter_plugin, ensure_retry_options_plugin],
245290
) as runner:
246291
events = []
247292
while True:

tests/unittests/evaluation/test_evaluation_generator.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414

1515
from __future__ import annotations
1616

17+
from google.adk.agents.base_agent import BaseAgent
18+
from google.adk.apps.app import App
1719
from google.adk.evaluation.app_details import AgentDetails
1820
from google.adk.evaluation.app_details import AppDetails
1921
from google.adk.evaluation.evaluation_generator import EvaluationGenerator
2022
from google.adk.evaluation.request_intercepter_plugin import _RequestIntercepterPlugin
23+
from google.adk.plugins.base_plugin import BasePlugin
2124
from google.adk.evaluation.simulation.user_simulator import NextUserMessage
2225
from google.adk.evaluation.simulation.user_simulator import Status as UserSimulatorStatus
2326
from google.adk.evaluation.simulation.user_simulator import UserSimulator
@@ -479,3 +482,133 @@ async def mock_generate_inferences_side_effect(
479482
mock_generate_inferences.assert_called_once()
480483
called_with_content = mock_generate_inferences.call_args.args[3]
481484
assert called_with_content.parts[0].text == "message 1"
485+
486+
487+
class _SpyPlugin(BasePlugin):
  """Stand-in for a user-supplied plugin.

  Adds nothing on top of `BasePlugin`; the tests only need an instance with a
  recognizable `name` to check that user plugins survive the merge with the
  internal eval plugins.
  """
491+
492+
493+
class TestGenerateInferencesFromRootAgentWithApp:
  """Tests that App.plugins / configs are honored when an App is provided.

  Every test patches `Runner` in the evaluation_generator module and drives
  `_generate_inferences_from_root_agent` with a user simulator that stops on
  its first call, so only the keyword arguments handed to `Runner` are
  inspected.
  """

  # NOTE(review): the tests below request `mock_session_service` but never
  # forward it as `session_service=`. Presumably it is kept for parity with
  # the sibling tests in this file -- confirm it is still needed.

  @pytest.fixture
  def runner_cls(self, mocker):
    """Patches Runner and returns the patched class for kwargs inspection.

    The patched class returns an AsyncMock whose `__aenter__` resolves to that
    same mock, so it can be used as an async context manager.
    """
    mock_runner_cls = mocker.patch(
        "google.adk.evaluation.evaluation_generator.Runner"
    )
    mock_runner_instance = mocker.AsyncMock()
    mock_runner_instance.__aenter__.return_value = mock_runner_instance
    mock_runner_cls.return_value = mock_runner_instance
    yield mock_runner_cls

  @pytest.fixture
  def stop_immediately_simulator(self, mocker):
    """Returns a UserSimulator that stops on first call (no inference work)."""
    sim = mocker.MagicMock(spec=UserSimulator)
    sim.get_next_user_message = mocker.AsyncMock(
        return_value=NextUserMessage(
            status=UserSimulatorStatus.STOP_SIGNAL_DETECTED
        )
    )
    return sim

  @pytest.mark.asyncio
  async def test_runner_built_from_app_when_provided(
      self, runner_cls, mock_session_service, stop_immediately_simulator
  ):
    """When `app` is passed, Runner is built with `app=` (merged) instead of `agent=`."""
    root_agent = BaseAgent(name="root_agent")
    user_plugin = _SpyPlugin(name="user_plugin")
    app = App(name="my_app", root_agent=root_agent, plugins=[user_plugin])

    await EvaluationGenerator._generate_inferences_from_root_agent(
        root_agent=root_agent,
        user_simulator=stop_immediately_simulator,
        app=app,
    )

    runner_cls.assert_called_once()
    kwargs = runner_cls.call_args.kwargs
    assert "agent" not in kwargs, (
        "Runner must not receive `agent=` when `app=` is provided "
        "(would raise ValueError)."
    )
    assert "plugins" not in kwargs, (
        "Runner must not receive `plugins=` when `app=` is provided "
        "(would raise ValueError)."
    )
    runner_app = kwargs["app"]
    assert isinstance(runner_app, App)
    plugin_names = [p.name for p in runner_app.plugins]
    assert "user_plugin" in plugin_names, (
        "User plugin must be preserved in the merged App passed to Runner."
    )
    assert "request_intercepter_plugin" in plugin_names
    assert "ensure_retry_options" in plugin_names

  @pytest.mark.asyncio
  async def test_user_app_is_not_mutated(
      self, runner_cls, mock_session_service, stop_immediately_simulator
  ):
    """The user's App instance must not be mutated across eval runs."""
    root_agent = BaseAgent(name="root_agent")
    user_plugin = _SpyPlugin(name="user_plugin")
    app = App(name="my_app", root_agent=root_agent, plugins=[user_plugin])
    # Hold a reference to the list itself rather than its `id()`: an id is
    # only unique while the object is alive, so if a run swapped in a new
    # list, a later replacement could reuse the freed address and make an
    # id-based comparison pass by accident.
    original_plugins = app.plugins

    for _ in range(3):
      await EvaluationGenerator._generate_inferences_from_root_agent(
          root_agent=root_agent,
          user_simulator=stop_immediately_simulator,
          app=app,
      )

    # The user's App instance must still hold exactly its original plugin set,
    # regardless of how many eval runs reused it.
    assert app.plugins == [user_plugin]
    assert app.plugins is original_plugins

  @pytest.mark.asyncio
  async def test_runner_falls_back_to_bare_agent_when_no_app(
      self, runner_cls, mock_session_service, stop_immediately_simulator
  ):
    """When `app` is None, Runner is built with the legacy `agent=`/`plugins=` shape."""
    root_agent = BaseAgent(name="root_agent")

    await EvaluationGenerator._generate_inferences_from_root_agent(
        root_agent=root_agent,
        user_simulator=stop_immediately_simulator,
    )

    runner_cls.assert_called_once()
    kwargs = runner_cls.call_args.kwargs
    assert "app" not in kwargs
    assert kwargs["agent"] is root_agent
    plugin_names = [p.name for p in kwargs["plugins"]]
    assert plugin_names == [
        "request_intercepter_plugin",
        "ensure_retry_options",
    ]

  @pytest.mark.asyncio
  async def test_root_agent_override_propagates_to_merged_app(
      self, runner_cls, mock_session_service, stop_immediately_simulator
  ):
    """If a sub-agent is passed as root_agent, the merged App reflects that."""
    full_root = BaseAgent(name="full_root")
    sub_agent = BaseAgent(name="sub_agent")
    app = App(name="my_app", root_agent=full_root)

    await EvaluationGenerator._generate_inferences_from_root_agent(
        root_agent=sub_agent,
        user_simulator=stop_immediately_simulator,
        app=app,
    )

    runner_app = runner_cls.call_args.kwargs["app"]
    assert runner_app.root_agent is sub_agent
    # User's App must be untouched.
    assert app.root_agent is full_root

0 commit comments

Comments
 (0)