Skip to content

Commit 041e5d9

Browse files
saifer82 and claude committed
fix(eval): plumb App through LocalEvalService to fix App.plugins bypass
Closes the loop on https://github.com/google/adk-python/issues/<TBD>: when a project wraps its root agent in `App(root_agent=..., plugins=[...])` and runs `adk eval`, the registered plugins (e.g., `BigQueryAgentAnalyticsPlugin`) now fire on every invocation, just as they do for `adk web` / `adk run`. The same applies to `App.context_cache_config` and `App.resumability_config`, which now ride along automatically.

Changes:

* `LocalEvalService.__init__` accepts an optional, keyword-only `app` argument and forwards it to `_generate_inferences_from_root_agent` for each eval case.
* `cli_tools_click.cli_eval` resolves the `App` via `get_app_or_root_agent` and passes it to `LocalEvalService`.
* `cli_optimize` (GEPA prompt optimization) also routes through `LocalEvalService`, but currently constructs it inside `LocalEvalSampler` with no `app` argument; bringing the optimize path under App-plugin coverage is a separate, narrower follow-up and is intentionally not included here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c5c4b91 commit 041e5d9

3 files changed

Lines changed: 89 additions & 2 deletions

File tree

src/google/adk/cli/cli_tools_click.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -823,8 +823,8 @@ def cli_eval(
823823
from ..evaluation.simulation.user_simulator_provider import UserSimulatorProvider
824824
from .cli_eval import _collect_eval_results
825825
from .cli_eval import _collect_inferences
826+
from .cli_eval import get_app_or_root_agent
826827
from .cli_eval import get_default_metric_info
827-
from .cli_eval import get_root_agent
828828
from .cli_eval import parse_and_get_evals_to_run
829829
from .cli_eval import pretty_print_eval_result
830830
except ModuleNotFoundError as mnf:
@@ -834,7 +834,7 @@ def cli_eval(
834834
print(f"Using evaluation criteria: {eval_config}")
835835
eval_metrics = get_eval_metrics_from_config(eval_config)
836836

837-
root_agent = get_root_agent(agent_module_file_path)
837+
app, root_agent = get_app_or_root_agent(agent_module_file_path)
838838
app_name = os.path.basename(agent_module_file_path)
839839
agents_dir = os.path.dirname(agent_module_file_path)
840840
eval_sets_manager = None
@@ -940,6 +940,7 @@ def cli_eval(
940940
eval_set_results_manager=eval_set_results_manager,
941941
user_simulator_provider=user_simulator_provider,
942942
metric_evaluator_registry=metric_evaluator_registry,
943+
app=app,
943944
)
944945

945946
inference_results = asyncio.run(

src/google/adk/evaluation/local_eval_service.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from typing_extensions import override
2626

2727
from ..agents.base_agent import BaseAgent
28+
from ..apps.app import App
2829
from ..artifacts.base_artifact_service import BaseArtifactService
2930
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
3031
from ..errors.not_found_error import NotFoundError
@@ -123,8 +124,20 @@ def __init__(
123124
session_id_supplier: Callable[[], str] = _get_session_id,
124125
user_simulator_provider: UserSimulatorProvider = UserSimulatorProvider(),
125126
memory_service: Optional[BaseMemoryService] = None,
127+
*,
128+
app: Optional[App] = None,
126129
):
130+
"""Initializes a LocalEvalService.
131+
132+
Args:
133+
app: Optional `App` that wraps `root_agent`. When provided, eval runs
134+
are executed through a Runner built from the App, so `app.plugins`,
135+
`app.context_cache_config`, and `app.resumability_config` are
136+
honored during inference. When None, the legacy bare-agent path is
137+
used.
138+
"""
127139
self._root_agent = root_agent
140+
self._app = app
128141
self._eval_sets_manager = eval_sets_manager
129142
metric_evaluator_registry = (
130143
metric_evaluator_registry or DEFAULT_METRIC_EVALUATOR_REGISTRY
@@ -491,6 +504,7 @@ async def _perform_inference_single_eval_item(
491504
session_service=self._session_service,
492505
artifact_service=self._artifact_service,
493506
memory_service=self._memory_service,
507+
app=self._app,
494508
)
495509
)
496510

tests/unittests/evaluation/test_local_eval_service.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from typing import Optional
2020

2121
from google.adk.agents.llm_agent import LlmAgent
22+
from google.adk.apps.app import App
2223
from google.adk.errors.not_found_error import NotFoundError
2324
from google.adk.evaluation.base_eval_service import EvaluateConfig
2425
from google.adk.evaluation.base_eval_service import EvaluateRequest
@@ -791,3 +792,74 @@ def test_copy_invocation_rubrics_to_actual_invocations():
791792
_copy_invocation_rubrics_to_actual_invocations(expected, actual)
792793
assert actual[0].rubrics == [rubric1]
793794
assert actual[1].rubrics == [rubric2]
795+
796+
797+
@pytest.mark.asyncio
async def test_perform_inference_forwards_app_to_evaluation_generator(
    dummy_agent, mock_eval_sets_manager, mocker
):
  """Checks that the `app` given to LocalEvalService reaches the generator.

  `EvaluationGenerator._generate_inferences_from_root_agent` is replaced with
  an AsyncMock, inference is run for a single eval case, and the mock must
  have been awaited exactly once with the very same `App` instance as its
  `app` keyword argument.
  """
  # Stub out the generator so no real inference is attempted.
  generator_stub = mocker.AsyncMock(return_value=[])
  mocker.patch(
      "google.adk.evaluation.local_eval_service.EvaluationGenerator._generate_inferences_from_root_agent",
      new=generator_stub,
  )

  # The eval-set manager serves one eval set holding one empty-conversation
  # case.
  single_case_set = EvalSet(
      eval_set_id="set-1",
      eval_cases=[EvalCase(eval_id="case-1", conversation=[])],
  )
  mock_eval_sets_manager.get_eval_set.return_value = single_case_set

  wrapping_app = App(name="test_app", root_agent=dummy_agent)
  eval_service = LocalEvalService(
      root_agent=dummy_agent,
      eval_sets_manager=mock_eval_sets_manager,
      app=wrapping_app,
  )

  inference_request = InferenceRequest(
      app_name="test_app",
      eval_set_id="set-1",
      eval_case_ids=["case-1"],
      inference_config=InferenceConfig(),
  )
  # Drain the async generator; only the call made to the stub matters here.
  async for _unused in eval_service.perform_inference(
      inference_request=inference_request
  ):
    continue

  generator_stub.assert_awaited_once()
  forwarded_kwargs = generator_stub.await_args.kwargs
  assert forwarded_kwargs["app"] is wrapping_app
832+
833+
834+
@pytest.mark.asyncio
async def test_perform_inference_passes_none_when_no_app(
    dummy_agent, mock_eval_sets_manager, mocker
):
  """Checks that a LocalEvalService built without `app` forwards `app=None`.

  `EvaluationGenerator._generate_inferences_from_root_agent` is replaced with
  an AsyncMock, inference is run for a single eval case, and the mock must
  have been awaited exactly once with `None` as its `app` keyword argument.
  """
  # Stub out the generator so no real inference is attempted.
  generator_stub = mocker.AsyncMock(return_value=[])
  mocker.patch(
      "google.adk.evaluation.local_eval_service.EvaluationGenerator._generate_inferences_from_root_agent",
      new=generator_stub,
  )

  # The eval-set manager serves one eval set holding one empty-conversation
  # case.
  single_case_set = EvalSet(
      eval_set_id="set-1",
      eval_cases=[EvalCase(eval_id="case-1", conversation=[])],
  )
  mock_eval_sets_manager.get_eval_set.return_value = single_case_set

  # No `app` keyword is supplied here, on purpose.
  eval_service = LocalEvalService(
      root_agent=dummy_agent,
      eval_sets_manager=mock_eval_sets_manager,
  )

  inference_request = InferenceRequest(
      app_name="test_app",
      eval_set_id="set-1",
      eval_case_ids=["case-1"],
      inference_config=InferenceConfig(),
  )
  # Drain the async generator; only the call made to the stub matters here.
  async for _unused in eval_service.perform_inference(
      inference_request=inference_request
  ):
    continue

  generator_stub.assert_awaited_once()
  forwarded_kwargs = generator_stub.await_args.kwargs
  assert forwarded_kwargs["app"] is None

0 commit comments

Comments
 (0)