Skip to content

Commit 0b5f4bf

Browse files
committed
refactor: group evaluation results by set ID to support multi-set processing and update test assertions
Change-Id: Ifdc28f07307ee65c87a2d1967aebb6074d72f90d
1 parent 3efeee5 commit 0b5f4bf

2 files changed

Lines changed: 14 additions & 13 deletions

File tree

src/google/adk/evaluation/local_eval_service.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -215,23 +215,24 @@ async def run_evaluation(inference_result):
215215
for inference_result in evaluate_request.inference_results
216216
]
217217

218-
eval_case_results = []
219-
app_name = None
220-
eval_set_id = None
218+
results_by_set = {}
221219

222220
for evaluation_task in asyncio.as_completed(evaluation_tasks):
223221
inference_result, eval_case_result = await evaluation_task
224-
eval_case_results.append(eval_case_result)
225-
app_name = inference_result.app_name
226-
eval_set_id = inference_result.eval_set_id
222+
results_by_set.setdefault(inference_result.eval_set_id, []).append(
223+
(inference_result.app_name, eval_case_result)
224+
)
227225
yield eval_case_result
228226

229-
if self._eval_set_results_manager and eval_case_results:
230-
self._eval_set_results_manager.save_eval_set_result(
231-
app_name=app_name,
232-
eval_set_id=eval_set_id,
233-
eval_case_results=eval_case_results,
234-
)
227+
if self._eval_set_results_manager:
228+
for eval_set_id, results in results_by_set.items():
229+
app_name = results[0][0]
230+
cases = [r[1] for r in results]
231+
self._eval_set_results_manager.save_eval_set_result(
232+
app_name=app_name,
233+
eval_set_id=eval_set_id,
234+
eval_case_results=cases,
235+
)
235236

236237
async def _evaluate_single_inference_result(
237238
self, inference_result: InferenceResult, evaluate_config: EvaluateConfig

tests/unittests/cli/utils/test_cli_tools_click.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1297,7 +1297,7 @@ def test_cli_eval_with_eval_set_id(
12971297
eval_set_results = eval_set_results_manager.list_eval_set_results(
12981298
app_name=app_name
12991299
)
1300-
assert len(eval_set_results) == 2
1300+
assert len(eval_set_results) == 1
13011301

13021302

13031303
def test_cli_create_eval_set(tmp_path: Path):

0 commit comments

Comments
 (0)