@@ -215,23 +215,24 @@ async def run_evaluation(inference_result):
215215 for inference_result in evaluate_request .inference_results
216216 ]
217217
218- eval_case_results = []
219- app_name = None
220- eval_set_id = None
218+ results_by_set = {}
221219
222220 for evaluation_task in asyncio .as_completed (evaluation_tasks ):
223221 inference_result , eval_case_result = await evaluation_task
224- eval_case_results . append ( eval_case_result )
225- app_name = inference_result .app_name
226- eval_set_id = inference_result . eval_set_id
222+ results_by_set . setdefault ( inference_result . eval_set_id , []). append (
223+ ( inference_result .app_name , eval_case_result )
224+ )
227225 yield eval_case_result
228226
229- if self ._eval_set_results_manager and eval_case_results :
230- self ._eval_set_results_manager .save_eval_set_result (
231- app_name = app_name ,
232- eval_set_id = eval_set_id ,
233- eval_case_results = eval_case_results ,
234- )
227+ if self ._eval_set_results_manager :
228+ for eval_set_id , results in results_by_set .items ():
229+ app_name = results [0 ][0 ]
230+ cases = [r [1 ] for r in results ]
231+ self ._eval_set_results_manager .save_eval_set_result (
232+ app_name = app_name ,
233+ eval_set_id = eval_set_id ,
234+ eval_case_results = cases ,
235+ )
235236
236237 async def _evaluate_single_inference_result (
237238 self , inference_result : InferenceResult , evaluate_config : EvaluateConfig
0 commit comments