@@ -6668,6 +6668,8 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
66686668 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
66696669 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
66706670 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6671+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6672+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
66716673 ]
66726674
66736675 result = _evals_common ._execute_evaluation (
@@ -6676,18 +6678,13 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
66766678 metrics = [metric ],
66776679 )
66786680
6679- assert mock_private_evaluate_instances .call_count == 3
6680- assert mock_sleep .call_count == 2
6681+ assert mock_private_evaluate_instances .call_count == 5
6682+ assert mock_sleep .call_count == 4
66816683 assert len (result .summary_metrics ) == 1
66826684 summary_metric = result .summary_metrics [0 ]
66836685 assert summary_metric .metric_name == "summarization_quality"
66846686 assert summary_metric .mean_score is None
66856687 assert summary_metric .num_cases_error == 1
6686- assert (
6687- "Judge model resource exhausted after 3 retries"
6688- ) in result .eval_case_results [0 ].response_candidate_results [0 ].metric_results [
6689- "summarization_quality"
6690- ].error_message
66916688
66926689
66936690class TestEvaluationDataset :
@@ -7258,3 +7255,134 @@ def test_rate_limiter_no_sleep_when_enough_time_passed(self):
72587255 elapsed = real_time .time () - start
72597256 # 5 calls at 1000 QPS should take ~0.005s, certainly under 1s
72607257 assert elapsed < 1.0
7258+
7259+
class TestCallWithRetry:
    """Tests for the shared _call_with_retry helper.

    Every test patches time.sleep so the retry backoff does not actually
    wait, and scripts the wrapped callable with a mock.Mock side_effect
    sequence. The sibling tests establish the helper's contract: one
    backoff sleep per failed retryable attempt, up to _MAX_RETRIES (5)
    total attempts.
    """

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_on_first_try(self, mock_sleep):
        """Tests that _call_with_retry returns immediately on success."""
        fn = mock.Mock(return_value="success")
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        assert fn.call_count == 1
        # No failure, so no backoff sleep should have happened.
        assert mock_sleep.call_count == 0

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_after_retries(self, mock_sleep):
        """Tests that _call_with_retry succeeds after transient failures."""
        error_json = {"error": {"code": 429, "message": "exhausted"}}
        fn = mock.Mock(
            side_effect=[
                genai_errors.ClientError(code=429, response_json=error_json),
                genai_errors.ClientError(code=429, response_json=error_json),
                "success",
            ]
        )
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        assert fn.call_count == 3
        # One backoff sleep per failed attempt.
        assert mock_sleep.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_raises_after_max_retries(self, mock_sleep):
        """Tests that _call_with_retry raises after exhausting retries."""
        error_json = {"error": {"code": 429, "message": "exhausted"}}
        # A single exception instance as side_effect re-raises on every call.
        fn = mock.Mock(
            side_effect=genai_errors.ClientError(code=429, response_json=error_json)
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert fn.call_count == 5  # _MAX_RETRIES
        # Sleeps happen between attempts: one fewer than total attempts.
        assert mock_sleep.call_count == 4

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_retries_on_server_error(self, mock_sleep):
        """Tests retry on 503 ServiceUnavailable (ServerError)."""
        error_json = {"error": {"code": 503, "message": "unavailable"}}
        fn = mock.Mock(
            side_effect=[
                genai_errors.ServerError(code=503, response_json=error_json),
                "success",
            ]
        )
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        assert fn.call_count == 2
        # Consistent with the other retry tests: exactly one backoff sleep
        # for the single failed attempt (previously unasserted, so a
        # regression that skipped backoff on ServerError went undetected).
        assert mock_sleep.call_count == 1

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_no_retry_on_non_retryable(self, mock_sleep):
        """Tests that non-retryable errors are raised immediately."""
        error_json = {"error": {"code": 400, "message": "bad request"}}
        fn = mock.Mock(
            side_effect=genai_errors.ClientError(code=400, response_json=error_json)
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(fn, "test_metric")
        # No retry and no sleep for a non-retryable 400.
        assert fn.call_count == 1
        assert mock_sleep.call_count == 0
7325+
7326+
class TestComputationMetricRetry:
    """Tests for retry behavior in ComputationMetricHandler."""

    # patch.object with an explicit `new` value (the frozenset) replaces the
    # attribute directly and does NOT inject a mock parameter into the test
    # signature.
    @mock.patch.object(
        _evals_metric_handlers.ComputationMetricHandler,
        "SUPPORTED_COMPUTATION_METRICS",
        frozenset(["bleu"]),
    )
    @mock.patch("time.sleep", return_value=None)
    # fmt: off
    @mock.patch(
        "vertexai._genai.evals.Evals.evaluate_instances"
    )
    # fmt: on
    def test_computation_metric_retry_on_resource_exhausted(
        self,
        # mock.patch decorators bind bottom-up: the innermost patch
        # (evaluate_instances) maps to the first mock parameter.
        mock_evaluate_instances,
        mock_sleep,
        # NOTE(review): presumably a pytest fixture supplying a mocked API
        # client — confirm against the test module's conftest/fixtures.
        mock_api_client_fixture,
    ):
        """Tests that ComputationMetricHandler retries on 429."""
        # Single-row dataset: one prompt/response/reference triple.
        dataset_df = pd.DataFrame(
            [
                {
                    "prompt": "Test prompt",
                    "response": "Test response",
                    "reference": "Test reference",
                }
            ]
        )
        input_dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=dataset_df
        )
        metric = vertexai_genai_types.Metric(name="bleu")
        # Error payload mimicking the service's RESOURCE_EXHAUSTED (HTTP 429).
        error_response_json = {
            "error": {
                "code": 429,
                "message": "Resource exhausted.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        # Successful BLEU response eventually returned by the mocked call;
        # model_dump is what the handler reads from the response object.
        mock_bleu_result = mock.MagicMock()
        mock_bleu_result.model_dump.return_value = {
            "bleu_results": {"bleu_metric_values": [{"score": 0.85}]}
        }
        # Script two 429 failures followed by the successful result.
        mock_evaluate_instances.side_effect = [
            genai_errors.ClientError(code=429, response_json=error_response_json),
            genai_errors.ClientError(code=429, response_json=error_response_json),
            mock_bleu_result,
        ]

        result = _evals_common._execute_evaluation(
            api_client=mock_api_client_fixture,
            dataset=input_dataset,
            metrics=[metric],
        )

        # Three calls total (2 failures + 1 success), with one backoff sleep
        # after each failed attempt.
        assert mock_evaluate_instances.call_count == 3
        assert mock_sleep.call_count == 2
        summary_metric = result.summary_metrics[0]
        assert summary_metric.metric_name == "bleu"
        assert summary_metric.mean_score == 0.85
0 commit comments