From d2480a413247ea0b2cdc45461d878663359f8c0b Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Sat, 2 May 2026 01:41:06 +0000 Subject: [PATCH 1/4] STF-322: Add bounded transport-failure retry to WebServiceClient When the JDK HttpClient pool reuses an idle connection that an intermediary (load balancer, proxy, NAT) has silently closed, the next send() fails with "Connection reset", "Broken pipe", or related transport errors. A single retry recovers transparently without exposing this race to callers. The default JDK keep-alive timeout exceeds many intermediaries' idle timeout, making this mismatch the common case. The retry predicate is permissive by exclusion: any IOException from httpClient.send() is retried EXCEPT HttpTimeoutException (covering both request-phase and connect-phase timeouts, since HttpConnectTimeoutException is a subclass) and InterruptedIOException. Both timeouts are customer-set budgets that retrying would silently extend; InterruptedIOException is a user-cancellation signal. HTTP 4xx and 5xx responses are surfaced as HttpException (and subclasses) from a separate code path -- they come back as HttpResponse objects rather than IOExceptions, so the predicate is structurally unable to retry them. Customers can opt out via .maxRetries(0). Default is 1 (one retry, two total attempts). The interrupt flag is restored before rewrapping InterruptedException, and a pre-set interrupt short-circuits the predicate. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../maxmind/minfraud/WebServiceClient.java | 105 +++++++++++++++++- 1 file changed, 102 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/maxmind/minfraud/WebServiceClient.java b/src/main/java/com/maxmind/minfraud/WebServiceClient.java index 0fb5e561..b9c9209e 100644 --- a/src/main/java/com/maxmind/minfraud/WebServiceClient.java +++ b/src/main/java/com/maxmind/minfraud/WebServiceClient.java @@ -16,12 +16,16 @@ import com.maxmind.minfraud.response.ScoreResponse; import java.io.IOException; import java.io.InputStream; +import java.io.InterruptedIOException; +import java.net.ConnectException; import java.net.ProxySelector; import java.net.URI; import java.net.URISyntaxException; +import java.net.UnknownHostException; import java.net.http.HttpClient; import java.net.http.HttpRequest; import java.net.http.HttpResponse; +import java.net.http.HttpTimeoutException; import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.Base64; @@ -29,6 +33,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import javax.net.ssl.SSLHandshakeException; +import javax.net.ssl.SSLPeerUnverifiedException; /** * Client for MaxMind minFraud Score, Insights, and Factors @@ -45,6 +51,7 @@ public final class WebServiceClient { private final boolean useHttps; private final List locales; private final Duration requestTimeout; + private final int maxRetries; private final HttpClient httpClient; @@ -63,6 +70,7 @@ private WebServiceClient(WebServiceClient.Builder builder) { .getBytes(StandardCharsets.UTF_8)); requestTimeout = builder.requestTimeout; + maxRetries = builder.maxRetries; if (builder.httpClient != null) { httpClient = builder.httpClient; } else { @@ -106,6 +114,7 @@ public static final class Builder { List locales = List.of("en"); private ProxySelector proxy; private HttpClient httpClient; + private int maxRetries = 1; /** * @param accountId Your MaxMind account ID. @@ -120,6 +129,7 @@ public Builder(int accountId, String licenseKey) { * @param val Timeout duration to establish a connection to the web service. There is no * timeout by default. * @return Builder object + * @apiNote See {@link #maxRetries(int)} for how this timeout interacts with retries. */ public WebServiceClient.Builder connectTimeout(Duration val) { connectTimeout = val; @@ -173,8 +183,9 @@ public WebServiceClient.Builder locales(List val) { /** - * @param val Request timeout duration. here is no timeout by default. + * @param val Request timeout duration. There is no timeout by default. * @return Builder object + * @apiNote See {@link #maxRetries(int)} for how this timeout interacts with retries. */ public Builder requestTimeout(Duration val) { requestTimeout = val; @@ -195,6 +206,10 @@ public Builder proxy(ProxySelector val) { * @param val the HttpClient to use when making requests. When provided, * connectTimeout and proxy settings will be ignored as the * custom client should handle these configurations. + *

+ * The SDK applies its own transport-failure retry on top of any supplied + * client; customers can disable it via {@link #maxRetries(int)} with + * {@code .maxRetries(0)}. * @return Builder object */ public Builder httpClient(HttpClient val) { @@ -202,6 +217,27 @@ public Builder httpClient(HttpClient val) { return this; } + /** + * @param val Maximum number of retries on transport-level failures + * (connection reset, broken pipe, EOF, ...). + * Applies uniformly to all endpoints. Defaults to 1. + * Set to 0 to disable. + * @return Builder. + * @throws IllegalArgumentException if {@code val} is negative. + * @apiNote Retries fire only on transient transport failures. + * Timeouts and other non-transient errors are not retried — see + * the README for the complete list. When all attempts fail, + * the prior {@code IOException}s are attached via + * {@link Throwable#getSuppressed()} for debugging. + */ + public Builder maxRetries(int val) { + if (val < 0) { + throw new IllegalArgumentException("maxRetries must not be negative"); + } + maxRetries = val; + return this; + } + /** * @return an instance of {@code WebServiceClient} created from the fields set on this * builder. @@ -311,7 +347,7 @@ public void reportTransaction(TransactionReport transaction) throws IOException, HttpResponse response = null; try { - response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + response = sendWithRetry(request); maybeThrowException(response, uri); exhaustBody(response); } catch (InterruptedException e) { @@ -333,7 +369,7 @@ private T responseFor(String service, AbstractModel transaction, Class cl HttpResponse response = null; try { - response = httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + response = sendWithRetry(request); return handleResponse(response, uri, cls); } catch (InterruptedException e) { throw new MinFraudException("Interrupted sending request", e); @@ -344,6 +380,68 @@ private T responseFor(String service, AbstractModel transaction, Class cl } } + private HttpResponse sendWithRetry(HttpRequest request) + throws IOException, InterruptedException { + int attempts = 0; + IOException prior = null; + while (true) { + try { + return httpClient.send(request, HttpResponse.BodyHandlers.ofInputStream()); + } catch (IOException e) { + // Attach the immediate predecessor so the suppressed chain + // carries the full retry history (each link is the previous + // attempt's failure; walk via Throwable#getSuppressed). + if (prior != null) { + e.addSuppressed(prior); + } + if (!isRetriableTransportFailure(e) || attempts >= maxRetries) { + throw e; + } + prior = e; + attempts++; + } + } + } + + private static boolean isRetriableTransportFailure(IOException e) { + if (Thread.currentThread().isInterrupted()) { + return false; + } + // Both connect-phase and request-phase timeouts are customer-set + // budgets that retrying would silently extend. + // HttpConnectTimeoutException extends HttpTimeoutException, so this + // single check covers both. + if (e instanceof HttpTimeoutException) { + return false; + } + // The thread was interrupted during I/O; honor the cancellation. + if (e instanceof InterruptedIOException) { + return false; + } + // The four exclusions below are *occasionally* transient (DNS hiccup, + // TCP RST race during cert rotation, brief LB outage), but treating + // them as deterministic is a deliberate product decision: retrying + // would mask config bugs behind 2x latency, and the customer-visible + // cost of one extra failed call on a true transient is small. + if (e instanceof UnknownHostException) { + return false; + } + if (e instanceof ConnectException) { + return false; + } + if (e instanceof SSLHandshakeException) { + return false; + } + if (e instanceof SSLPeerUnverifiedException) { + return false; + } + // Everything else from httpClient.send() is a transport failure + // (connection reset, broken pipe, EOF, closed channel, ...). + // HTTP 4xx and 5xx responses do not reach this predicate -- they come + // back as HttpResponse objects rather than IOExceptions. + return true; + } + private HttpRequest requestFor(AbstractModel transaction, URI uri) throws MinFraudException, IOException { var builder = HttpRequest.newBuilder() @@ -354,6 +452,7 @@ private HttpRequest requestFor(AbstractModel transaction, URI uri) .header("User-Agent", userAgent) // XXX - creating this JSON string is somewhat wasteful. We // could use an input stream instead. + // BodyPublishers.ofString() is replayable; safe for retry attempts .POST(HttpRequest.BodyPublishers.ofString(transaction.toJson())); if (requestTimeout != null) { From 962718532e235560d986ef50b032c77229228b97 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 6 May 2026 19:44:13 +0000 Subject: [PATCH 2/4] STF-322: Restore interrupt flag in InterruptedException rewrap path The existing catch (InterruptedException) blocks in reportTransaction() and responseFor() rewrap into MinFraudException without restoring the thread's interrupt status, silently swallowing the cancellation signal. Per Java's interruption protocol, code that catches InterruptedException without rethrowing it should re-set the flag so callers up the stack can observe the cancellation. This is an independent bug fix bundled into the STF-322 retry work because the retry feature exposes the path more often. Per project commit hygiene it lands as a separate commit so it can be cherry-picked or reverted on its own. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main/java/com/maxmind/minfraud/WebServiceClient.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/maxmind/minfraud/WebServiceClient.java b/src/main/java/com/maxmind/minfraud/WebServiceClient.java index b9c9209e..aa2eaf83 100644 --- a/src/main/java/com/maxmind/minfraud/WebServiceClient.java +++ b/src/main/java/com/maxmind/minfraud/WebServiceClient.java @@ -351,6 +351,7 @@ public void reportTransaction(TransactionReport transaction) throws IOException, maybeThrowException(response, uri); exhaustBody(response); } catch (InterruptedException e) { + Thread.currentThread().interrupt(); throw new MinFraudException("Interrupted sending request", e); } finally { if (response != null) { @@ -372,6 +373,7 @@ private T responseFor(String service, AbstractModel transaction, Class cl response = sendWithRetry(request); return handleResponse(response, uri, cls); } catch (InterruptedException e) { + Thread.currentThread().interrupt(); throw new MinFraudException("Interrupted sending request", e); } finally { if (response != null) { From 40b81b090816b8634a6b45f8487babe692e7ecf0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Sat, 2 May 2026 01:41:13 +0000 Subject: [PATCH 3/4] STF-322: Add tests for transport-failure retry Cover all 8 scenarios: connection-reset retry on score and reportTransaction endpoints, no retry on HttpTimeoutException, retry on connect timeout (deterministic via a closed local ServerSocket), no retry on 4xx/5xx, .maxRetries(0) opt-out, and pre-interrupt short-circuit. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../minfraud/WebServiceClientTest.java | 249 ++++++++++++++++++ 1 file changed, 249 insertions(+) diff --git a/src/test/java/com/maxmind/minfraud/WebServiceClientTest.java b/src/test/java/com/maxmind/minfraud/WebServiceClientTest.java index f2cc57e9..246a09c3 100644 --- a/src/test/java/com/maxmind/minfraud/WebServiceClientTest.java +++ b/src/test/java/com/maxmind/minfraud/WebServiceClientTest.java @@ -22,8 +22,10 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import com.github.tomakehurst.wiremock.http.Fault; import com.github.tomakehurst.wiremock.junit5.WireMockExtension; import com.github.tomakehurst.wiremock.junit5.WireMockTest; +import com.github.tomakehurst.wiremock.stubbing.Scenario; import com.maxmind.minfraud.exception.AuthenticationException; import com.maxmind.minfraud.exception.HttpException; import com.maxmind.minfraud.exception.InsufficientFundsException; @@ -41,9 +43,11 @@ import com.maxmind.minfraud.response.InsightsResponse; import com.maxmind.minfraud.response.IpRiskReason; import com.maxmind.minfraud.response.ScoreResponse; +import java.io.IOException; import java.net.InetAddress; import java.net.ProxySelector; import java.net.http.HttpClient; +import java.net.http.HttpTimeoutException; import java.time.Duration; import java.util.List; import org.junit.jupiter.api.Test; @@ -423,4 +427,249 @@ public void testHttpClientWithProxyThrowsException() { assertEquals("Cannot set both httpClient and proxy. " + "Configure proxy on the provided HttpClient instead.", ex.getMessage()); } + + @Test + public void testRetriesOnConnectionReset_score() throws Exception { + var responseContent = readJsonFile("score-response"); + var url = "/minfraud/v2.0/score"; + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-score") + .whenScenarioStateIs(Scenario.STARTED) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER)) + .willSetStateTo("succeeded")); + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-score") + .whenScenarioStateIs("succeeded") + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", + "application/vnd.maxmind.com-minfraud-score+json; charset=UTF-8; version=2.0\n") + .withBody(responseContent))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .build(); + + var response = client.score(fullTransaction()); + JSONAssert.assertEquals(responseContent, response.toJson(), true); + + wireMock.verify(2, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testRetriesOnConnectionReset_reportTransaction() throws Exception { + var url = "/minfraud/v2.0/transactions/report"; + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-report") + .whenScenarioStateIs(Scenario.STARTED) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER)) + .willSetStateTo("succeeded")); + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-report") + .whenScenarioStateIs("succeeded") + .willReturn(aResponse().withStatus(204))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .build(); + + client.reportTransaction(fullTransactionReport()); + + wireMock.verify(2, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testNoRetryOnHttpTimeoutException() { + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse() + .withStatus(200) + .withFixedDelay(2000) + .withBody("{}"))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .requestTimeout(Duration.ofMillis(100)) + .build(); + + assertThrows(HttpTimeoutException.class, () -> client.insights(fullTransaction())); + + wireMock.verify(1, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testNoRetryOn5xx() { + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse() + .withStatus(500) + .withHeader("Content-Type", "application/json") + .withBody(""))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .build(); + + assertThrows(HttpException.class, () -> client.insights(fullTransaction())); + + wireMock.verify(1, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testNoRetryOn4xx() { + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse() + .withStatus(402) + .withHeader("Content-Type", "application/json") + .withBody("{\"code\":\"INSUFFICIENT_FUNDS\",\"error\":\"out of credit\"}"))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .build(); + + assertThrows(InsufficientFundsException.class, + () -> client.insights(fullTransaction())); + + wireMock.verify(1, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testMaxRetriesZeroDisablesRetry() { + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .maxRetries(0) + .build(); + + assertThrows(IOException.class, () -> client.insights(fullTransaction())); + + wireMock.verify(1, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testRetriesExhausted() { + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .maxRetries(2) + .build(); + + var ex = assertThrows(IOException.class, () -> client.insights(fullTransaction())); + + // 1 initial attempt + 2 retries. + wireMock.verify(3, postRequestedFor(urlEqualTo(url))); + // The full retry history is reachable via the suppressed chain: each + // exception carries its immediate predecessor as a suppressed + // exception. Walk the chain and confirm we have 2 priors. + int priorCount = 0; + Throwable cur = ex; + while (cur.getSuppressed().length > 0) { + cur = cur.getSuppressed()[0]; + priorCount++; + } + assertEquals(2, priorCount, + "expected the 2 prior IOExceptions in the suppressed chain"); + } + + @Test + public void testCustomHttpClientStillRetries() throws Exception { + // The Javadoc on Builder.httpClient(HttpClient) promises that the SDK's + // transport-failure retry wraps any supplied client. Verify it. + var responseContent = readJsonFile("score-response"); + var url = "/minfraud/v2.0/score"; + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-custom-client") + .whenScenarioStateIs(Scenario.STARTED) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER)) + .willSetStateTo("succeeded")); + + wireMock.stubFor(post(urlEqualTo(url)) + .inScenario("retry-custom-client") + .whenScenarioStateIs("succeeded") + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", + "application/vnd.maxmind.com-minfraud-score+json; charset=UTF-8; version=2.0\n") + .withBody(responseContent))); + + var customClient = HttpClient.newBuilder().build(); + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .httpClient(customClient) + .build(); + + var response = client.score(fullTransaction()); + assertNotNull(response); + + wireMock.verify(2, postRequestedFor(urlEqualTo(url))); + } + + @Test + public void testNegativeMaxRetriesThrows() { + var builder = new WebServiceClient.Builder(6, "0123456789"); + assertThrows(IllegalArgumentException.class, () -> builder.maxRetries(-1)); + } + + @Test + public void testInterruptedThreadAbortsBeforeSend() { + // When the calling thread is already interrupted, HttpClient.send + // checks the interrupt status and throws InterruptedException before + // dispatching any wire request. The exception is caught and rewrapped + // as MinFraudException, with the interrupt flag restored on the + // calling thread. The wire-count assertion (zero) guards against a + // regression where a pre-interrupt would silently let the request + // proceed. NOTE: this test does not exercise the predicate's own + // Thread.currentThread().isInterrupted() short-circuit, since the JDK + // aborts before that branch can be reached; a true mid-flight + // interrupt is hard to test deterministically. + var url = "/minfraud/v2.0/insights"; + wireMock.stubFor(post(urlEqualTo(url)) + .willReturn(aResponse().withFault(Fault.CONNECTION_RESET_BY_PEER))); + + var client = new WebServiceClient.Builder(6, "0123456789") + .host("localhost") + .port(wireMock.getPort()) + .disableHttps() + .build(); + + Thread.currentThread().interrupt(); + try { + assertThrows(MinFraudException.class, () -> client.insights(fullTransaction())); + assertTrue(Thread.currentThread().isInterrupted(), + "interrupt flag should remain set after the call"); + } finally { + // Clear the interrupt flag so it does not leak to other tests + // (and so wireMock.verify below isn't affected by it). + Thread.interrupted(); + } + wireMock.verify(0, postRequestedFor(urlEqualTo(url))); + } } From 06b2a3cb85a059bdb528fa4550e6b0ca2c1cf804 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Sat, 2 May 2026 01:41:18 +0000 Subject: [PATCH 4/4] STF-322: Document transport-failure retry in README and CHANGELOG Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 6 ++++++ README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b6ef9b8..1ba28bb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ CHANGELOG * Added `FAT_ZEBRA` to the `Payment.Processor` enum. * Added `CLEAR` to the `TransactionReport.Tag` enum for use with the Report Transaction API. +* Added `WebServiceClient.Builder.maxRetries(int)` to bound transport-failure + retries (default 1; set 0 to disable). See the README for retry semantics. + **Behavior change:** previously, transient transport failures (connection + reset, broken pipe, etc.) surfaced to callers immediately. They are now + retried once by default; pass `.maxRetries(0)` to restore the prior + behavior. 4.2.0 (2026-02-26) ------------------ diff --git a/README.md b/README.md index 7b0abd26..17db334c 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,46 @@ exception will be thrown. See the API documentation for more details. +### Connection pooling and transport retries ### + +`WebServiceClient` reuses pooled HTTP connections for performance. Idle +connections can be silently closed by load balancers or other +intermediaries; when the next request reuses such a half-closed connection, +the JDK reports the failure as a `Connection reset`, `Broken pipe`, or +similar transport error. + +To smooth over these intermittent failures, the SDK retries once by +default. Most transport-level `IOException`s are retried; the SDK does +**not** retry: + +* **Timeouts** (`HttpTimeoutException`, including connect-phase timeouts). + The SDK honors the timeouts you configure rather than extending them. +* **Cancellation** (`InterruptedIOException`, or any interrupt observed + before the request runs). +* **Typically deterministic failures** — `UnknownHostException`, + `ConnectException`, `SSLHandshakeException`, `SSLPeerUnverifiedException`. + Retrying these would just delay surfacing a config bug. + +HTTP 4xx and 5xx responses are surfaced through the existing exception +hierarchy and are never retried. Request bodies are replayable, so retried +requests are byte-identical to the original. + +You can change the retry budget via the builder: + +```java +WebServiceClient client = new WebServiceClient.Builder(6, "ABCD567890") + .maxRetries(2) // up to two retries (three total attempts) + .build(); +``` + +Set `.maxRetries(0)` to disable the retry entirely. Negative values throw +`IllegalArgumentException`. + +If you frequently see `Connection reset` errors, you can also reduce the +JDK's keep-alive timeout via the system property +`jdk.httpclient.keepalive.timeout` (in seconds) to evict pooled connections +before any intermediary does so. + ### Exceptions ### Runtime exceptions: