Skip to content

Commit cf7336f

Browse files
committed
fix: set proper User-Agent and add retry delay
colly's default User-Agent identifies requests as coming from a scraping framework, which causes CDNs like Cloudflare to silently drop connections from CI runners. This commit sets a descriptive bot-style User-Agent instead. It also adds a 2s delay before retrying on status 0 and 503, matching the existing retry logic. Signed-off-by: vprashar2929 <vibhu.sharma2929@gmail.com>
1 parent 65d9272 commit cf7336f

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

  • pkg/mdformatter/linktransformer

pkg/mdformatter/linktransformer/link.go

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
218218
linktransformerMetrics := newLinktransformerMetrics(reg)
219219
transport := &http.Transport{
220220
Proxy: http.ProxyFromEnvironment,
221-
ForceAttemptHTTP2: true,
221+
ForceAttemptHTTP2: false,
222222
MaxIdleConns: 100,
223223
IdleConnTimeout: 90 * time.Second,
224224
TLSHandshakeTimeout: 10 * time.Second,
@@ -238,7 +238,7 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
238238
validateConfig: config,
239239
localLinks: map[string]*[]string{},
240240
remoteLinks: map[string]error{},
241-
c: colly.NewCollector(colly.Async(), colly.StdlibContext(ctx)),
241+
c: colly.NewCollector(colly.Async(), colly.StdlibContext(ctx), colly.UserAgent("Mozilla/5.0 (compatible; mdox/link-checker; +https://github.com/bwplotka/mdox)")),
242242
storage: nil,
243243
destFutures: map[futureKey]*futureResult{},
244244
l: linktransformerMetrics,
@@ -262,6 +262,7 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
262262
// on API (only search is below 100).
263263
if config.Timeout != "" {
264264
v.c.SetRequestTimeout(config.timeout)
265+
transport.ResponseHeaderTimeout = config.timeout
265266
}
266267

267268
if v.validateConfig.Cache.IsSet() && storage != nil {
@@ -334,7 +335,11 @@ func NewValidator(ctx context.Context, logger log.Logger, linksValidateConfig []
334335
break
335336
}
336337
response.Ctx.Put(numberOfRetriesKey, strconv.Itoa(retries+1))
337-
338+
select {
339+
case <-time.After(2 * time.Second):
340+
case <-v.c.Context.Done():
341+
return
342+
}
338343
if retryErr := response.Request.Retry(); retryErr != nil {
339344
v.remoteLinks[response.Ctx.Get(originalURLKey)] = fmt.Errorf("remote link retry %v: %w", response.Ctx.Get(originalURLKey), err)
340345
break

0 commit comments

Comments
 (0)