diff --git a/src/__tests__/commands/crawl.test.ts b/src/__tests__/commands/crawl.test.ts index a7de2f120..c10e7179a 100644 --- a/src/__tests__/commands/crawl.test.ts +++ b/src/__tests__/commands/crawl.test.ts @@ -343,7 +343,7 @@ describe('executeCrawl', () => { expect(mockClient.crawl).toHaveBeenCalledWith( 'https://example.com', expect.objectContaining({ - pollInterval: 5000, // Default poll interval + pollInterval: 5, // Default poll interval }) ); expect(result).toEqual({ @@ -371,7 +371,7 @@ describe('executeCrawl', () => { expect(mockClient.crawl).toHaveBeenCalledWith( 'https://example.com', expect.objectContaining({ - pollInterval: 10000, // Converted to milliseconds + pollInterval: 10, // seconds }) ); }); @@ -395,7 +395,7 @@ describe('executeCrawl', () => { expect(mockClient.crawl).toHaveBeenCalledWith( 'https://example.com', expect.objectContaining({ - timeout: 300000, // Converted to milliseconds + timeout: 300, // seconds }) ); }); @@ -422,8 +422,8 @@ describe('executeCrawl', () => { expect(mockClient.crawl).toHaveBeenCalledWith( 'https://example.com', expect.objectContaining({ - pollInterval: 5000, - timeout: 600000, + pollInterval: 5, + timeout: 600, limit: 50, maxDiscoveryDepth: 2, }) @@ -443,13 +443,14 @@ describe('executeCrawl', () => { vi.restoreAllMocks(); vi.useRealTimers(); }); - it('should use custom polling with progress when progress flag is set', async () => { const jobId = '550e8400-e29b-41d4-a716-446655440000'; + const mockStartResponse = { id: jobId, url: 'https://example.com', }; + const mockScrapingStatus = { id: jobId, status: 'scraping', @@ -457,6 +458,7 @@ describe('executeCrawl', () => { completed: 50, data: [], }; + const mockCompletedStatus = { id: jobId, status: 'completed', @@ -466,34 +468,69 @@ describe('executeCrawl', () => { }; mockClient.startCrawl.mockResolvedValue(mockStartResponse); - // First call returns scraping status, second returns completed + mockClient.getCrawlStatus .mockResolvedValueOnce(mockScrapingStatus) .mockResolvedValueOnce(mockCompletedStatus); - // Start the async operation const crawlPromise = executeCrawl({ urlOrJobId: 'https://example.com', wait: true, progress: true, - pollInterval: 0.001, // Very short interval for testing (1ms) + pollInterval: 1, // seconds }); - // Fast-forward timers to resolve the first setTimeout - await vi.advanceTimersByTimeAsync(1); - - // Fast-forward again to resolve the second setTimeout - await vi.advanceTimersByTimeAsync(1); + await vi.advanceTimersByTimeAsync(1000); + await vi.advanceTimersByTimeAsync(1000); const result = await crawlPromise; expect(mockClient.startCrawl).toHaveBeenCalledTimes(1); expect(mockClient.getCrawlStatus).toHaveBeenCalledTimes(2); expect(result.success).toBe(true); + if (result.success && 'data' in result) { expect(result.data.status).toBe('completed'); } }); + + it('should timeout correctly in progress mode', async () => { + const jobId = '550e8400-e29b-41d4-a716-446655440000'; + const mockStartResponse = { + id: jobId, + url: 'https://example.com', + }; + // Always return 'scraping' so crawl never completes (forces timeout) + const mockScrapingStatus = { + id: jobId, + status: 'scraping', + total: 100, + completed: 50, + data: [], + }; + + mockClient.startCrawl.mockResolvedValue(mockStartResponse); + // Always returns scraping (never completes) + mockClient.getCrawlStatus.mockResolvedValue(mockScrapingStatus); + + // Start the async operation + const crawlPromise = executeCrawl({ + urlOrJobId: 'https://example.com', + wait: true, + progress: true, + pollInterval: 1, // 1 second + timeout: 2, // 2 seconds + }); + + // Advance time beyond timeout + await vi.advanceTimersByTimeAsync(3000); + await vi.runAllTimersAsync(); // ensures all async chains resolve + await Promise.resolve(); // Flush microtasks explicitly + + const result = await crawlPromise; + expect(result.success).toBe(false); + expect(result.error).toMatch(/Timeout/i); + }); }); describe('Error handling', () => { diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts index ab00bea61..350b50401 100644 --- a/src/commands/crawl.ts +++ b/src/commands/crawl.ts @@ -57,7 +57,7 @@ export async function executeCrawl( } // Build crawl options - const crawlOptions: any = { + const crawlOptions: Partial & Record = { integration: 'cli', }; @@ -97,15 +97,15 @@ export async function executeCrawl( // If wait mode, use the convenience crawl method with polling if (wait) { - // Set polling options + // Set polling options (SDK expects seconds, not ms) if (pollInterval !== undefined) { - crawlOptions.pollInterval = pollInterval * 1000; // Convert to milliseconds + crawlOptions.pollInterval = pollInterval; // seconds } else { // Default poll interval: 5 seconds - crawlOptions.pollInterval = 5000; + crawlOptions.pollInterval = 5; } if (timeout !== undefined) { - crawlOptions.timeout = timeout * 1000; // Convert to milliseconds + crawlOptions.timeout = timeout; // seconds } // Show progress if requested - use custom polling for better UX @@ -117,13 +117,19 @@ export async function executeCrawl( process.stderr.write(`Crawling ${urlOrJobId}...\n`); process.stderr.write(`Job ID: ${jobId}\n`); - // Poll for status with progress updates - const pollMs = crawlOptions.pollInterval || 5000; + // Converts seconds -> ms Only here + const pollMs = + crawlOptions.pollInterval !== undefined + ? crawlOptions.pollInterval * 1000 + : 5000; const startTime = Date.now(); - const timeoutMs = timeout ? timeout * 1000 : undefined; + const timeoutMs = + crawlOptions.timeout !== undefined + ? crawlOptions.timeout * 1000 + : undefined; while (true) { - await new Promise((resolve) => setTimeout(resolve, pollMs)); + await new Promise((resolve) => setTimeout(resolve, pollMs)); const status = await app.getCrawlStatus(jobId); @@ -145,7 +151,7 @@ export async function executeCrawl( } // Check timeout - if (timeoutMs && Date.now() - startTime > timeoutMs) { + if (timeoutMs !== undefined && Date.now() - startTime > timeoutMs) { process.stderr.write('\n'); return { success: false,