From 9ea5f3a9f7903aa1591384af6c0f20f032488a70 Mon Sep 17 00:00:00 2001 From: Ilia Shkolyar Date: Mon, 22 Jun 2026 13:17:23 +0300 Subject: [PATCH 1/2] CM-67391: Use S3 presigned upload for secret CLI scans Add secret to PRESIGNED_UPLOAD_SCAN_TYPES and give it the 5GB presigned zip-size limit so async secret scans upload as a single file directly to object storage (BYOS-aware), mirroring SAST. Gate the presigned single-file path on non-sync flow so a --sync secret scan keeps its bounded batched inline upload. Co-Authored-By: Claude Opus 4.8 (1M context) --- cycode/cli/apps/scan/code_scanner.py | 5 +- cycode/cli/consts.py | 3 +- tests/cli/commands/scan/test_code_scanner.py | 52 +++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py index dc3727e4..642fe225 100644 --- a/cycode/cli/apps/scan/code_scanner.py +++ b/cycode/cli/apps/scan/code_scanner.py @@ -279,7 +279,10 @@ def scan_documents( scan_batch_thread_func = _get_scan_documents_thread_func(ctx, is_git_diff, is_commit_range, scan_parameters) - if should_use_presigned_upload(scan_type): + # Presigned single-file upload is async-only; a --sync scan must stay on the batched inline path + # so it never builds one oversized zip to POST synchronously. + should_use_sync_flow = _should_use_sync_flow(ctx.info_name, scan_type, ctx.obj['sync']) + if should_use_presigned_upload(scan_type) and not should_use_sync_flow: errors, local_scan_results = _run_presigned_upload_scan( scan_batch_thread_func, scan_type, documents_to_scan, progress_bar, printer ) diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index 9ef19eb2..b61bce17 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -220,12 +220,13 @@ FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000 PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 5 * 1024 * 1024 * 1024 # 5 GB (S3 presigned POST limit) -PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE} +PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE, SECRET_SCAN_TYPE} DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024 ZIP_MAX_SIZE_LIMIT_IN_BYTES = { SCA_SCAN_TYPE: 200 * 1024 * 1024, SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES, + SECRET_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES, } # scan in batches diff --git a/tests/cli/commands/scan/test_code_scanner.py b/tests/cli/commands/scan/test_code_scanner.py index 8a9f60b7..fb83c317 100644 --- a/tests/cli/commands/scan/test_code_scanner.py +++ b/tests/cli/commands/scan/test_code_scanner.py @@ -2,8 +2,10 @@ from os.path import normpath from unittest.mock import MagicMock, Mock, patch +import pytest + from cycode.cli import consts -from cycode.cli.apps.scan.code_scanner import scan_disk_files +from cycode.cli.apps.scan.code_scanner import scan_disk_files, scan_documents from cycode.cli.files_collector.file_excluder import _is_file_relevant_for_sca_scan from cycode.cli.files_collector.path_documents import _generate_document from cycode.cli.models import Document @@ -162,3 +164,51 @@ def test_entrypoint_cycode_not_added_for_single_file( assert len(entrypoint_docs) == 0 # Verify only the original documents are present assert len(documents_passed) == len(mock_documents) + + +@pytest.mark.parametrize( + ('scan_type', 'command_scan_type', 'sync_option', 'expect_presigned'), + [ + # SAST keeps uploading directly to S3 via a presigned URL (regression guard for the new sync gate). + (consts.SAST_SCAN_TYPE, 'path', False, True), + # Async secret scans now upload as a single file directly to S3 via a presigned URL. + (consts.SECRET_SCAN_TYPE, 'path', False, True), + # A --sync secret scan must stay on the batched inline path and never build one giant zip. + (consts.SECRET_SCAN_TYPE, 'path', True, False), + ], +) +@patch('cycode.cli.apps.scan.code_scanner.print_local_scan_results') +@patch('cycode.cli.apps.scan.code_scanner.set_issue_detected_by_scan_results') +@patch('cycode.cli.apps.scan.code_scanner.try_set_aggregation_report_url_if_needed') +@patch('cycode.cli.apps.scan.code_scanner.run_parallel_batched_scan') +@patch('cycode.cli.apps.scan.code_scanner._run_presigned_upload_scan') +def test_scan_documents_routes_upload_by_scan_type_and_sync( + mock_presigned_upload: Mock, + mock_batched_scan: Mock, + mock_aggregation: Mock, + mock_set_issue: Mock, + mock_print: Mock, + scan_type: str, + command_scan_type: str, + sync_option: bool, + expect_presigned: bool, +) -> None: + mock_presigned_upload.return_value = ([], []) + mock_batched_scan.return_value = ([], []) + + mock_ctx = MagicMock() + mock_ctx.info_name = command_scan_type + mock_ctx.obj = { + 'scan_type': scan_type, + 'progress_bar': MagicMock(), + 'console_printer': MagicMock(), + 'client': MagicMock(), + 'severity_threshold': None, + 'sync': sync_option, + } + documents = [Document('/repo/file.py', 'content', is_git_diff_format=False)] + + scan_documents(mock_ctx, documents, {}) + + assert mock_presigned_upload.called is expect_presigned + assert mock_batched_scan.called is (not expect_presigned) From 5ee4393577f0c5cd83b504ca464449049f5658fd Mon Sep 17 00:00:00 2001 From: Ilia Shkolyar Date: Thu, 2 Jul 2026 15:40:14 +0300 Subject: [PATCH 2/2] CM-67391: Fix secret presigned upload test mocks and harden fallback Switching secret scans to the presigned upload path routed them through get_upload_link -> S3 -> scan_repository_from_upload_id, none of which were registered by mock_scan_async_responses. The scan hit an unmocked endpoint, returned 0 violations, and failed test_passing_output_option. - Extend mock_scan_async_responses to register the presigned endpoints (upload-link, S3 POST, repository) for presigned scan types, branching on should_use_presigned_upload. - Widen the presigned-upload fallback in _perform_scan to also catch the client's wrapped RequestError/SlowUploadConnectionError, not just raw requests.RequestException. A connection/timeout error from get_upload_link or the scan trigger otherwise never fell back to the Cycode-API upload. - Add a regression test covering the wrapped-exception fallback. Co-Authored-By: Claude Opus 4.8 --- cycode/cli/apps/scan/code_scanner.py | 6 ++- tests/cli/commands/scan/test_code_scanner.py | 27 ++++++++++- .../cyclient/mocked_responses/scan_client.py | 48 +++++++++++++++++-- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py index 642fe225..667138fa 100644 --- a/cycode/cli/apps/scan/code_scanner.py +++ b/cycode/cli/apps/scan/code_scanner.py @@ -391,7 +391,11 @@ def _perform_scan( is_commit_range, on_upload_progress, ) - except requests.exceptions.RequestException: + except ( + requests.exceptions.RequestException, + custom_exceptions.RequestError, + custom_exceptions.SlowUploadConnectionError, + ): logger.warning('Direct upload to object storage failed. Falling back to upload via Cycode API. ') return _perform_scan_async( diff --git a/tests/cli/commands/scan/test_code_scanner.py b/tests/cli/commands/scan/test_code_scanner.py index fb83c317..8b4a30b3 100644 --- a/tests/cli/commands/scan/test_code_scanner.py +++ b/tests/cli/commands/scan/test_code_scanner.py @@ -5,7 +5,8 @@ import pytest from cycode.cli import consts -from cycode.cli.apps.scan.code_scanner import scan_disk_files, scan_documents +from cycode.cli.apps.scan.code_scanner import _perform_scan, scan_disk_files, scan_documents +from cycode.cli.exceptions import custom_exceptions from cycode.cli.files_collector.file_excluder import _is_file_relevant_for_sca_scan from cycode.cli.files_collector.path_documents import _generate_document from cycode.cli.models import Document @@ -212,3 +213,27 @@ def test_scan_documents_routes_upload_by_scan_type_and_sync( assert mock_presigned_upload.called is expect_presigned assert mock_batched_scan.called is (not expect_presigned) + + +@patch('cycode.cli.apps.scan.code_scanner._perform_scan_async') +@patch('cycode.cli.apps.scan.code_scanner._perform_scan_v4_async') +def test_perform_scan_falls_back_to_api_when_presigned_upload_raises_wrapped_error( + mock_v4_async: Mock, mock_async: Mock +) -> None: + # RequestConnectionError is a CycodeError, not a requests.RequestException — the fallback must still catch it. + mock_v4_async.side_effect = custom_exceptions.RequestConnectionError + fallback_result = object() + mock_async.return_value = fallback_result + + result = _perform_scan( + cycode_client=MagicMock(), + zipped_documents=MagicMock(), + scan_type=consts.SAST_SCAN_TYPE, + is_git_diff=False, + is_commit_range=False, + scan_parameters={}, + ) + + assert result is fallback_result + mock_v4_async.assert_called_once() + mock_async.assert_called_once() diff --git a/tests/cyclient/mocked_responses/scan_client.py b/tests/cyclient/mocked_responses/scan_client.py index c37c1d8a..b2be7ab5 100644 --- a/tests/cyclient/mocked_responses/scan_client.py +++ b/tests/cyclient/mocked_responses/scan_client.py @@ -5,6 +5,7 @@ import responses +from cycode.cli.utils.scan_utils import should_use_presigned_upload from cycode.cyclient.scan_client import ScanClient from tests.conftest import MOCKED_RESPONSES_PATH @@ -128,6 +129,38 @@ def get_scan_configuration_response(url: str) -> responses.Response: return responses.Response(method=responses.GET, url=url, json=json_response, status=200) +_PRESIGNED_UPLOAD_URL = 'https://cycode-tests.s3.amazonaws.com/presigned-upload' + + +def get_upload_link_url(scan_type: str, scan_client: ScanClient) -> str: + api_url = scan_client.scan_cycode_client.api_url + async_scan_type = scan_client.scan_config.get_async_scan_type(scan_type) + service_url = f'{scan_client.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/upload-link' + return f'{api_url}/{service_url}' + + +def get_upload_link_response(url: str) -> responses.Response: + json_response = {'upload_id': str(uuid4()), 'url': _PRESIGNED_UPLOAD_URL, 'presigned_post_fields': {}} + return responses.Response(method=responses.GET, url=url, json=json_response, status=200) + + +def get_presigned_upload_response() -> responses.Response: + return responses.Response(method=responses.POST, url=_PRESIGNED_UPLOAD_URL, status=204) + + +def get_scan_from_upload_id_url(scan_type: str, scan_client: ScanClient) -> str: + api_url = scan_client.scan_cycode_client.api_url + async_scan_type = scan_client.scan_config.get_async_scan_type(scan_type) + service_url = f'{scan_client.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/repository' + return f'{api_url}/{service_url}' + + +def get_scan_from_upload_id_response(url: str, scan_id: Optional[UUID] = None) -> responses.Response: + if not scan_id: + scan_id = uuid4() + return responses.Response(method=responses.POST, url=url, json={'scan_id': str(scan_id)}, status=200) + + def mock_remote_config_responses(responses_module: responses, scan_type: str, scan_client: ScanClient) -> None: responses_module.add(get_scan_configuration_response(get_scan_configuration_url(scan_type, scan_client))) @@ -136,9 +169,18 @@ def mock_scan_async_responses( responses_module: responses, scan_type: str, scan_client: ScanClient, scan_id: UUID, zip_content_path: Path ) -> None: mock_remote_config_responses(responses_module, scan_type, scan_client) - responses_module.add( - get_zipped_file_scan_async_response(get_zipped_file_scan_async_url(scan_type, scan_client), scan_id) - ) + + if should_use_presigned_upload(scan_type): + responses_module.add(get_upload_link_response(get_upload_link_url(scan_type, scan_client))) + responses_module.add(get_presigned_upload_response()) + responses_module.add( + get_scan_from_upload_id_response(get_scan_from_upload_id_url(scan_type, scan_client), scan_id) + ) + else: + responses_module.add( + get_zipped_file_scan_async_response(get_zipped_file_scan_async_url(scan_type, scan_client), scan_id) + ) + responses_module.add(get_scan_details_response(get_scan_details_url(scan_type, scan_id, scan_client), scan_id)) responses_module.add(get_detection_rules_response(get_detection_rules_url(scan_client))) responses_module.add(get_scan_detections_response(get_scan_detections_url(scan_client), scan_id, zip_content_path))