diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py index dc3727e4..667138fa 100644 --- a/cycode/cli/apps/scan/code_scanner.py +++ b/cycode/cli/apps/scan/code_scanner.py @@ -279,7 +279,10 @@ def scan_documents( scan_batch_thread_func = _get_scan_documents_thread_func(ctx, is_git_diff, is_commit_range, scan_parameters) - if should_use_presigned_upload(scan_type): + # Presigned single-file upload is async-only; a --sync scan must stay on the batched inline path + # so it never builds one oversized zip to POST synchronously. + should_use_sync_flow = _should_use_sync_flow(ctx.info_name, scan_type, ctx.obj['sync']) + if should_use_presigned_upload(scan_type) and not should_use_sync_flow: errors, local_scan_results = _run_presigned_upload_scan( scan_batch_thread_func, scan_type, documents_to_scan, progress_bar, printer ) @@ -388,7 +391,11 @@ def _perform_scan( is_commit_range, on_upload_progress, ) - except requests.exceptions.RequestException: + except ( + requests.exceptions.RequestException, + custom_exceptions.RequestError, + custom_exceptions.SlowUploadConnectionError, + ): logger.warning('Direct upload to object storage failed. Falling back to upload via Cycode API. ') return _perform_scan_async( diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index a134f3f4..37ef2298 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -222,12 +222,13 @@ FILE_MAX_SIZE_LIMIT_IN_BYTES = 5000000 PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 5 * 1024 * 1024 * 1024 # 5 GB (S3 presigned POST limit) -PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE} +PRESIGNED_UPLOAD_SCAN_TYPES = {SAST_SCAN_TYPE, SECRET_SCAN_TYPE} DEFAULT_ZIP_MAX_SIZE_LIMIT_IN_BYTES = 20 * 1024 * 1024 ZIP_MAX_SIZE_LIMIT_IN_BYTES = { SCA_SCAN_TYPE: 200 * 1024 * 1024, SAST_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES, + SECRET_SCAN_TYPE: PRESIGNED_LINK_UPLOADED_ZIP_MAX_SIZE_LIMIT_IN_BYTES, } # scan in batches diff --git a/tests/cli/commands/scan/test_code_scanner.py b/tests/cli/commands/scan/test_code_scanner.py index 8a9f60b7..8b4a30b3 100644 --- a/tests/cli/commands/scan/test_code_scanner.py +++ b/tests/cli/commands/scan/test_code_scanner.py @@ -2,8 +2,11 @@ from os.path import normpath from unittest.mock import MagicMock, Mock, patch +import pytest + from cycode.cli import consts -from cycode.cli.apps.scan.code_scanner import scan_disk_files +from cycode.cli.apps.scan.code_scanner import _perform_scan, scan_disk_files, scan_documents +from cycode.cli.exceptions import custom_exceptions from cycode.cli.files_collector.file_excluder import _is_file_relevant_for_sca_scan from cycode.cli.files_collector.path_documents import _generate_document from cycode.cli.models import Document @@ -162,3 +165,75 @@ def test_entrypoint_cycode_not_added_for_single_file( assert len(entrypoint_docs) == 0 # Verify only the original documents are present assert len(documents_passed) == len(mock_documents) + + +@pytest.mark.parametrize( + ('scan_type', 'command_scan_type', 'sync_option', 'expect_presigned'), + [ + # SAST keeps uploading directly to S3 via a presigned URL (regression guard for the new sync gate). + (consts.SAST_SCAN_TYPE, 'path', False, True), + # Async secret scans now upload as a single file directly to S3 via a presigned URL. + (consts.SECRET_SCAN_TYPE, 'path', False, True), + # A --sync secret scan must stay on the batched inline path and never build one giant zip. + (consts.SECRET_SCAN_TYPE, 'path', True, False), + ], +) +@patch('cycode.cli.apps.scan.code_scanner.print_local_scan_results') +@patch('cycode.cli.apps.scan.code_scanner.set_issue_detected_by_scan_results') +@patch('cycode.cli.apps.scan.code_scanner.try_set_aggregation_report_url_if_needed') +@patch('cycode.cli.apps.scan.code_scanner.run_parallel_batched_scan') +@patch('cycode.cli.apps.scan.code_scanner._run_presigned_upload_scan') +def test_scan_documents_routes_upload_by_scan_type_and_sync( + mock_presigned_upload: Mock, + mock_batched_scan: Mock, + mock_aggregation: Mock, + mock_set_issue: Mock, + mock_print: Mock, + scan_type: str, + command_scan_type: str, + sync_option: bool, + expect_presigned: bool, +) -> None: + mock_presigned_upload.return_value = ([], []) + mock_batched_scan.return_value = ([], []) + + mock_ctx = MagicMock() + mock_ctx.info_name = command_scan_type + mock_ctx.obj = { + 'scan_type': scan_type, + 'progress_bar': MagicMock(), + 'console_printer': MagicMock(), + 'client': MagicMock(), + 'severity_threshold': None, + 'sync': sync_option, + } + documents = [Document('/repo/file.py', 'content', is_git_diff_format=False)] + + scan_documents(mock_ctx, documents, {}) + + assert mock_presigned_upload.called is expect_presigned + assert mock_batched_scan.called is (not expect_presigned) + + +@patch('cycode.cli.apps.scan.code_scanner._perform_scan_async') +@patch('cycode.cli.apps.scan.code_scanner._perform_scan_v4_async') +def test_perform_scan_falls_back_to_api_when_presigned_upload_raises_wrapped_error( + mock_v4_async: Mock, mock_async: Mock +) -> None: + # RequestConnectionError is a CycodeError, not a requests.RequestException — the fallback must still catch it. + mock_v4_async.side_effect = custom_exceptions.RequestConnectionError + fallback_result = object() + mock_async.return_value = fallback_result + + result = _perform_scan( + cycode_client=MagicMock(), + zipped_documents=MagicMock(), + scan_type=consts.SAST_SCAN_TYPE, + is_git_diff=False, + is_commit_range=False, + scan_parameters={}, + ) + + assert result is fallback_result + mock_v4_async.assert_called_once() + mock_async.assert_called_once() diff --git a/tests/cyclient/mocked_responses/scan_client.py b/tests/cyclient/mocked_responses/scan_client.py index c37c1d8a..b2be7ab5 100644 --- a/tests/cyclient/mocked_responses/scan_client.py +++ b/tests/cyclient/mocked_responses/scan_client.py @@ -5,6 +5,7 @@ import responses +from cycode.cli.utils.scan_utils import should_use_presigned_upload from cycode.cyclient.scan_client import ScanClient from tests.conftest import MOCKED_RESPONSES_PATH @@ -128,6 +129,38 @@ def get_scan_configuration_response(url: str) -> responses.Response: return responses.Response(method=responses.GET, url=url, json=json_response, status=200) +_PRESIGNED_UPLOAD_URL = 'https://cycode-tests.s3.amazonaws.com/presigned-upload' + + +def get_upload_link_url(scan_type: str, scan_client: ScanClient) -> str: + api_url = scan_client.scan_cycode_client.api_url + async_scan_type = scan_client.scan_config.get_async_scan_type(scan_type) + service_url = f'{scan_client.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/upload-link' + return f'{api_url}/{service_url}' + + +def get_upload_link_response(url: str) -> responses.Response: + json_response = {'upload_id': str(uuid4()), 'url': _PRESIGNED_UPLOAD_URL, 'presigned_post_fields': {}} + return responses.Response(method=responses.GET, url=url, json=json_response, status=200) + + +def get_presigned_upload_response() -> responses.Response: + return responses.Response(method=responses.POST, url=_PRESIGNED_UPLOAD_URL, status=204) + + +def get_scan_from_upload_id_url(scan_type: str, scan_client: ScanClient) -> str: + api_url = scan_client.scan_cycode_client.api_url + async_scan_type = scan_client.scan_config.get_async_scan_type(scan_type) + service_url = f'{scan_client.get_scan_service_v4_url_path(scan_type)}/{async_scan_type}/repository' + return f'{api_url}/{service_url}' + + +def get_scan_from_upload_id_response(url: str, scan_id: Optional[UUID] = None) -> responses.Response: + if not scan_id: + scan_id = uuid4() + return responses.Response(method=responses.POST, url=url, json={'scan_id': str(scan_id)}, status=200) + + def mock_remote_config_responses(responses_module: responses, scan_type: str, scan_client: ScanClient) -> None: responses_module.add(get_scan_configuration_response(get_scan_configuration_url(scan_type, scan_client))) @@ -136,9 +169,18 @@ def mock_scan_async_responses( responses_module: responses, scan_type: str, scan_client: ScanClient, scan_id: UUID, zip_content_path: Path ) -> None: mock_remote_config_responses(responses_module, scan_type, scan_client) - responses_module.add( - get_zipped_file_scan_async_response(get_zipped_file_scan_async_url(scan_type, scan_client), scan_id) - ) + + if should_use_presigned_upload(scan_type): + responses_module.add(get_upload_link_response(get_upload_link_url(scan_type, scan_client))) + responses_module.add(get_presigned_upload_response()) + responses_module.add( + get_scan_from_upload_id_response(get_scan_from_upload_id_url(scan_type, scan_client), scan_id) + ) + else: + responses_module.add( + get_zipped_file_scan_async_response(get_zipped_file_scan_async_url(scan_type, scan_client), scan_id) + ) + responses_module.add(get_scan_details_response(get_scan_details_url(scan_type, scan_id, scan_client), scan_id)) responses_module.add(get_detection_rules_response(get_detection_rules_url(scan_client))) responses_module.add(get_scan_detections_response(get_scan_detections_url(scan_client), scan_id, zip_content_path))