diff --git a/precommit/src/main/shell/core.d/patchfiles.sh b/precommit/src/main/shell/core.d/patchfiles.sh index 67dec857a..d8dd4d5db 100755 --- a/precommit/src/main/shell/core.d/patchfiles.sh +++ b/precommit/src/main/shell/core.d/patchfiles.sh @@ -346,15 +346,18 @@ function patchfile_dryrun_driver ## @stability evolving function dryrun_both_files { - # always prefer the patch file since git format patch files support a lot more - if [[ -f "${INPUT_PATCH_FILE}" ]] && patchfile_dryrun_driver "${INPUT_PATCH_FILE}"; then - INPUT_APPLY_TYPE="patch" - INPUT_APPLIED_FILE="${INPUT_PATCH_FILE}" - return 0 - elif [[ -f "${INPUT_DIFF_FILE}" ]] && patchfile_dryrun_driver "${INPUT_DIFF_FILE}"; then + # prefer the cumulative diff: it represents the PR's net change and avoids + # stale-file bugs when per-commit .patch stanzas add then rename/delete files + # (YETUS-983). Binary files are preserved when the diff is generated locally + # with --binary. + if [[ -f "${INPUT_DIFF_FILE}" ]] && patchfile_dryrun_driver "${INPUT_DIFF_FILE}"; then INPUT_APPLY_TYPE="diff" INPUT_APPLIED_FILE="${INPUT_DIFF_FILE}" return 0 + elif [[ -f "${INPUT_PATCH_FILE}" ]] && patchfile_dryrun_driver "${INPUT_PATCH_FILE}"; then + INPUT_APPLY_TYPE="patch" + INPUT_APPLIED_FILE="${INPUT_PATCH_FILE}" + return 0 else return 1 fi diff --git a/precommit/src/main/shell/plugins.d/github.sh b/precommit/src/main/shell/plugins.d/github.sh index ede2f0ce5..c5613cec0 100755 --- a/precommit/src/main/shell/plugins.d/github.sh +++ b/precommit/src/main/shell/plugins.d/github.sh @@ -346,6 +346,7 @@ function github_determine_branch ## @param PR number ## @param patch output file ## @param diff output file +## @param skip_patch (optional, default false) - skip format-patch generation ## @return 0 on success ## @return 1 on failure function github_generate_local_diff @@ -353,6 +354,7 @@ function github_generate_local_diff declare input=$1 declare patchout=$2 declare diffout=$3 + declare skip_patch=$4 declare base_ref declare base_sha declare head_sha @@ -415,16 +417,18 @@ function github_generate_local_diff return 1 fi - echo " Generating patch/diff locally via git" - - if ! "${GIT}" format-patch --stdout "${merge_base}..${head_sha}" > "${patchout}" 2>/dev/null; then - yetus_debug "github: git format-patch failed" - popd >/dev/null || true - return 1 + if [[ "${skip_patch}" != true ]]; then + echo " Generating local patch via git format-patch" + if ! "${GIT}" format-patch --stdout "${merge_base}..${head_sha}" > "${patchout}" 2>/dev/null; then + yetus_debug "github: git format-patch failed" + popd >/dev/null || true + return 1 + fi fi - if ! "${GIT}" diff "${merge_base}..${head_sha}" > "${diffout}" 2>/dev/null; then - yetus_debug "github: git diff failed" + echo " Generating local binary diff via git diff --binary" + if ! "${GIT}" diff --binary "${merge_base}..${head_sha}" > "${diffout}" 2>/dev/null; then + yetus_debug "github: git diff --binary failed" popd >/dev/null || true return 1 fi @@ -480,10 +484,8 @@ function github_locate_pr_patch return 1 fi - # we always pull both the .patch and .diff versions - # but set the default to be .patch so that binary files work. - # The downside of this is that the patch files are - # significantly larger and therefore take longer to process + # We pull both .patch and .diff versions. The diff is preferred + # (see dryrun_both_files / YETUS-983); .patch is kept as a fallback. apiurl="${GITHUB_API_URL}/repos/${GITHUB_REPO}/pulls/${input}" @@ -515,7 +517,7 @@ function github_locate_pr_patch "${GITHUB_AUTH[@]}" \ "${GITHUB_API_URL}/repos/${GITHUB_REPO}/pulls/${input}"; then yetus_debug "github_locate_patch: API patch download failed" - if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}"; then + if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}" false; then yetus_debug "github_locate_patch: local git fallback also failed" return 1 fi @@ -523,15 +525,17 @@ function github_locate_pr_patch fi if [[ "${diffgenerated}" != true ]]; then - echo " Diff data at $(date)" - if ! "${CURL}" --silent --fail \ - -H "Accept: application/vnd.github.v3.diff" \ - --output "${diffout}" \ - --location \ - "${GITHUB_AUTH[@]}" \ - "${apiurl}"; then - yetus_debug "github_locate_patch: cannot download diff" - return 1 + if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}" true; then + echo " Local binary diff failed, falling back to API diff at $(date)" + if ! "${CURL}" --silent --fail \ + -H "Accept: application/vnd.github.v3.diff" \ + --output "${diffout}" \ + --location \ + "${GITHUB_AUTH[@]}" \ + "${apiurl}"; then + yetus_debug "github_locate_patch: cannot download diff" + return 1 + fi fi fi diff --git a/precommit/src/main/shell/test-patch.sh b/precommit/src/main/shell/test-patch.sh index 518afd8c2..b471ccf91 100755 --- a/precommit/src/main/shell/test-patch.sh +++ b/precommit/src/main/shell/test-patch.sh @@ -1441,10 +1441,7 @@ function apply_patch_file { if [[ "${INPUT_APPLIED_FILE}" == "${INPUT_DIFF_FILE}" ]]; then - add_vote_table_v2 '-0' patch "" "Used diff version of patch file. Binary files and potentially other changes not applied. Please rebase and squash commits if necessary." - if [[ "${GITHUB_ACTIONS}" == true ]]; then - echo "::warning::Used diff version. Binary files and potentially other changes not applied. Try a rebase and/or squashing commits." - fi + add_vote_table_v2 0 patch "" "Used cumulative diff to apply patch. Per-commit history not preserved." big_console_header "Applying diff to ${PATCH_BRANCH}" else big_console_header "Applying patch to ${PATCH_BRANCH}" diff --git a/precommit/src/test/shell/git_binary_diff.bats b/precommit/src/test/shell/git_binary_diff.bats new file mode 100644 index 000000000..1c233de87 --- /dev/null +++ b/precommit/src/test/shell/git_binary_diff.bats @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load functions_test_helper + +TESTREPOS=() + +init_test_repo() { + local repodir + repodir=$(mktemp -d /tmp/yetus-bats-XXXXXX) + TESTREPOS+=("${repodir}") + unset GIT_DIR GIT_WORK_TREE GIT_DISCOVERY_ACROSS_FILESYSTEM + export GIT_CEILING_DIRECTORIES=/tmp + pushd "${repodir}" >/dev/null || return 1 + git init -q --initial-branch=main + git config user.email "test@test.com" + git config user.name "Test" + git config commit.gpgsign false + git config tag.gpgsign false + git config core.hooksPath /dev/null +} + +teardown() { + popd >/dev/null 2>&1 || true + for d in "${TESTREPOS[@]}"; do + rm -rf "${d}" + done + TESTREPOS=() + if [[ -n "${TMP}" ]]; then + rm -rf "${TMP}" + fi +} + +@test "git diff --binary round-trip preserves binary content" { + init_test_repo + + echo "initial" > readme.txt + git add readme.txt + git commit -q -m "initial" + + git checkout -q -b feature + printf '\x00\x01\x02\x03BINARY\xff\xfe' > binary.dat + echo "modified" > readme.txt + git add binary.dat readme.txt + git commit -q -m "add binary" + + local merge_base + merge_base=$(git merge-base main feature) + git diff --binary "${merge_base}..feature" > "${TESTREPOS[0]}/test.diff" + + git checkout -q main + git apply --binary "${TESTREPOS[0]}/test.diff" + + [ -f binary.dat ] + local actual + actual=$(od -A n -t x1 binary.dat | tr -d ' \n') + [ "${actual}" = "0001020342494e415259fffe" ] # pragma: allowlist secret + [ "$(cat readme.txt)" = "modified" ] +} + +@test "YETUS-983: format-patch leaves stale file after add-delete, cumulative diff does not" { + init_test_repo + + echo "initial" > Helper.java + git add -A && git commit -q -m "initial" + + git checkout -q -b feature + + printf 'public class BigController {\n public void doStuff() {}\n}\n' > BigController.java + git add BigController.java && git commit -q -m "add BigController" + + printf 'public class BigController {\n public void doStuff() {}\n public void doMore() {}\n}\n' > BigController.java + git add -A && git commit -q -m "modify BigController" + + git rm -q BigController.java + printf 'public class SmallController {\n public void doStuff() {}\n}\n' > SmallController.java + git add -A && git commit -q -m "replace BigController with SmallController" + + printf 'public class SmallController {\n public void doStuff() {}\n public void doExtra() {}\n}\n' > SmallController.java + git add -A && git commit -q -m "modify SmallController" + + local merge_base patchfile difffile tmpfiles + merge_base=$(git merge-base main feature) + tmpfiles=$(mktemp -d /tmp/yetus-bats-XXXXXX) + TESTREPOS+=("${tmpfiles}") + patchfile="${tmpfiles}/multi.patch" + difffile="${tmpfiles}/cumulative.diff" + git format-patch --stdout "${merge_base}..feature" > "${patchfile}" + git diff --binary "${merge_base}..feature" > "${difffile}" + + # format-patch dry-run passes (this is the YETUS-983 trap) + git checkout -q main + git apply --binary --check -p1 "${patchfile}" + + # format-patch actual apply "succeeds" but leaves stale file + git apply --binary -p1 "${patchfile}" + [ -f BigController.java ] + + # cumulative diff produces correct tree + git checkout -f -q main + git clean -fd -q + git apply --binary -p1 "${difffile}" + [ ! -f BigController.java ] + [ -f SmallController.java ] +}