From dbcb1b2d8c885bb088ea196539a8a044ba9eef25 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Fri, 8 May 2026 08:39:00 +0200 Subject: [PATCH 1/3] YETUS-983. test-patch confused by multiple commits that include rename Flip dryrun_both_files to prefer cumulative .diff over per-commit .patch. The cumulative diff represents the PR's net change, avoiding stale files left on disk when format-patch stanzas add then rename/delete the same file. Generate the local diff with git diff --binary to preserve binary file handling that GitHub's API .diff endpoint strips. Fall back to the API .diff when local generation fails. --- precommit/src/main/shell/core.d/patchfiles.sh | 15 +++--- precommit/src/main/shell/plugins.d/github.sh | 48 +++++++++-------- precommit/src/main/shell/test-patch.sh | 5 +- precommit/src/test/shell/git_binary_diff.bats | 53 +++++++++++++++++++ 4 files changed, 89 insertions(+), 32 deletions(-) create mode 100644 precommit/src/test/shell/git_binary_diff.bats diff --git a/precommit/src/main/shell/core.d/patchfiles.sh b/precommit/src/main/shell/core.d/patchfiles.sh index 67dec857a..d8dd4d5db 100755 --- a/precommit/src/main/shell/core.d/patchfiles.sh +++ b/precommit/src/main/shell/core.d/patchfiles.sh @@ -346,15 +346,18 @@ function patchfile_dryrun_driver ## @stability evolving function dryrun_both_files { - # always prefer the patch file since git format patch files support a lot more - if [[ -f "${INPUT_PATCH_FILE}" ]] && patchfile_dryrun_driver "${INPUT_PATCH_FILE}"; then - INPUT_APPLY_TYPE="patch" - INPUT_APPLIED_FILE="${INPUT_PATCH_FILE}" - return 0 - elif [[ -f "${INPUT_DIFF_FILE}" ]] && patchfile_dryrun_driver "${INPUT_DIFF_FILE}"; then + # prefer the cumulative diff: it represents the PR's net change and avoids + # stale-file bugs when per-commit .patch stanzas add then rename/delete files + # (YETUS-983). Binary files are preserved when the diff is generated locally + # with --binary. + if [[ -f "${INPUT_DIFF_FILE}" ]] && patchfile_dryrun_driver "${INPUT_DIFF_FILE}"; then INPUT_APPLY_TYPE="diff" INPUT_APPLIED_FILE="${INPUT_DIFF_FILE}" return 0 + elif [[ -f "${INPUT_PATCH_FILE}" ]] && patchfile_dryrun_driver "${INPUT_PATCH_FILE}"; then + INPUT_APPLY_TYPE="patch" + INPUT_APPLIED_FILE="${INPUT_PATCH_FILE}" + return 0 else return 1 fi diff --git a/precommit/src/main/shell/plugins.d/github.sh b/precommit/src/main/shell/plugins.d/github.sh index ede2f0ce5..c5613cec0 100755 --- a/precommit/src/main/shell/plugins.d/github.sh +++ b/precommit/src/main/shell/plugins.d/github.sh @@ -346,6 +346,7 @@ function github_determine_branch ## @param PR number ## @param patch output file ## @param diff output file +## @param skip_patch (optional, default false) - skip format-patch generation ## @return 0 on success ## @return 1 on failure function github_generate_local_diff @@ -353,6 +354,7 @@ function github_generate_local_diff declare input=$1 declare patchout=$2 declare diffout=$3 + declare skip_patch=$4 declare base_ref declare base_sha declare head_sha @@ -415,16 +417,18 @@ function github_generate_local_diff return 1 fi - echo " Generating patch/diff locally via git" - - if ! "${GIT}" format-patch --stdout "${merge_base}..${head_sha}" > "${patchout}" 2>/dev/null; then - yetus_debug "github: git format-patch failed" - popd >/dev/null || true - return 1 + if [[ "${skip_patch}" != true ]]; then + echo " Generating local patch via git format-patch" + if ! "${GIT}" format-patch --stdout "${merge_base}..${head_sha}" > "${patchout}" 2>/dev/null; then + yetus_debug "github: git format-patch failed" + popd >/dev/null || true + return 1 + fi fi - if ! "${GIT}" diff "${merge_base}..${head_sha}" > "${diffout}" 2>/dev/null; then - yetus_debug "github: git diff failed" + echo " Generating local binary diff via git diff --binary" + if ! "${GIT}" diff --binary "${merge_base}..${head_sha}" > "${diffout}" 2>/dev/null; then + yetus_debug "github: git diff --binary failed" popd >/dev/null || true return 1 fi @@ -480,10 +484,8 @@ function github_locate_pr_patch return 1 fi - # we always pull both the .patch and .diff versions - # but set the default to be .patch so that binary files work. - # The downside of this is that the patch files are - # significantly larger and therefore take longer to process + # We pull both .patch and .diff versions. The diff is preferred + # (see dryrun_both_files / YETUS-983); .patch is kept as a fallback. apiurl="${GITHUB_API_URL}/repos/${GITHUB_REPO}/pulls/${input}" @@ -515,7 +517,7 @@ function github_locate_pr_patch "${GITHUB_AUTH[@]}" \ "${GITHUB_API_URL}/repos/${GITHUB_REPO}/pulls/${input}"; then yetus_debug "github_locate_patch: API patch download failed" - if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}"; then + if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}" false; then yetus_debug "github_locate_patch: local git fallback also failed" return 1 fi @@ -523,15 +525,17 @@ function github_locate_pr_patch fi if [[ "${diffgenerated}" != true ]]; then - echo " Diff data at $(date)" - if ! "${CURL}" --silent --fail \ - -H "Accept: application/vnd.github.v3.diff" \ - --output "${diffout}" \ - --location \ - "${GITHUB_AUTH[@]}" \ - "${apiurl}"; then - yetus_debug "github_locate_patch: cannot download diff" - return 1 + if ! github_generate_local_diff "${input}" "${patchout}" "${diffout}" true; then + echo " Local binary diff failed, falling back to API diff at $(date)" + if ! "${CURL}" --silent --fail \ + -H "Accept: application/vnd.github.v3.diff" \ + --output "${diffout}" \ + --location \ + "${GITHUB_AUTH[@]}" \ + "${apiurl}"; then + yetus_debug "github_locate_patch: cannot download diff" + return 1 + fi fi fi diff --git a/precommit/src/main/shell/test-patch.sh b/precommit/src/main/shell/test-patch.sh index 518afd8c2..b471ccf91 100755 --- a/precommit/src/main/shell/test-patch.sh +++ b/precommit/src/main/shell/test-patch.sh @@ -1441,10 +1441,7 @@ function apply_patch_file { if [[ "${INPUT_APPLIED_FILE}" == "${INPUT_DIFF_FILE}" ]]; then - add_vote_table_v2 '-0' patch "" "Used diff version of patch file. Binary files and potentially other changes not applied. Please rebase and squash commits if necessary." - if [[ "${GITHUB_ACTIONS}" == true ]]; then - echo "::warning::Used diff version. Binary files and potentially other changes not applied. Try a rebase and/or squashing commits." - fi + add_vote_table_v2 0 patch "" "Used cumulative diff to apply patch. Per-commit history not preserved." big_console_header "Applying diff to ${PATCH_BRANCH}" else big_console_header "Applying patch to ${PATCH_BRANCH}" diff --git a/precommit/src/test/shell/git_binary_diff.bats b/precommit/src/test/shell/git_binary_diff.bats new file mode 100644 index 000000000..b3e76535b --- /dev/null +++ b/precommit/src/test/shell/git_binary_diff.bats @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load functions_test_helper + +@test "git diff --binary round-trip preserves binary content" { + mkdir -p "${TMP}/repo" + pushd "${TMP}/repo" >/dev/null || return 1 + git init -q --initial-branch=main + git config user.email "test@test.com" + git config user.name "Test" + git config commit.gpgsign false + git config tag.gpgsign false + git config core.hooksPath /dev/null + + echo "initial" > readme.txt + git add readme.txt + git commit -q -m "initial" + + git checkout -q -b feature + printf '\x00\x01\x02\x03BINARY\xff\xfe' > binary.dat + echo "modified" > readme.txt + git add binary.dat readme.txt + git commit -q -m "add binary" + + local merge_base + merge_base=$(git merge-base main feature 2>/dev/null || git merge-base master feature) + git diff --binary "${merge_base}..feature" > "${TMP}/test.diff" + + git checkout -q main 2>/dev/null || git checkout -q master + git apply --binary "${TMP}/test.diff" + + [ -f binary.dat ] + local actual + actual=$(xxd -p binary.dat | tr -d '\n') + [ "${actual}" = "0001020342494e415259fffe" ] + [ "$(cat readme.txt)" = "modified" ] + + popd >/dev/null || return 1 +} From f080d9cecdd966680cbb0b90510f122f8775e729 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Fri, 8 May 2026 09:26:38 +0200 Subject: [PATCH 2/3] Add bats test reproducing the stale-file bug --- precommit/src/test/shell/git_binary_diff.bats | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/precommit/src/test/shell/git_binary_diff.bats b/precommit/src/test/shell/git_binary_diff.bats index b3e76535b..fca864b08 100644 --- a/precommit/src/test/shell/git_binary_diff.bats +++ b/precommit/src/test/shell/git_binary_diff.bats @@ -51,3 +51,58 @@ load functions_test_helper popd >/dev/null || return 1 } + +@test "YETUS-983: format-patch leaves stale file after add-delete, cumulative diff does not" { + mkdir -p "${TMP}/repo" + pushd "${TMP}/repo" >/dev/null || return 1 + git init -q --initial-branch=main + git config user.email "test@test.com" + git config user.name "Test" + git config commit.gpgsign false + git config tag.gpgsign false + git config core.hooksPath /dev/null + + echo "initial" > Helper.java + git add -A && git commit -q -m "initial" + + git checkout -q -b feature + + # commit 1: add BigController + printf 'public class BigController {\n public void doStuff() {}\n}\n' > BigController.java + git add BigController.java && git commit -q -m "add BigController" + + # commit 2: modify BigController + printf 'public class BigController {\n public void doStuff() {}\n public void doMore() {}\n}\n' > BigController.java + git add -A && git commit -q -m "modify BigController" + + # commit 3: delete BigController, add SmallController + git rm -q BigController.java + printf 'public class SmallController {\n public void doStuff() {}\n}\n' > SmallController.java + git add -A && git commit -q -m "replace BigController with SmallController" + + # commit 4: modify SmallController + printf 'public class SmallController {\n public void doStuff() {}\n public void doExtra() {}\n}\n' > SmallController.java + git add -A && git commit -q -m "modify SmallController" + + local merge_base + merge_base=$(git merge-base main feature) + git format-patch --stdout "${merge_base}..feature" > "${TMP}/multi.patch" + git diff --binary "${merge_base}..feature" > "${TMP}/cumulative.diff" + + # format-patch dry-run passes (this is the YETUS-983 trap) + git checkout -q main + git apply --binary --check -p1 "${TMP}/multi.patch" + + # format-patch actual apply "succeeds" but leaves stale file + git apply --binary -p1 "${TMP}/multi.patch" + [ -f BigController.java ] + + # cumulative diff produces correct tree + git checkout -f -q main + git clean -fd -q + git apply --binary -p1 "${TMP}/cumulative.diff" + [ ! -f BigController.java ] + [ -f SmallController.java ] + + popd >/dev/null || return 1 +} From 6a8a8ce0170f722dde71b3a2f13cfab583a06361 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Fri, 8 May 2026 09:39:54 +0200 Subject: [PATCH 3/3] Fix bats tests for CI environment --- precommit/src/test/shell/git_binary_diff.bats | 72 +++++++++++-------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/precommit/src/test/shell/git_binary_diff.bats b/precommit/src/test/shell/git_binary_diff.bats index fca864b08..1c233de87 100644 --- a/precommit/src/test/shell/git_binary_diff.bats +++ b/precommit/src/test/shell/git_binary_diff.bats @@ -16,15 +16,36 @@ load functions_test_helper -@test "git diff --binary round-trip preserves binary content" { - mkdir -p "${TMP}/repo" - pushd "${TMP}/repo" >/dev/null || return 1 +TESTREPOS=() + +init_test_repo() { + local repodir + repodir=$(mktemp -d /tmp/yetus-bats-XXXXXX) + TESTREPOS+=("${repodir}") + unset GIT_DIR GIT_WORK_TREE GIT_DISCOVERY_ACROSS_FILESYSTEM + export GIT_CEILING_DIRECTORIES=/tmp + pushd "${repodir}" >/dev/null || return 1 git init -q --initial-branch=main git config user.email "test@test.com" git config user.name "Test" git config commit.gpgsign false git config tag.gpgsign false git config core.hooksPath /dev/null +} + +teardown() { + popd >/dev/null 2>&1 || true + for d in "${TESTREPOS[@]}"; do + rm -rf "${d}" + done + TESTREPOS=() + if [[ -n "${TMP}" ]]; then + rm -rf "${TMP}" + fi +} + +@test "git diff --binary round-trip preserves binary content" { + init_test_repo echo "initial" > readme.txt git add readme.txt @@ -37,72 +58,61 @@ load functions_test_helper git commit -q -m "add binary" local merge_base - merge_base=$(git merge-base main feature 2>/dev/null || git merge-base master feature) - git diff --binary "${merge_base}..feature" > "${TMP}/test.diff" + merge_base=$(git merge-base main feature) + git diff --binary "${merge_base}..feature" > "${TESTREPOS[0]}/test.diff" - git checkout -q main 2>/dev/null || git checkout -q master - git apply --binary "${TMP}/test.diff" + git checkout -q main + git apply --binary "${TESTREPOS[0]}/test.diff" [ -f binary.dat ] local actual - actual=$(xxd -p binary.dat | tr -d '\n') - [ "${actual}" = "0001020342494e415259fffe" ] + actual=$(od -A n -t x1 binary.dat | tr -d ' \n') + [ "${actual}" = "0001020342494e415259fffe" ] # pragma: allowlist secret [ "$(cat readme.txt)" = "modified" ] - - popd >/dev/null || return 1 } @test "YETUS-983: format-patch leaves stale file after add-delete, cumulative diff does not" { - mkdir -p "${TMP}/repo" - pushd "${TMP}/repo" >/dev/null || return 1 - git init -q --initial-branch=main - git config user.email "test@test.com" - git config user.name "Test" - git config commit.gpgsign false - git config tag.gpgsign false - git config core.hooksPath /dev/null + init_test_repo echo "initial" > Helper.java git add -A && git commit -q -m "initial" git checkout -q -b feature - # commit 1: add BigController printf 'public class BigController {\n public void doStuff() {}\n}\n' > BigController.java git add BigController.java && git commit -q -m "add BigController" - # commit 2: modify BigController printf 'public class BigController {\n public void doStuff() {}\n public void doMore() {}\n}\n' > BigController.java git add -A && git commit -q -m "modify BigController" - # commit 3: delete BigController, add SmallController git rm -q BigController.java printf 'public class SmallController {\n public void doStuff() {}\n}\n' > SmallController.java git add -A && git commit -q -m "replace BigController with SmallController" - # commit 4: modify SmallController printf 'public class SmallController {\n public void doStuff() {}\n public void doExtra() {}\n}\n' > SmallController.java git add -A && git commit -q -m "modify SmallController" - local merge_base + local merge_base patchfile difffile tmpfiles merge_base=$(git merge-base main feature) - git format-patch --stdout "${merge_base}..feature" > "${TMP}/multi.patch" - git diff --binary "${merge_base}..feature" > "${TMP}/cumulative.diff" + tmpfiles=$(mktemp -d /tmp/yetus-bats-XXXXXX) + TESTREPOS+=("${tmpfiles}") + patchfile="${tmpfiles}/multi.patch" + difffile="${tmpfiles}/cumulative.diff" + git format-patch --stdout "${merge_base}..feature" > "${patchfile}" + git diff --binary "${merge_base}..feature" > "${difffile}" # format-patch dry-run passes (this is the YETUS-983 trap) git checkout -q main - git apply --binary --check -p1 "${TMP}/multi.patch" + git apply --binary --check -p1 "${patchfile}" # format-patch actual apply "succeeds" but leaves stale file - git apply --binary -p1 "${TMP}/multi.patch" + git apply --binary -p1 "${patchfile}" [ -f BigController.java ] # cumulative diff produces correct tree git checkout -f -q main git clean -fd -q - git apply --binary -p1 "${TMP}/cumulative.diff" + git apply --binary -p1 "${difffile}" [ ! -f BigController.java ] [ -f SmallController.java ] - - popd >/dev/null || return 1 }