The cleanup_records function marks some lines as changed before running the actual diff algorithm. For most lines, this is a good performance optimization, but it also marks lines that are surrounded by many changed lines as changed as well. This can cause redundant changes and longer-than- necessary diffs. Whether this results in better-looking diffs is subjective. However, the --minimal flag explicitly requests the shortest possible diff. The performance impact of this change is negligible, and it results in shorter diffs in about 1.3% of diffs in Git's history. Signed-off-by: Niels Glodny <n.glodny@xxxxxxxxxxxxx> --- t/meson.build | 1 + t/t4071-diff-minimal.sh | 16 ++++++++++++++++ xdiff/xprepare.c | 22 +++++++++++++--------- 3 files changed, 30 insertions(+), 9 deletions(-) create mode 100755 t/t4071-diff-minimal.sh diff --git a/t/meson.build b/t/meson.build index bfb744e886..8f2e9d2c50 100644 --- a/t/meson.build +++ b/t/meson.build @@ -501,6 +501,7 @@ integration_tests = [ 't4068-diff-symmetric-merge-base.sh', 't4069-remerge-diff.sh', 't4070-diff-pairs.sh', + 't4071-diff-minimal.sh', 't4100-apply-stat.sh', 't4101-apply-nonl.sh', 't4102-apply-rename.sh', diff --git a/t/t4071-diff-minimal.sh b/t/t4071-diff-minimal.sh new file mode 100755 index 0000000000..3ad759dab4 --- /dev/null +++ b/t/t4071-diff-minimal.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +test_description='minimal diff algorithm' + +. ./test-lib.sh + +test_expect_success 'minimal diff should not mark changes between changed lines' ' + printf "x\nx\nx\nx\n" >pre && + printf "x\nx\nx\nA\nB\nC\nD\nx\nE\nF\nG\n" >post && + test_must_fail git diff --no-index \ + --minimal pre post >diff && + ! grep "+x" diff && + ! grep "-x" diff +' + +test_done diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index c84549f6c5..cb0b6c9fd6 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -61,9 +61,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ xdlclassifier_t *cf, xdfile_t *xdf); static void xdl_free_ctx(xdfile_t *xdf); static int xdl_clean_mmatch(char const *dis, long i, long s, long e); -static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, + xdfile_t *xdf2, int need_min); static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); -static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); +static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, + xdfile_t *xdf2, int need_min); @@ -279,7 +281,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && - xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) { + xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2, + (xpp->flags & XDF_NEED_MINIMAL) != 0) < 0) { xdl_free_ctx(&xe->xdf2); xdl_free_ctx(&xe->xdf1); @@ -363,7 +366,8 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { * matches on the other file. Also, lines that have multiple matches * might be potentially discarded if they happear in a run of discardable. */ -static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { +static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, + xdfile_t *xdf2, int need_min) { long i, nm, nreff, mlim; xrecord_t **recs; xdlclass_t *rcrec; @@ -379,7 +383,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len2 : 0; - dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; } if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) @@ -387,7 +391,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[(*recs)->ha]; nm = rcrec ? rcrec->len1 : 0; - dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; + dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1; } for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; @@ -449,10 +453,10 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { } -static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - +static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, + xdfile_t *xdf2, int need_min) { if (xdl_trim_ends(xdf1, xdf2) < 0 || - xdl_cleanup_records(cf, xdf1, xdf2) < 0) { + xdl_cleanup_records(cf, xdf1, xdf2, need_min) < 0) { return -1; } base-commit: f65182a99e545d2f2bc22e6c1c2da192133b16a3 -- 2.34.1