The `git-for-each-ref(1)` command is used to iterate over references present in a repository. In large repositories with millions of references, it would be optimal to paginate this output such that we can start iteration from a given reference. This would avoid having to iterate over all references from the beginning each time when paginating through results. The previous commit added 'seek' functionality to the reference backends. Utilize this and expose a '--start-after' option in 'git-for-each-ref(1)'. When used, the reference iteration seeks to the lexicographically next reference and iterates from there onward. This enables efficient pagination workflows, where the calling script can remember the last provided reference and use that as the starting point for the next set of references: git for-each-ref --count=100 git for-each-ref --count=100 --start-after=refs/heads/branch-100 git for-each-ref --count=100 --start-after=refs/heads/branch-200 Since the reference iterators only allow seeking to a specified marker via the `ref_iterator_seek()`, we introduce a helper function `start_ref_iterator_after()`, which seeks to next reference by simply adding (char) 1 to the marker. We must note that pagination always continues from the provided marker, as such any concurrent reference updates lexicographically behind the marker will not be output. Document the same. Signed-off-by: Karthik Nayak <karthik.188@xxxxxxxxx> --- Documentation/git-for-each-ref.adoc | 10 +- builtin/for-each-ref.c | 8 ++ ref-filter.c | 78 +++++++++++---- ref-filter.h | 1 + t/t6302-for-each-ref-filter.sh | 194 ++++++++++++++++++++++++++++++++++++ 5 files changed, 272 insertions(+), 19 deletions(-) diff --git a/Documentation/git-for-each-ref.adoc b/Documentation/git-for-each-ref.adoc index 5ef89fc0fe..ae61ba642a 100644 --- a/Documentation/git-for-each-ref.adoc +++ b/Documentation/git-for-each-ref.adoc @@ -14,7 +14,7 @@ SYNOPSIS [--points-at=<object>] [--merged[=<object>]] [--no-merged[=<object>]] [--contains[=<object>]] [--no-contains[=<object>]] - [--exclude=<pattern> ...] + [--exclude=<pattern> ...] [--start-after=<marker>] DESCRIPTION ----------- @@ -108,6 +108,14 @@ TAB %(refname)`. --include-root-refs:: List root refs (HEAD and pseudorefs) apart from regular refs. +--start-after=<marker>:: + Allows paginating the output by skipping references up to and including the + specified marker. When paging, it should be noted that references may be + deleted, modified or added between invocations. Output will only yield those + references which follow the marker lexicographically. Output begins from the + first reference that would come after the marker alphabetically. Cannot be + used with general pattern matching or custom sort options. + FIELD NAMES ----------- diff --git a/builtin/for-each-ref.c b/builtin/for-each-ref.c index 3d2207ec77..3f21598046 100644 --- a/builtin/for-each-ref.c +++ b/builtin/for-each-ref.c @@ -13,6 +13,7 @@ static char const * const for_each_ref_usage[] = { N_("git for-each-ref [--points-at <object>]"), N_("git for-each-ref [--merged [<commit>]] [--no-merged [<commit>]]"), N_("git for-each-ref [--contains [<commit>]] [--no-contains [<commit>]]"), + N_("git for-each-ref [--start-after <marker>]"), NULL }; @@ -44,6 +45,7 @@ int cmd_for_each_ref(int argc, OPT_GROUP(""), OPT_INTEGER( 0 , "count", &format.array_opts.max_count, N_("show only <n> matched refs")), OPT_STRING( 0 , "format", &format.format, N_("format"), N_("format to use for the output")), + OPT_STRING( 0 , "start-after", &filter.start_after, N_("start-start"), N_("start iteration after the provided marker")), OPT__COLOR(&format.use_color, N_("respect format colors")), OPT_REF_FILTER_EXCLUDE(&filter), OPT_REF_SORT(&sorting_options), @@ -79,6 +81,9 @@ int cmd_for_each_ref(int argc, if (verify_ref_format(&format)) usage_with_options(for_each_ref_usage, opts); + if (filter.start_after && sorting_options.nr > 1) + die(_("cannot use --start-after with custom sort options")); + sorting = ref_sorting_options(&sorting_options); ref_sorting_set_sort_flags_all(sorting, REF_SORTING_ICASE, icase); filter.ignore_case = icase; @@ -100,6 +105,9 @@ int cmd_for_each_ref(int argc, filter.name_patterns = argv; } + if (filter.start_after && filter.name_patterns && filter.name_patterns[0]) + die(_("cannot use --start-after with patterns")); + if (include_root_refs) flags |= FILTER_REFS_ROOT_REFS | FILTER_REFS_DETACHED_HEAD; diff --git a/ref-filter.c b/ref-filter.c index da663c7ac8..c8a6b7f1af 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2683,6 +2683,41 @@ static int filter_exclude_match(struct ref_filter *filter, const char *refname) return match_pattern(filter->exclude.v, refname, filter->ignore_case); } +/* + * We need to seek to the reference right after a given marker but excluding any + * matching references. So we seek to the lexicographically next reference. + */ +static int start_ref_iterator_after(struct ref_iterator *iter, const char *marker) +{ + struct strbuf sb = STRBUF_INIT; + int ret; + + strbuf_addstr(&sb, marker); + strbuf_addch(&sb, 1); + + ret = ref_iterator_seek(iter, sb.buf, 0); + + strbuf_release(&sb); + return ret; +} + +static int for_each_fullref_with_seek(struct ref_filter *filter, each_ref_fn cb, + void *cb_data, unsigned int flags) +{ + struct ref_iterator *iter; + int ret = 0; + + iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), "", + NULL, 0, flags); + if (filter->start_after) + ret = start_ref_iterator_after(iter, filter->start_after); + + if (ret) + return ret; + + return do_for_each_ref_iterator(iter, cb, cb_data); +} + /* * This is the same as for_each_fullref_in(), but it tries to iterate * only over the patterns we'll care about. Note that it _doesn't_ do a full @@ -2694,8 +2729,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, { if (filter->kind & FILTER_REFS_ROOT_REFS) { /* In this case, we want to print all refs including root refs. */ - return refs_for_each_include_root_refs(get_main_ref_store(the_repository), - cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, + DO_FOR_EACH_INCLUDE_ROOT_REFS); } if (!filter->match_as_path) { @@ -2704,8 +2739,7 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, * prefixes like "refs/heads/" etc. are stripped off, * so we have to look at everything: */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", NULL, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } if (filter->ignore_case) { @@ -2714,14 +2748,12 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, * so just return everything and let the caller * sort it out. */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", NULL, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } if (!filter->name_patterns[0]) { /* no patterns; we have to look at everything */ - return refs_for_each_fullref_in(get_main_ref_store(the_repository), - "", filter->exclude.v, cb, cb_data); + return for_each_fullref_with_seek(filter, cb, cb_data, 0); } return refs_for_each_fullref_in_prefixes(get_main_ref_store(the_repository), @@ -3189,6 +3221,7 @@ void filter_is_base(struct repository *r, static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data) { + const char *prefix = NULL; int ret = 0; filter->kind = type & FILTER_REFS_KIND_MASK; @@ -3207,19 +3240,28 @@ static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref * of filter_ref_kind(). */ if (filter->kind == FILTER_REFS_BRANCHES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/heads/", NULL, - fn, cb_data); + prefix = "refs/heads/"; else if (filter->kind == FILTER_REFS_REMOTES) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/remotes/", NULL, - fn, cb_data); + prefix = "refs/remotes/"; else if (filter->kind == FILTER_REFS_TAGS) - ret = refs_for_each_fullref_in(get_main_ref_store(the_repository), - "refs/tags/", NULL, fn, - cb_data); - else if (filter->kind & FILTER_REFS_REGULAR) + prefix = "refs/tags/"; + + if (prefix) { + struct ref_iterator *iter; + + iter = refs_ref_iterator_begin(get_main_ref_store(the_repository), + "", NULL, 0, 0); + + if (filter->start_after) + ret = start_ref_iterator_after(iter, filter->start_after); + else if (prefix) + ret = ref_iterator_seek(iter, prefix, 1); + + if (!ret) + ret = do_for_each_ref_iterator(iter, fn, cb_data); + } else if (filter->kind & FILTER_REFS_REGULAR) { ret = for_each_fullref_in_pattern(filter, fn, cb_data); + } /* * When printing all ref types, HEAD is already included, diff --git a/ref-filter.h b/ref-filter.h index c98c4fbd4c..f22ca94b49 100644 --- a/ref-filter.h +++ b/ref-filter.h @@ -64,6 +64,7 @@ struct ref_array { struct ref_filter { const char **name_patterns; + const char *start_after; struct strvec exclude; struct oid_array points_at; struct commit_list *with_commit; diff --git a/t/t6302-for-each-ref-filter.sh b/t/t6302-for-each-ref-filter.sh index bb02b86c16..e097db6b02 100755 --- a/t/t6302-for-each-ref-filter.sh +++ b/t/t6302-for-each-ref-filter.sh @@ -541,4 +541,198 @@ test_expect_success 'validate worktree atom' ' test_cmp expect actual ' +test_expect_success 'start after with empty value' ' + cat >expect <<-\EOF && + refs/heads/main + refs/heads/main_worktree + refs/heads/side + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after="" >actual && + test_cmp expect actual +' + +test_expect_success 'start after a specific reference' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spot >actual && + test_cmp expect actual +' + +test_expect_success 'start after a specific reference with partial match' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/sp >actual && + test_cmp expect actual +' + +test_expect_success 'start after, just behind a specific reference' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/parrot >actual && + test_cmp expect actual +' + +test_expect_success 'start after with specific directory match' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd >actual && + test_cmp expect actual +' + +test_expect_success 'start after with specific directory and trailing slash' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/ >actual && + test_cmp expect actual +' + +test_expect_success 'start after, just behind a specific directory' ' + cat >expect <<-\EOF && + refs/odd/spot + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/lost >actual && + test_cmp expect actual +' + +test_expect_success 'start after, overflow specific reference length' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spotnew >actual && + test_cmp expect actual +' + +test_expect_success 'start after, overflow specific reference path' ' + cat >expect <<-\EOF && + refs/tags/annotated-tag + refs/tags/doubly-annotated-tag + refs/tags/doubly-signed-tag + refs/tags/foo1.10 + refs/tags/foo1.3 + refs/tags/foo1.6 + refs/tags/four + refs/tags/one + refs/tags/signed-tag + refs/tags/three + refs/tags/two + EOF + git for-each-ref --format="%(refname)" --start-after=refs/odd/spot/new >actual && + test_cmp expect actual +' + +test_expect_success 'start after, last reference' ' + cat >expect <<-\EOF && + EOF + git for-each-ref --format="%(refname)" --start-after=refs/tags/two >actual && + test_cmp expect actual +' + +test_expect_success 'start after used with a pattern' ' + cat >expect <<-\EOF && + fatal: cannot use --start-after with patterns + EOF + test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot refs/tags 2>actual && + test_cmp expect actual +' + +test_expect_success 'start after used with custom sort order' ' + cat >expect <<-\EOF && + fatal: cannot use --start-after with custom sort options + EOF + test_must_fail git for-each-ref --format="%(refname)" --start-after=refs/odd/spot --sort=author 2>actual && + test_cmp expect actual +' + test_done -- 2.49.0