Remove `if (spec->nr > 1)` to enable bloom filter given multiple pathspec. Wrapped for loop around code in prepare_to_use_bloom_filter() to initialize each pathspec's struct bloom_keyvec. Add for loop in check_maybe_different_in_bloom_filter() to find if at least one pathspec's bloom_keyvec is contained in bloom filter. Add new function release_revisions_bloom_keyvecs() to free all bloom keyvec owned by rev_info. Modify t/t4216 to test if bloom filter is still used given multiple pathspec. Signed-off-by: Lidong Yan <502024330056@xxxxxxxxxxxxxxxx> --- revision.c | 122 ++++++++++++++++++++++++------------------- t/t4216-log-bloom.sh | 10 ++-- 2 files changed, 73 insertions(+), 59 deletions(-) diff --git a/revision.c b/revision.c index cf7dc3b3fa..8818f017f3 100644 --- a/revision.c +++ b/revision.c @@ -675,8 +675,6 @@ static int forbid_bloom_filters(struct pathspec *spec) { if (spec->has_wildcard) return 1; - if (spec->nr > 1) - return 1; if (spec->magic & ~PATHSPEC_LITERAL) return 1; if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL)) @@ -685,6 +683,8 @@ static int forbid_bloom_filters(struct pathspec *spec) return 0; } +static void release_revisions_bloom_keyvecs(struct rev_info *revs); + static void prepare_to_use_bloom_filter(struct rev_info *revs) { struct pathspec_item *pi; @@ -692,7 +692,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) char *path_alloc = NULL; const char *path, *p; size_t len; - int path_component_nr = 1; + int path_component_nr; if (!revs->commits) return; @@ -709,51 +709,53 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) if (!revs->pruning.pathspec.nr) return; - pi = &revs->pruning.pathspec.items[0]; - - /* remove single trailing slash from path, if needed */ - if (pi->len > 0 && pi->match[pi->len - 1] == '/') { - path_alloc = xmemdupz(pi->match, pi->len - 1); - path = path_alloc; - } else - path = pi->match; - - len = strlen(path); - if (!len) { - revs->bloom_filter_settings = NULL; - free(path_alloc); - return; - } - - p = path; - while (*p) { - /* - * At this point, the path is normalized to use Unix-style - * path separators. This is required due to how the - * changed-path Bloom filters store the paths. - */ - if (*p == '/') - path_component_nr++; - p++; - } - - revs->bloom_keyvecs_nr = 1; - CALLOC_ARRAY(revs->bloom_keyvecs, 1); - bloom_keyvec = &revs->bloom_keyvecs[0]; - bloom_keyvec_init(bloom_keyvec, path_component_nr); + revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr; + CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr); + for (int i = 0; i < revs->pruning.pathspec.nr; i++) { + pi = &revs->pruning.pathspec.items[i]; + path_component_nr = 1; + + /* remove single trailing slash from path, if needed */ + if (pi->len > 0 && pi->match[pi->len - 1] == '/') { + path_alloc = xmemdupz(pi->match, pi->len - 1); + path = path_alloc; + } else + path = pi->match; + + len = strlen(path); + if (!len) + goto fail; + + p = path; + while (*p) { + /* + * At this point, the path is normalized to use + * Unix-style path separators. This is required due to + * how the changed-path Bloom filters store the paths. + */ + if (*p == '/') + path_component_nr++; + p++; + } - fill_bloom_key(path, len, bloom_keyvec_at(bloom_keyvec, 0), - revs->bloom_filter_settings); - path_component_nr = 1; + bloom_keyvec = &revs->bloom_keyvecs[i]; + bloom_keyvec_init(bloom_keyvec, path_component_nr); + + fill_bloom_key(path, len, bloom_keyvec_at(bloom_keyvec, 0), + revs->bloom_filter_settings); + path_component_nr = 1; + + p = path + len - 1; + while (p > path) { + if (*p == '/') + fill_bloom_key(path, p - path, + bloom_keyvec_at(bloom_keyvec, + path_component_nr++), + revs->bloom_filter_settings); + p--; + } - p = path + len - 1; - while (p > path) { - if (*p == '/') - fill_bloom_key(path, p - path, - bloom_keyvec_at(bloom_keyvec, - path_component_nr++), - revs->bloom_filter_settings); - p--; + FREE_AND_NULL(path_alloc); } if (trace2_is_enabled() && !bloom_filter_atexit_registered) { @@ -761,14 +763,19 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) bloom_filter_atexit_registered = 1; } + return; + +fail: + revs->bloom_filter_settings = NULL; free(path_alloc); + release_revisions_bloom_keyvecs(revs); } static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *commit) { struct bloom_filter *filter; - int result = 1, j; + int result = 0; if (!revs->repo->objects->commit_graph) return -1; @@ -783,8 +790,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - result = bloom_filter_contains_vec(filter, &revs->bloom_keyvecs[0], - revs->bloom_filter_settings); + for (int i = 0; !result && i < revs->bloom_keyvecs_nr; i++) { + result = bloom_filter_contains_vec(filter, + &revs->bloom_keyvecs[i], + revs->bloom_filter_settings); + } if (result) count_bloom_filter_maybe++; @@ -3202,6 +3212,14 @@ static void release_revisions_mailmap(struct string_list *mailmap) static void release_revisions_topo_walk_info(struct topo_walk_info *info); +static void release_revisions_bloom_keyvecs(struct rev_info *revs) +{ + for (int i = 0; i < revs->bloom_keyvecs_nr; i++) + bloom_keyvec_clear(&revs->bloom_keyvecs[i]); + FREE_AND_NULL(revs->bloom_keyvecs); + revs->bloom_keyvecs_nr = 0; +} + static void free_void_commit_list(void *list) { free_commit_list(list); @@ -3230,11 +3248,7 @@ void release_revisions(struct rev_info *revs) clear_decoration(&revs->treesame, free); line_log_free(revs); oidset_clear(&revs->missing_commits); - - for (int i = 0; i < revs->bloom_keyvecs_nr; i++) - bloom_keyvec_clear(&revs->bloom_keyvecs[i]); - FREE_AND_NULL(revs->bloom_keyvecs); - revs->bloom_keyvecs_nr = 0; + release_revisions_bloom_keyvecs(revs); } static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child) diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index 8910d53cac..46d1900a21 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -138,8 +138,8 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' ' test_bloom_filters_not_used "--walk-reflogs -- A" ' -test_expect_success 'git log -- multiple path specs does not use Bloom filters' ' - test_bloom_filters_not_used "-- file4 A/file1" +test_expect_success 'git log -- multiple path specs use Bloom filters' ' + test_bloom_filters_used "-- file4 A/file1" ' test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' ' @@ -151,9 +151,9 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B test_bloom_filters_used "-- *renamed" ' -test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' ' - test_bloom_filters_not_used "-- *" && - test_bloom_filters_not_used "-- file*" +test_expect_success 'git log with wildcard that resolves to a multiple paths uses Bloom filters' ' + test_bloom_filters_used "-- *" && + test_bloom_filters_used "-- file*" ' test_expect_success 'setup - add commit-graph to the chain without Bloom filters' ' -- 2.50.0.108.g6ae0c543ae