The revision traversal limited by pathspec has optimization when the pathspec has only one element. To support optimization for multiple pathspec items, we need to modify the data structures in struct rev_info. struct rev_info uses bloom_keys and bloom_nr to store the bloom keys corresponding to a single pathspec item. To allow struct rev_info to store bloom keys for multiple pathspec items, a new data structure `struct bloom_keyvec` is introduced. Each `struct bloom_keyvec` corresponds to a single pathspec item. In `struct rev_info`, replace bloom_keys and bloom_nr with bloom_keyvecs and bloom_keyvec_nr. This commit still optimize one pathspec item, thus bloom_keyvec_nr can only be 0 or 1. New bloom_keyvec_* functions are added to create and destroy a keyvec. bloom_filter_contains_vec() is added to check if all key in keyvec is contained in a bloom filter. bloom_keyvec_fill_key() is added to initialize a key in keyvec. Signed-off-by: Lidong Yan <502024330056@xxxxxxxxxxxxxxxx> --- bloom.c | 31 +++++++++++++++++++++++++++++++ bloom.h | 25 +++++++++++++++++++++++++ revision.c | 39 +++++++++++++++++++++------------------ revision.h | 6 +++--- 4 files changed, 80 insertions(+), 21 deletions(-) diff --git a/bloom.c b/bloom.c index 35ff36c31c..877bda0ef3 100644 --- a/bloom.c +++ b/bloom.c @@ -280,6 +280,25 @@ void deinit_bloom_filters(void) deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter); } +struct bloom_keyvec *bloom_keyvec_new(size_t count) +{ + struct bloom_keyvec *vec; + size_t sz = sizeof(struct bloom_keyvec); + sz += count * sizeof(struct bloom_key); + vec = (struct bloom_keyvec *)xcalloc(1, sz); + vec->count = count; + return vec; +} + +void bloom_keyvec_free(struct bloom_keyvec *vec) +{ + if (!vec) + return; + for (size_t nr = 0; nr < vec->count; nr++) + bloom_key_clear(&vec->key[nr]); + free(vec); +} + static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED, const struct hashmap_entry *eptr, const struct hashmap_entry *entry_or_key, @@ -541,6 +560,18 @@ int bloom_filter_contains(const struct bloom_filter *filter, return 1; } +int bloom_filter_contains_vec(const struct bloom_filter *filter, + const struct bloom_keyvec *vec, + const struct bloom_filter_settings *settings) +{ + int ret = 1; + + for (size_t nr = 0; ret > 0 && nr < vec->count; nr++) + ret = bloom_filter_contains(filter, &vec->key[nr], settings); + + return ret; +} + uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, int version) { diff --git a/bloom.h b/bloom.h index edf14fef3e..3669074f3a 100644 --- a/bloom.h +++ b/bloom.h @@ -74,6 +74,16 @@ struct bloom_key { uint32_t *hashes; }; +/* + * A bloom_keyvec is a vector of bloom_keys, which + * can be used to store multiple keys for a single + * pathspec item. + */ +struct bloom_keyvec { + size_t count; + struct bloom_key key[FLEX_ARRAY]; +}; + int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos); @@ -84,6 +94,17 @@ void bloom_key_fill(const char *data, const struct bloom_filter_settings *settings); void bloom_key_clear(struct bloom_key *key); +struct bloom_keyvec *bloom_keyvec_new(size_t count); +void bloom_keyvec_free(struct bloom_keyvec *vec); + +static inline void bloom_keyvec_fill_key(const char *data, size_t len, + struct bloom_keyvec *vec, size_t nr, + const struct bloom_filter_settings *settings) +{ + assert(nr < vec->count); + bloom_key_fill(data, len, &vec->key[nr], settings); +} + void add_key_to_filter(const struct bloom_key *key, struct bloom_filter *filter, const struct bloom_filter_settings *settings); @@ -128,6 +149,10 @@ int bloom_filter_contains(const struct bloom_filter *filter, const struct bloom_key *key, const struct bloom_filter_settings *settings); +int bloom_filter_contains_vec(const struct bloom_filter *filter, + const struct bloom_keyvec *v, + const struct bloom_filter_settings *settings); + uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len, int version); diff --git a/revision.c b/revision.c index 49fc650ac7..7cbb49617d 100644 --- a/revision.c +++ b/revision.c @@ -688,6 +688,7 @@ static int forbid_bloom_filters(struct pathspec *spec) static void prepare_to_use_bloom_filter(struct rev_info *revs) { struct pathspec_item *pi; + struct bloom_keyvec *bloom_keyvec; char *path_alloc = NULL; const char *path, *p; size_t len; @@ -736,19 +737,21 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) p++; } - revs->bloom_keys_nr = path_component_nr; - ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr); + revs->bloom_keyvecs_nr = 1; + CALLOC_ARRAY(revs->bloom_keyvecs, 1); + bloom_keyvec = bloom_keyvec_new(path_component_nr); + revs->bloom_keyvecs[0] = bloom_keyvec; - bloom_key_fill(path, len, &revs->bloom_keys[0], - revs->bloom_filter_settings); + bloom_keyvec_fill_key(path, len, bloom_keyvec, 0, + revs->bloom_filter_settings); path_component_nr = 1; p = path + len - 1; while (p > path) { if (*p == '/') - bloom_key_fill(path, p - path, - &revs->bloom_keys[path_component_nr++], - revs->bloom_filter_settings); + bloom_keyvec_fill_key(path, p - path, bloom_keyvec, + path_component_nr++, + revs->bloom_filter_settings); p--; } @@ -764,7 +767,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, struct commit *commit) { struct bloom_filter *filter; - int result = 1, j; + int result = 0; if (!revs->repo->objects->commit_graph) return -1; @@ -779,10 +782,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs, return -1; } - for (j = 0; result && j < revs->bloom_keys_nr; j++) { - result = bloom_filter_contains(filter, - &revs->bloom_keys[j], - revs->bloom_filter_settings); + for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) { + result = bloom_filter_contains_vec(filter, + revs->bloom_keyvecs[nr], + revs->bloom_filter_settings); } if (result) @@ -823,7 +826,7 @@ static int rev_compare_tree(struct rev_info *revs, return REV_TREE_SAME; } - if (revs->bloom_keys_nr && !nth_parent) { + if (revs->bloom_keyvecs_nr && !nth_parent) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (bloom_ret == 0) @@ -850,7 +853,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit, if (!t1) return 0; - if (!nth_parent && revs->bloom_keys_nr) { + if (!nth_parent && revs->bloom_keyvecs_nr) { bloom_ret = check_maybe_different_in_bloom_filter(revs, commit); if (!bloom_ret) return 1; @@ -3230,10 +3233,10 @@ void release_revisions(struct rev_info *revs) line_log_free(revs); oidset_clear(&revs->missing_commits); - for (int i = 0; i < revs->bloom_keys_nr; i++) - bloom_key_clear(&revs->bloom_keys[i]); - FREE_AND_NULL(revs->bloom_keys); - revs->bloom_keys_nr = 0; + for (size_t i = 0; i < revs->bloom_keyvecs_nr; i++) + bloom_keyvec_free(revs->bloom_keyvecs[i]); + FREE_AND_NULL(revs->bloom_keyvecs); + revs->bloom_keyvecs_nr = 0; } static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child) diff --git a/revision.h b/revision.h index 6d369cdad6..ac843f58d0 100644 --- a/revision.h +++ b/revision.h @@ -62,7 +62,7 @@ struct repository; struct rev_info; struct string_list; struct saved_parents; -struct bloom_key; +struct bloom_keyvec; struct bloom_filter_settings; struct option; struct parse_opt_ctx_t; @@ -360,8 +360,8 @@ struct rev_info { /* Commit graph bloom filter fields */ /* The bloom filter key(s) for the pathspec */ - struct bloom_key *bloom_keys; - int bloom_keys_nr; + struct bloom_keyvec **bloom_keyvecs; + int bloom_keyvecs_nr; /* * The bloom filter settings used to generate the key. -- 2.50.0.107.g33b6ec8c79