[PATCH v2 2/2] bloom: optimize multiple pathspec items in revision traversal

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Remove `if (spec->nr > 1)` to enable bloom filter given multiple
pathspec items. Add for loop in prepare_to_use_bloom_filter()
to initialize each pathspec item's struct bloom_keyvec. Add for
loop in check_maybe_different_in_bloom_filter() to find if at least one
bloom_keyvec is contained in bloom filter.

Add new function release_revisions_bloom_keyvecs() to free all bloom
keyvec owned by rev_info.

Modify t/t4216 to ensure consistent results between the optimization
for multiple pathspec items using bloom filters and the case without
bloom filter optimization.

Signed-off-by: Lidong Yan <502024330056@xxxxxxxxxxxxxxxx>
---
 revision.c           | 121 ++++++++++++++++++++++++-------------------
 t/t4216-log-bloom.sh |  10 ++--
 2 files changed, 73 insertions(+), 58 deletions(-)

diff --git a/revision.c b/revision.c
index 3aa544c137..5606f6c7f6 100644
--- a/revision.c
+++ b/revision.c
@@ -675,8 +675,6 @@ static int forbid_bloom_filters(struct pathspec *spec)
 {
 	if (spec->has_wildcard)
 		return 1;
-	if (spec->nr > 1)
-		return 1;
 	if (spec->magic & ~PATHSPEC_LITERAL)
 		return 1;
 	if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
@@ -685,6 +683,8 @@ static int forbid_bloom_filters(struct pathspec *spec)
 	return 0;
 }
 
+static void release_revisions_bloom_keyvecs(struct rev_info *revs);
+
 static void prepare_to_use_bloom_filter(struct rev_info *revs)
 {
 	struct pathspec_item *pi;
@@ -692,7 +692,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
 	char *path_alloc = NULL;
 	const char *path, *p;
 	size_t len;
-	int path_component_nr = 1;
+	int path_component_nr;
 
 	if (!revs->commits)
 		return;
@@ -709,50 +709,53 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
 	if (!revs->pruning.pathspec.nr)
 		return;
 
-	pi = &revs->pruning.pathspec.items[0];
-
-	/* remove single trailing slash from path, if needed */
-	if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
-		path_alloc = xmemdupz(pi->match, pi->len - 1);
-		path = path_alloc;
-	} else
-		path = pi->match;
-
-	len = strlen(path);
-	if (!len) {
-		revs->bloom_filter_settings = NULL;
-		free(path_alloc);
-		return;
-	}
-
-	p = path;
-	while (*p) {
-		/*
-		 * At this point, the path is normalized to use Unix-style
-		 * path separators. This is required due to how the
-		 * changed-path Bloom filters store the paths.
-		 */
-		if (*p == '/')
-			path_component_nr++;
-		p++;
-	}
-
-	revs->bloom_keyvecs_nr = 1;
-	CALLOC_ARRAY(revs->bloom_keyvecs, 1);
-	bloom_keyvec = create_bloom_keyvec(path_component_nr);
-	revs->bloom_keyvecs[0] = bloom_keyvec;
+	revs->bloom_keyvecs_nr = revs->pruning.pathspec.nr;
+	CALLOC_ARRAY(revs->bloom_keyvecs, revs->bloom_keyvecs_nr);
+	for (int i = 0; i < revs->pruning.pathspec.nr; i++) {
+		pi = &revs->pruning.pathspec.items[i];
+		path_component_nr = 1;
+
+		/* remove single trailing slash from path, if needed */
+		if (pi->len > 0 && pi->match[pi->len - 1] == '/') {
+			path_alloc = xmemdupz(pi->match, pi->len - 1);
+			path = path_alloc;
+		} else
+			path = pi->match;
+
+		len = strlen(path);
+		if (!len)
+			goto fail;
+
+		p = path;
+		while (*p) {
+			/*
+			 * At this point, the path is normalized to use
+			 * Unix-style path separators. This is required due to
+			 * how the changed-path Bloom filters store the paths.
+			 */
+			if (*p == '/')
+				path_component_nr++;
+			p++;
+		}
 
-	fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
-			      revs->bloom_filter_settings);
-	path_component_nr = 1;
+		bloom_keyvec = create_bloom_keyvec(path_component_nr);
+		revs->bloom_keyvecs[i] = bloom_keyvec;
+
+		fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
+			       revs->bloom_filter_settings);
+		path_component_nr = 1;
+
+		p = path + len - 1;
+		while (p > path) {
+			if (*p == '/')
+				fill_bloom_keyvec_key(path, p - path,
+					       bloom_keyvec,
+						   path_component_nr++,
+					       revs->bloom_filter_settings);
+			p--;
+		}
 
-	p = path + len - 1;
-	while (p > path) {
-		if (*p == '/')
-			fill_bloom_keyvec_key(path, p - path, bloom_keyvec,
-					      path_component_nr++,
-					      revs->bloom_filter_settings);
-		p--;
+		FREE_AND_NULL(path_alloc);
 	}
 
 	if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
@@ -760,14 +763,19 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
 		bloom_filter_atexit_registered = 1;
 	}
 
+	return;
+
+fail:
+	revs->bloom_filter_settings = NULL;
 	free(path_alloc);
+	release_revisions_bloom_keyvecs(revs);
 }
 
 static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
 						 struct commit *commit)
 {
 	struct bloom_filter *filter;
-	int result = 1, j;
+	int result = 0;
 
 	if (!revs->repo->objects->commit_graph)
 		return -1;
@@ -782,8 +790,11 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
 		return -1;
 	}
 
-	result = bloom_filter_contains_vec(filter, revs->bloom_keyvecs[0],
-					   revs->bloom_filter_settings);
+	for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
+		result = bloom_filter_contains_vec(filter,
+						   revs->bloom_keyvecs[nr],
+						   revs->bloom_filter_settings);
+	}
 
 	if (result)
 		count_bloom_filter_maybe++;
@@ -3201,6 +3212,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
 
 static void release_revisions_topo_walk_info(struct topo_walk_info *info);
 
+static void release_revisions_bloom_keyvecs(struct rev_info *revs)
+{
+	for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
+		destroy_bloom_keyvec(revs->bloom_keyvecs[nr]);
+	FREE_AND_NULL(revs->bloom_keyvecs);
+	revs->bloom_keyvecs_nr = 0;
+}
+
 static void free_void_commit_list(void *list)
 {
 	free_commit_list(list);
@@ -3229,11 +3248,7 @@ void release_revisions(struct rev_info *revs)
 	clear_decoration(&revs->treesame, free);
 	line_log_free(revs);
 	oidset_clear(&revs->missing_commits);
-
-	for (int i = 0; i < revs->bloom_keyvecs_nr; i++)
-		destroy_bloom_keyvec(revs->bloom_keyvecs[i]);
-	FREE_AND_NULL(revs->bloom_keyvecs);
-	revs->bloom_keyvecs_nr = 0;
+	release_revisions_bloom_keyvecs(revs);
 }
 
 static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 8910d53cac..46d1900a21 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -138,8 +138,8 @@ test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
 	test_bloom_filters_not_used "--walk-reflogs -- A"
 '
 
-test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
-	test_bloom_filters_not_used "-- file4 A/file1"
+test_expect_success 'git log -- multiple path specs use Bloom filters' '
+	test_bloom_filters_used "-- file4 A/file1"
 '
 
 test_expect_success 'git log -- "." pathspec at root does not use Bloom filters' '
@@ -151,9 +151,9 @@ test_expect_success 'git log with wildcard that resolves to a single path uses B
 	test_bloom_filters_used "-- *renamed"
 '
 
-test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
-	test_bloom_filters_not_used "-- *" &&
-	test_bloom_filters_not_used "-- file*"
+test_expect_success 'git log with wildcard that resolves to a multiple paths uses Bloom filters' '
+	test_bloom_filters_used "-- *" &&
+	test_bloom_filters_used "-- file*"
 '
 
 test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '
-- 
2.50.0.108.g6ae0c543ae





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux