[PATCH v2 1/3] commit: convert pop_most_recent_commit() to prio_queue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



pop_most_recent_commit() calls commit_list_insert_by_date() for parent
commits, which is itself called in a loop.  This can lead to quadratic
complexity if there are many merges.  Replace the commit_list with a
prio_queue to ensure logarithmic worst case complexity and convert all
three users.

Add a performance test that exercises one of them using a pathological
history that consists of 50% merges and 50% root commits to demonstrate
the speedup:

Test                          v2.50.1           HEAD
----------------------------------------------------------------------
1501.2: rev-parse ':/65535'   2.48(2.47+0.00)   0.20(0.19+0.00) -91.9%

Alas, sane histories don't benefit from the conversion much, and
traversing Git's own history takes a 1% performance hit on my machine:

   $ hyperfine -w3 -L git ./git_2.50.1,./git '{git} rev-parse :/^Initial.revision'
   Benchmark 1: ./git_2.50.1 rev-parse :/^Initial.revision
     Time (mean ± σ):      1.071 s ±  0.004 s    [User: 1.052 s, System: 0.017 s]
     Range (min … max):    1.067 s …  1.078 s    10 runs

   Benchmark 2: ./git rev-parse :/^Initial.revision
     Time (mean ± σ):      1.079 s ±  0.003 s    [User: 1.060 s, System: 0.017 s]
     Range (min … max):    1.074 s …  1.083 s    10 runs

   Summary
     ./git_2.50.1 rev-parse :/^Initial.revision ran
       1.01 ± 0.00 times faster than ./git rev-parse :/^Initial.revision

Signed-off-by: René Scharfe <l.s.r@xxxxxx>
---
 commit.c                          |  7 +--
 commit.h                          |  8 ++--
 fetch-pack.c                      | 13 +++---
 object-name.c                     | 10 ++---
 t/meson.build                     |  1 +
 t/perf/p1501-rev-parse-oneline.sh | 71 +++++++++++++++++++++++++++++++
 walker.c                          | 11 +++--
 7 files changed, 100 insertions(+), 21 deletions(-)
 create mode 100755 t/perf/p1501-rev-parse-oneline.sh

diff --git a/commit.c b/commit.c
index 15115125c3..f4712ad9a7 100644
--- a/commit.c
+++ b/commit.c
@@ -31,6 +31,7 @@
 #include "parse.h"
 #include "object-file.h"
 #include "object-file-convert.h"
+#include "prio-queue.h"
 
 static struct commit_extra_header *read_commit_extra_header_lines(const char *buf, size_t len, const char **);
 
@@ -739,17 +740,17 @@ void commit_list_sort_by_date(struct commit_list **list)
 	commit_list_sort(list, commit_list_compare_by_date);
 }
 
-struct commit *pop_most_recent_commit(struct commit_list **list,
+struct commit *pop_most_recent_commit(struct prio_queue *queue,
 				      unsigned int mark)
 {
-	struct commit *ret = pop_commit(list);
+	struct commit *ret = prio_queue_get(queue);
 	struct commit_list *parents = ret->parents;
 
 	while (parents) {
 		struct commit *commit = parents->item;
 		if (!repo_parse_commit(the_repository, commit) && !(commit->object.flags & mark)) {
 			commit->object.flags |= mark;
-			commit_list_insert_by_date(commit, list);
+			prio_queue_put(queue, commit);
 		}
 		parents = parents->next;
 	}
diff --git a/commit.h b/commit.h
index 70c870dae4..9630c076d6 100644
--- a/commit.h
+++ b/commit.h
@@ -201,10 +201,10 @@ const char *repo_logmsg_reencode(struct repository *r,
 
 const char *skip_blank_lines(const char *msg);
 
-/** Removes the first commit from a list sorted by date, and adds all
- * of its parents.
- **/
-struct commit *pop_most_recent_commit(struct commit_list **list,
+struct prio_queue;
+
+/* Removes the first commit from a prio_queue and adds its parents. */
+struct commit *pop_most_recent_commit(struct prio_queue *queue,
 				      unsigned int mark);
 
 struct commit *pop_commit(struct commit_list **stack);
diff --git a/fetch-pack.c b/fetch-pack.c
index 5e74235fc0..8ad5bf2931 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -34,6 +34,7 @@
 #include "commit-graph.h"
 #include "sigchain.h"
 #include "mergesort.h"
+#include "prio-queue.h"
 
 static int transfer_unpack_limit = -1;
 static int fetch_unpack_limit = -1;
@@ -601,7 +602,7 @@ static int find_common(struct fetch_negotiator *negotiator,
 	return count ? retval : 0;
 }
 
-static struct commit_list *complete;
+static struct prio_queue complete = { compare_commits_by_commit_date };
 
 static int mark_complete(const struct object_id *oid)
 {
@@ -609,7 +610,7 @@ static int mark_complete(const struct object_id *oid)
 
 	if (commit && !(commit->object.flags & COMPLETE)) {
 		commit->object.flags |= COMPLETE;
-		commit_list_insert(commit, &complete);
+		prio_queue_put(&complete, commit);
 	}
 	return 0;
 }
@@ -626,9 +627,12 @@ static int mark_complete_oid(const char *refname UNUSED,
 static void mark_recent_complete_commits(struct fetch_pack_args *args,
 					 timestamp_t cutoff)
 {
-	while (complete && cutoff <= complete->item->date) {
+	while (complete.nr) {
+		struct commit *item = prio_queue_peek(&complete);
+		if (item->date < cutoff)
+			break;
 		print_verbose(args, _("Marking %s as complete"),
-			      oid_to_hex(&complete->item->object.oid));
+			      oid_to_hex(&item->object.oid));
 		pop_most_recent_commit(&complete, COMPLETE);
 	}
 }
@@ -798,7 +802,6 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
 		refs_for_each_rawref(get_main_ref_store(the_repository),
 				     mark_complete_oid, NULL);
 		for_each_cached_alternate(NULL, mark_alternate_complete);
-		commit_list_sort_by_date(&complete);
 		if (cutoff)
 			mark_recent_complete_commits(args, cutoff);
 	}
diff --git a/object-name.c b/object-name.c
index ddafe7f9b1..41930609e3 100644
--- a/object-name.c
+++ b/object-name.c
@@ -28,6 +28,7 @@
 #include "commit-reach.h"
 #include "date.h"
 #include "object-file-convert.h"
+#include "prio-queue.h"
 
 static int get_oid_oneline(struct repository *r, const char *, struct object_id *,
 			   const struct commit_list *);
@@ -1461,7 +1462,7 @@ static int get_oid_oneline(struct repository *r,
 			   const char *prefix, struct object_id *oid,
 			   const struct commit_list *list)
 {
-	struct commit_list *copy = NULL, **copy_tail = &copy;
+	struct prio_queue copy = { compare_commits_by_commit_date };
 	const struct commit_list *l;
 	int found = 0;
 	int negative = 0;
@@ -1483,9 +1484,9 @@ static int get_oid_oneline(struct repository *r,
 
 	for (l = list; l; l = l->next) {
 		l->item->object.flags |= ONELINE_SEEN;
-		copy_tail = &commit_list_insert(l->item, copy_tail)->next;
+		prio_queue_put(&copy, l->item);
 	}
-	while (copy) {
+	while (copy.nr) {
 		const char *p, *buf;
 		struct commit *commit;
 		int matches;
@@ -1507,7 +1508,7 @@ static int get_oid_oneline(struct repository *r,
 	regfree(&regex);
 	for (l = list; l; l = l->next)
 		clear_commit_marks(l->item, ONELINE_SEEN);
-	free_commit_list(copy);
+	clear_prio_queue(&copy);
 	return found ? 0 : -1;
 }
 
@@ -2061,7 +2062,6 @@ static enum get_oid_result get_oid_with_context_1(struct repository *repo,
 			cb.list = &list;
 			refs_for_each_ref(get_main_ref_store(repo), handle_one_ref, &cb);
 			refs_head_ref(get_main_ref_store(repo), handle_one_ref, &cb);
-			commit_list_sort_by_date(&list);
 			ret = get_oid_oneline(repo, name + 2, oid, list);
 
 			free_commit_list(list);
diff --git a/t/meson.build b/t/meson.build
index 1af289425d..660d780dcc 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -1116,6 +1116,7 @@ benchmarks = [
   'perf/p1450-fsck.sh',
   'perf/p1451-fsck-skip-list.sh',
   'perf/p1500-graph-walks.sh',
+  'perf/p1501-rev-parse-oneline.sh',
   'perf/p2000-sparse-operations.sh',
   'perf/p3400-rebase.sh',
   'perf/p3404-rebase-interactive.sh',
diff --git a/t/perf/p1501-rev-parse-oneline.sh b/t/perf/p1501-rev-parse-oneline.sh
new file mode 100755
index 0000000000..538fa9c404
--- /dev/null
+++ b/t/perf/p1501-rev-parse-oneline.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+
+test_description='Test :/ object name notation'
+
+. ./perf-lib.sh
+
+test_perf_fresh_repo
+
+#
+# Creates lots of merges to make history traversal costly.  In
+# particular it creates 2^($max_level-1)-1 2-way merges on top of
+# 2^($max_level-1) root commits.  E.g., the commit history looks like
+# this for a $max_level of 3:
+#
+#     _1_
+#    /   \
+#   2     3
+#  / \   / \
+# 4   5 6   7
+#
+# The numbers are the fast-import marks, which also are the commit
+# messages.  1 is the HEAD commit and a merge, 2 and 3 are also merges,
+# 4-7 are the root commits.
+#
+build_history () {
+	local max_level="$1" &&
+	local level="${2:-1}" &&
+	local mark="${3:-1}" &&
+	if test $level -eq $max_level
+	then
+		echo "reset refs/heads/master" &&
+		echo "from $ZERO_OID" &&
+		echo "commit refs/heads/master" &&
+		echo "mark :$mark" &&
+		echo "committer C <c@xxxxxxxxxxx> 1234567890 +0000" &&
+		echo "data <<EOF" &&
+		echo "$mark" &&
+		echo "EOF"
+	else
+		local level1=$((level+1)) &&
+		local mark1=$((2*mark)) &&
+		local mark2=$((2*mark+1)) &&
+		build_history $max_level $level1 $mark1 &&
+		build_history $max_level $level1 $mark2 &&
+		echo "commit refs/heads/master" &&
+		echo "mark :$mark" &&
+		echo "committer C <c@xxxxxxxxxxx> 1234567890 +0000" &&
+		echo "data <<EOF" &&
+		echo "$mark" &&
+		echo "EOF" &&
+		echo "from :$mark1" &&
+		echo "merge :$mark2"
+	fi
+}
+
+test_expect_success 'setup' '
+	build_history 16 | git fast-import &&
+	git log --format="%H %s" --reverse >commits &&
+	sed -n -e "s/ .*$//p" -e "q" <commits >expect &&
+	sed -n -e "s/^.* //p" -e "q" <commits >needle
+'
+
+test_perf "rev-parse :/$(cat needle)" '
+	git rev-parse :/$(cat needle) >actual
+'
+
+test_expect_success 'verify result' '
+	test_cmp expect actual
+'
+
+test_done
diff --git a/walker.c b/walker.c
index d131af04c7..8073754517 100644
--- a/walker.c
+++ b/walker.c
@@ -14,6 +14,7 @@
 #include "blob.h"
 #include "refs.h"
 #include "progress.h"
+#include "prio-queue.h"
 
 static struct object_id current_commit_oid;
 
@@ -78,7 +79,7 @@ static int process_tree(struct walker *walker, struct tree *tree)
 #define SEEN		(1U << 1)
 #define TO_SCAN		(1U << 2)
 
-static struct commit_list *complete = NULL;
+static struct prio_queue complete = { compare_commits_by_commit_date };
 
 static int process_commit(struct walker *walker, struct commit *commit)
 {
@@ -87,7 +88,10 @@ static int process_commit(struct walker *walker, struct commit *commit)
 	if (repo_parse_commit(the_repository, commit))
 		return -1;
 
-	while (complete && complete->item->date >= commit->date) {
+	while (complete.nr) {
+		struct commit *item = prio_queue_peek(&complete);
+		if (item->date < commit->date)
+			break;
 		pop_most_recent_commit(&complete, COMPLETE);
 	}
 
@@ -233,7 +237,7 @@ static int mark_complete(const char *path UNUSED,
 
 	if (commit) {
 		commit->object.flags |= COMPLETE;
-		commit_list_insert(commit, &complete);
+		prio_queue_put(&complete, commit);
 	}
 	return 0;
 }
@@ -302,7 +306,6 @@ int walker_fetch(struct walker *walker, int targets, char **target,
 	if (!walker->get_recover) {
 		refs_for_each_ref(get_main_ref_store(the_repository),
 				  mark_complete, NULL);
-		commit_list_sort_by_date(&complete);
 	}
 
 	for (i = 0; i < targets; i++) {
-- 
2.50.1





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux