[PATCH] describe: use khash in finish_depth_computation()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Depth computation can end early if all remaining commits are flagged.
The current code determines if that's the case by checking all queue
items each time it dequeues a flagged commit.  This can cause
quadratic complexity.

We could simply count the flagged items in the queue and then update
that number as we add and remove items.  That would provide a general
speedup, but leave one case where we have to scan the whole queue: When
we flag a previously seen, but unflagged commit.  It could be on the
queue and then we'd have to decrease our count.

We could dedicate an object flag to track queue membership, but that
would leave less for candidate tags, affecting the results.  So use a
hash table, specifically a khash set of commit pointers, to track that.
This avoids quadratic behaviour in all cases and provides a nice
performance boost over the previous commit, 08bb69d70f (describe: use
prio_queue_replace(), 2025-08-03):

Benchmark 1: ./git_08bb69d70f describe $(git rev-list v2.41.0..v2.47.0)
  Time (mean ± σ):     851.7 ms ±   1.1 ms    [User: 788.7 ms, System: 49.2 ms]
  Range (min … max):   849.4 ms … 852.8 ms    10 runs

Benchmark 2: ./git describe $(git rev-list v2.41.0..v2.47.0)
  Time (mean ± σ):     607.1 ms ±   0.9 ms    [User: 544.6 ms, System: 48.6 ms]
  Range (min … max):   606.1 ms … 608.3 ms    10 runs

Summary
  ./git describe $(git rev-list v2.41.0..v2.47.0) ran
    1.40 ± 0.00 times faster than ./git_08bb69d70f describe $(git rev-list v2.41.0..v2.47.0)

Use the commit index value as a hash because it is unique, has the
right size and needs no computation.  We could also derive the hash
directly from the pointer value, but that requires slightly more effort.

Signed-off-by: René Scharfe <l.s.r@xxxxxx>
---
 builtin/describe.c | 57 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/builtin/describe.c b/builtin/describe.c
index 0540976210..edb4dec79d 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -24,6 +24,7 @@
 #include "commit-slab.h"
 #include "wildmatch.h"
 #include "prio-queue.h"
+#include "khash.h"
 
 #define MAX_TAGS	(FLAG_BITS - 1)
 #define DEFAULT_CANDIDATES 10
@@ -286,38 +287,74 @@ static void lazy_queue_clear(struct lazy_queue *queue)
 	queue->get_pending = false;
 }
 
-static bool all_have_flag(const struct lazy_queue *queue, unsigned flag)
+static inline unsigned int commit_index(const struct commit *commit)
 {
-	for (size_t i = queue->get_pending ? 1 : 0; i < queue->queue.nr; i++) {
-		struct commit *commit = queue->queue.array[i].data;
-		if (!(commit->object.flags & flag))
-			return false;
-	}
-	return true;
+	return commit->index;
+}
+
+static inline int ptr_eq(const void *a, const void *b)
+{
+	return a == b;
+}
+
+KHASH_INIT(commit_set, struct commit *, int, 0, commit_index, ptr_eq)
+
+static void commit_set_insert(kh_commit_set_t *set, struct commit *commit)
+{
+	int added;
+	kh_put_commit_set(set, commit, &added);
+}
+
+static void commit_set_remove(kh_commit_set_t *set, struct commit *commit)
+{
+	khiter_t pos = kh_get_commit_set(set, commit);
+	if (pos != kh_end(set))
+		kh_del_commit_set(set, pos);
 }
 
 static unsigned long finish_depth_computation(struct lazy_queue *queue,
 					      struct possible_tag *best)
 {
 	unsigned long seen_commits = 0;
+	kh_commit_set_t unflagged = { 0 };
+
+	for (size_t i = queue->get_pending ? 1 : 0; i < queue->queue.nr; i++) {
+		struct commit *commit = queue->queue.array[i].data;
+		if (!(commit->object.flags & best->flag_within))
+			commit_set_insert(&unflagged, commit);
+	}
+
 	while (!lazy_queue_empty(queue)) {
 		struct commit *c = lazy_queue_get(queue);
 		struct commit_list *parents = c->parents;
+
 		seen_commits++;
 		if (c->object.flags & best->flag_within) {
-			if (all_have_flag(queue, best->flag_within))
+			if (!kh_size(&unflagged))
 				break;
-		} else
+		} else {
+			commit_set_remove(&unflagged, c);
 			best->depth++;
+		}
 		while (parents) {
 			struct commit *p = parents->item;
+			unsigned seen, flag_before, flag_after;
+
 			repo_parse_commit(the_repository, p);
-			if (!(p->object.flags & SEEN))
+			seen = p->object.flags & SEEN;
+			if (!seen)
 				lazy_queue_put(queue, p);
+			flag_before = p->object.flags & best->flag_within;
 			p->object.flags |= c->object.flags;
+			flag_after = p->object.flags & best->flag_within;
+			if (!seen && !flag_after)
+				commit_set_insert(&unflagged, p);
+			if (seen && !flag_before && flag_after)
+				commit_set_remove(&unflagged, p);
 			parents = parents->next;
 		}
 	}
+	kh_release_commit_set(&unflagged);
 	return seen_commits;
 }
 
-- 
2.51.0





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux