Re: [PATCH] describe: use khash in finish_depth_computation()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 25, 2025 at 03:34:03AM -0400, Jeff King wrote:

> > The nice thing about commit-slabs is the lack of required maintenance.
> > They just allocate as needed and never give anything back, never need to
> > rehash.  And they don't need to store the keys anywhere.  They should be
> > good alternatives to object flags used during full history traversal
> > without flagging non-commits.
> > 
> > Off-the-shelf hash tables like khash might be slower in these cases,
> > though not far off, I expect.
> 
> Yeah, maybe. I've never really loved the drawbacks of commit-slab, and
> your numbers made me wonder if we could be getting away with hashes in
> more places. Though one of the drawbacks I find with commit-slab is the
> grossness of instantiating a giant mess of macros. That is not much
> better with khash. ;) And certainly oidmap, though it does not use
> macros, is no picnic to use either.

So out of curiosity I tried replacing a slab that should be pretty
densely filled, using a khash based on oidhash/ptr along with some
quality-of-life wrappers.  Patch is below.

It performs...very badly. Not sure if I've screwed something up, but
it's about 7x slower to run "git rev-list --author-date-order HEAD" in
the kernel. So maybe slabs really are worth it overall.

-Peff

---
diff --git a/commit.c b/commit.c
index 16d91b2bfc..0678932ab8 100644
--- a/commit.c
+++ b/commit.c
@@ -822,9 +822,7 @@ struct commit *pop_commit(struct commit_list **stack)
 /* count number of children that have not been emitted */
 define_commit_slab(indegree_slab, int);
 
-define_commit_slab(author_date_slab, timestamp_t);
-
-void record_author_date(struct author_date_slab *author_date,
+void record_author_date(kh_obj_timestamp_t *author_date,
 			struct commit *commit)
 {
 	const char *buffer = repo_get_commit_buffer(the_repository, commit,
@@ -845,7 +843,7 @@ void record_author_date(struct author_date_slab *author_date,
 	date = parse_timestamp(ident.date_begin, &date_end, 10);
 	if (date_end != ident.date_end)
 		goto fail_exit; /* malformed date */
-	*(author_date_slab_at(author_date, commit)) = date;
+	obj_timestamp_put(author_date, &commit->object, date);
 
 fail_exit:
 	repo_unuse_commit_buffer(the_repository, commit, buffer);
@@ -855,9 +853,9 @@ int compare_commits_by_author_date(const void *a_, const void *b_,
 				   void *cb_data)
 {
 	const struct commit *a = a_, *b = b_;
-	struct author_date_slab *author_date = cb_data;
-	timestamp_t a_date = *(author_date_slab_at(author_date, a));
-	timestamp_t b_date = *(author_date_slab_at(author_date, b));
+	kh_obj_timestamp_t *author_date = cb_data;
+	timestamp_t a_date = obj_timestamp_get(author_date, &a->object);
+	timestamp_t b_date = obj_timestamp_get(author_date, &b->object);
 
 	/* newer commits with larger date first */
 	if (a_date < b_date)
@@ -910,7 +908,7 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
 	struct indegree_slab indegree;
 	struct prio_queue queue;
 	struct commit *commit;
-	struct author_date_slab author_date;
+	kh_obj_timestamp_t author_date;
 
 	if (!orig)
 		return;
@@ -927,7 +925,7 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
 		queue.compare = compare_commits_by_commit_date;
 		break;
 	case REV_SORT_BY_AUTHOR_DATE:
-		init_author_date_slab(&author_date);
+		memset(&author_date, 0, sizeof(author_date));
 		queue.compare = compare_commits_by_author_date;
 		queue.cb_data = &author_date;
 		break;
@@ -1011,7 +1009,7 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
 	clear_indegree_slab(&indegree);
 	clear_prio_queue(&queue);
 	if (sort_order == REV_SORT_BY_AUTHOR_DATE)
-		clear_author_date_slab(&author_date);
+		kh_release_obj_timestamp(&author_date);
 }
 
 struct rev_collect {
diff --git a/commit.h b/commit.h
index 1d6e0c7518..f49531ab8b 100644
--- a/commit.h
+++ b/commit.h
@@ -334,8 +334,8 @@ int remove_signature(struct strbuf *buf);
 int check_commit_signature(const struct commit *commit, struct signature_check *sigc);
 
 /* record author-date for each commit object */
-struct author_date_slab;
-void record_author_date(struct author_date_slab *author_date,
+struct kh_obj_timestamp_t;
+void record_author_date(kh_obj_timestamp_t *author_date,
 			struct commit *commit);
 
 int compare_commits_by_author_date(const void *a_, const void *b_, void *unused);
diff --git a/object.h b/object.h
index 8c3c1c46e1..14718051f2 100644
--- a/object.h
+++ b/object.h
@@ -2,6 +2,7 @@
 #define OBJECT_H
 
 #include "hash.h"
+#include "khash.h"
 
 struct buffer_slab;
 struct repository;
@@ -340,4 +341,33 @@ void clear_object_flags(struct repository *repo, unsigned flags);
  */
 void repo_clear_commit_marks(struct repository *r, unsigned int flags);
 
+/*
+ * This would all probably go into its own header file...
+ */
+static inline unsigned int obj_ptr_hash(const struct object *obj)
+{
+	return oidhash(&obj->oid);
+}
+
+static inline int obj_ptr_eq(const struct object *a, const struct object *b)
+{
+	return a == b;
+}
+
+#define OBJHASH_INIT(name, value_type, sentinel) \
+KHASH_INIT(name, const struct object *, value_type, 1, obj_ptr_hash, obj_ptr_eq); \
+static inline void name##_put(kh_##name##_t *map, const struct object *obj, value_type v) \
+{ \
+	int hash_ret; \
+	khiter_t pos = kh_put_##name(map, obj, &hash_ret); \
+	kh_value(map, pos) = v; \
+} \
+static inline value_type name##_get(kh_##name##_t *map, const struct object *obj) \
+{ \
+	khiter_t pos = kh_get_##name(map, obj); \
+	return pos == kh_end(map) ? sentinel : kh_value(map, pos); \
+}
+
+OBJHASH_INIT(obj_timestamp, timestamp_t, 0);
+
 #endif /* OBJECT_H */
diff --git a/revision.c b/revision.c
index 6ba8f67054..e67cf2249a 100644
--- a/revision.c
+++ b/revision.c
@@ -3622,15 +3622,14 @@ static int mark_uninteresting(const struct object_id *oid,
 }
 
 define_commit_slab(indegree_slab, int);
-define_commit_slab(author_date_slab, timestamp_t);
 
 struct topo_walk_info {
 	timestamp_t min_generation;
 	struct prio_queue explore_queue;
 	struct prio_queue indegree_queue;
 	struct prio_queue topo_queue;
 	struct indegree_slab indegree;
-	struct author_date_slab author_date;
+	kh_obj_timestamp_t author_date;
 };
 
 static int topo_walk_atexit_registered;
@@ -3755,7 +3754,7 @@ static void release_revisions_topo_walk_info(struct topo_walk_info *info)
 	clear_prio_queue(&info->indegree_queue);
 	clear_prio_queue(&info->topo_queue);
 	clear_indegree_slab(&info->indegree);
-	clear_author_date_slab(&info->author_date);
+	kh_release_obj_timestamp(&info->author_date);
 	free(info);
 }
 
@@ -3789,7 +3788,7 @@ static void init_topo_walk(struct rev_info *revs)
 		info->topo_queue.compare = compare_commits_by_commit_date;
 		break;
 	case REV_SORT_BY_AUTHOR_DATE:
-		init_author_date_slab(&info->author_date);
+		memset(&info->author_date, 0, sizeof(info->author_date));
 		info->topo_queue.compare = compare_commits_by_author_date;
 		info->topo_queue.cb_data = &info->author_date;
 		break;




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux