[PATCH v2 06/17] xdiff: separate parsing lines from hashing them

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Ezekiel Newren <ezekielnewren@xxxxxxxxx>

We want to use xxhash for faster hashing. To facilitate that
and to simplify the code. Separate the concerns of parsing
and hashing into discrete steps. This makes swapping the hash
function much easier. Since xdl_hash_record() both parses and
hashses lines, this requires some slight code restructuring.

Signed-off-by: Ezekiel Newren <ezekielnewren@xxxxxxxxx>
---
 xdiff/xprepare.c | 75 ++++++++++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 00cdf7d8a038..031c1752cc1a 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -129,13 +129,39 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 }
 
 
+static void xdl_parse_lines(mmfile_t *mf, long narec, xdfile_t *xdf) {
+	u8 const* ptr = (u8 const*) mf->ptr;
+	usize len = (usize) mf->size;
+
+	xdf->recs = NULL;
+	xdf->nrec = 0;
+	XDL_ALLOC_ARRAY(xdf->recs, narec);
+
+	while (len > 0) {
+		xrecord_t *rec = NULL;
+		usize length;
+		u8 const* result = memchr(ptr, '\n', len);
+		if (result) {
+			length = result - ptr + 1;
+		} else {
+			length = len;
+		}
+		if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
+			die("XDL_ALLOC_GROW failed");
+		rec = xdl_cha_alloc(&xdf->rcha);
+		rec->ptr = ptr;
+		rec->size = length;
+		rec->ha = 0;
+		xdf->recs[xdf->nrec++] = rec;
+		ptr += length;
+		len -= length;
+	}
+
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
-	long nrec, bsize;
-	unsigned long hav;
-	char const *blk, *cur, *top, *prev;
-	xrecord_t *crec;
-	xrecord_t **recs;
 	unsigned long *ha;
 	char *rchg;
 	long *rindex;
@@ -143,50 +169,37 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	ha = NULL;
 	rindex = NULL;
 	rchg = NULL;
-	recs = NULL;
 
 	if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
 		goto abort;
-	if (!XDL_ALLOC_ARRAY(recs, narec))
-		goto abort;
 
-	nrec = 0;
-	if ((cur = blk = xdl_mmfile_first(mf, &bsize))) {
-		for (top = blk + bsize; cur < top; ) {
-			prev = cur;
-			hav = xdl_hash_record(&cur, top, xpp->flags);
-			if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
-				goto abort;
-			if (!(crec = xdl_cha_alloc(&xdf->rcha)))
-				goto abort;
-			crec->ptr = (u8 const*) prev;
-			crec->size = (long) (cur - prev);
-			crec->ha = hav;
-			recs[nrec++] = crec;
-			if (xdl_classify_record(pass, cf, crec) < 0)
-				goto abort;
-		}
+	xdl_parse_lines(mf, narec, xdf);
+
+	for (usize i = 0; i < (usize) xdf->nrec; i++) {
+		xrecord_t *rec = xdf->recs[i];
+		char const* dump = (char const*) rec->ptr;
+		rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags);
+		xdl_classify_record(pass, cf, rec);
 	}
 
-	if (!XDL_CALLOC_ARRAY(rchg, nrec + 2))
+
+	if (!XDL_CALLOC_ARRAY(rchg, xdf->nrec + 2))
 		goto abort;
 
 	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
 	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
-		if (!XDL_ALLOC_ARRAY(rindex, nrec + 1))
+		if (!XDL_ALLOC_ARRAY(rindex, xdf->nrec + 1))
 			goto abort;
-		if (!XDL_ALLOC_ARRAY(ha, nrec + 1))
+		if (!XDL_ALLOC_ARRAY(ha, xdf->nrec + 1))
 			goto abort;
 	}
 
-	xdf->nrec = nrec;
-	xdf->recs = recs;
 	xdf->rchg = rchg + 1;
 	xdf->rindex = rindex;
 	xdf->nreff = 0;
 	xdf->ha = ha;
 	xdf->dstart = 0;
-	xdf->dend = nrec - 1;
+	xdf->dend = xdf->nrec - 1;
 
 	return 0;
 
@@ -194,7 +207,7 @@ abort:
 	xdl_free(ha);
 	xdl_free(rindex);
 	xdl_free(rchg);
-	xdl_free(recs);
+	xdl_free(xdf->recs);
 	xdl_cha_free(&xdf->rcha);
 	return -1;
 }
-- 
gitgitgadget





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux