[PATCH v2 17/17] xdiff: use Rust's version of whitespace processing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Ezekiel Newren <ezekielnewren@xxxxxxxxx>

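Delete the C functions xdl_hash_record() and xdl_recmatch() in favor of
the Rust implementations xdl_line_hash() and xdl_line_equal(). Also drop
the xxh3_64() FFI wrapper, which xdl_prepare_ctx() no longer calls now
that xdl_line_hash() computes every record hash.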

Signed-off-by: Ezekiel Newren <ezekielnewren@xxxxxxxxx>
---
 rust/xdiff/src/lib.rs |   6 --
 xdiff-interface.c     |   4 +-
 xdiff/xmerge.c        |   8 +--
 xdiff/xprepare.c      |  29 ++------
 xdiff/xutils.c        | 158 ------------------------------------------
 xdiff/xutils.h        |   4 +-
 6 files changed, 15 insertions(+), 194 deletions(-)

diff --git a/rust/xdiff/src/lib.rs b/rust/xdiff/src/lib.rs
index 809c5573c6e7..634b453a21b6 100644
--- a/rust/xdiff/src/lib.rs
+++ b/rust/xdiff/src/lib.rs
@@ -14,12 +14,6 @@ pub const XDF_WHITESPACE_FLAGS: u64 = XDF_IGNORE_WHITESPACE |
     XDF_IGNORE_CR_AT_EOL;
 
 
-#[no_mangle]
-unsafe extern "C" fn xxh3_64(ptr: *const u8, size: usize) -> u64 {
-    let slice = std::slice::from_raw_parts(ptr, size);
-    xxhash_rust::xxh3::xxh3_64(slice)
-}
-
 #[no_mangle]
 unsafe extern "C" fn xdl_line_hash(ptr: *const u8, size: usize, flags: u64) -> u64 {
     let line = std::slice::from_raw_parts(ptr, size);
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 1edcd319e6ef..71ddccf2cc15 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -299,13 +299,13 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg)
 
 unsigned long xdiff_hash_string(const char *s, size_t len, long flags)
 {
-	return xdl_hash_record(&s, s + len, flags);
+	return xdl_line_hash((u8 const*) s, len, flags);
 }
 
 int xdiff_compare_lines(const char *l1, long s1,
 			const char *l2, long s2, long flags)
 {
-	return xdl_recmatch(l1, s1, l2, s2, flags);
+	return xdl_line_equal((u8 const*) l1, s1, (u8 const*) l2, s2, flags);
 }
 
 int parse_conflict_style_name(const char *value)
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index 6fa6ea61a208..2f64651a839b 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
 	xrecord_t **rec2 = xe2->xdf2.recs + i2;
 
 	for (i = 0; i < line_count; i++) {
-		int result = xdl_recmatch((const char*) rec1[i]->ptr, rec1[i]->size,
-			(const char*) rec2[i]->ptr, rec2[i]->size, flags);
+		bool result = xdl_line_equal(rec1[i]->ptr, rec1[i]->size,
+			rec2[i]->ptr, rec2[i]->size, flags);
 		if (!result)
 			return -1;
 	}
@@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
 
 static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags)
 {
-	return xdl_recmatch((char const*) rec1->ptr, rec1->size,
-			    (char const*) rec2->ptr, rec2->size, flags);
+	return xdl_line_equal(rec1->ptr, rec1->size,
+			    rec2->ptr, rec2->size, flags);
 }
 
 /*
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index c0463bacd94b..b9f12184b1bb 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -33,8 +33,8 @@
 typedef struct s_xdlclass {
 	struct s_xdlclass *next;
 	u64 ha;
-	char const *line;
-	long size;
+	u8 const *line;
+	usize size;
 	long idx;
 	long len1, len2;
 } xdlclass_t;
@@ -93,15 +93,15 @@ static void xdl_free_classifier(xdlclassifier_t *cf) {
 
 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) {
 	long hi;
-	char const *line;
+	u8 const *line;
 	xdlclass_t *rcrec;
 
-	line = (char const*) rec->ptr;
+	line = rec->ptr;
 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
 	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
 		if (rcrec->ha == rec->ha &&
-				xdl_recmatch(rcrec->line, rcrec->size,
-					(const char*) rec->ptr, rec->size, cf->flags))
+				xdl_line_equal(rcrec->line, rcrec->size,
+					rec->ptr, rec->size, cf->flags))
 			break;
 
 	if (!rcrec) {
@@ -160,9 +160,6 @@ static void xdl_parse_lines(mmfile_t *mf, long narec, xdfile_t *xdf) {
 }
 
 
-extern u64 xxh3_64(u8 const* ptr, usize size);
-
-
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
 	unsigned long *ha;
@@ -178,21 +175,9 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 
 	xdl_parse_lines(mf, narec, xdf);
 
-	if ((xpp->flags & XDF_WHITESPACE_FLAGS) == 0) {
-		for (usize i = 0; i < (usize) xdf->nrec; i++) {
-			xrecord_t *rec = xdf->recs[i];
-			rec->ha = xxh3_64(rec->ptr, rec->size);
-		}
-	} else {
-		for (usize i = 0; i < (usize) xdf->nrec; i++) {
-			xrecord_t *rec = xdf->recs[i];
-			char const* dump = (char const*) rec->ptr;
-			rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags);
-		}
-	}
-
 	for (usize i = 0; i < (usize) xdf->nrec; i++) {
 		xrecord_t *rec = xdf->recs[i];
+		rec->ha = xdl_line_hash(rec->ptr, rec->size, xpp->flags);
 		xdl_classify_record(pass, cf, rec);
 	}
 
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 10e4f20b7c31..29e240eb138b 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -152,164 +152,6 @@ int xdl_blankline(const char *line, long size, long flags)
 	return (i == size);
 }
 
-/*
- * Have we eaten everything on the line, except for an optional
- * CR at the very end?
- */
-static int ends_with_optional_cr(const char *l, long s, long i)
-{
-	int complete = s && l[s-1] == '\n';
-
-	if (complete)
-		s--;
-	if (s == i)
-		return 1;
-	/* do not ignore CR at the end of an incomplete line */
-	if (complete && s == i + 1 && l[i] == '\r')
-		return 1;
-	return 0;
-}
-
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
-{
-	int i1, i2;
-
-	if (s1 == s2 && !memcmp(l1, l2, s1))
-		return 1;
-	if (!(flags & XDF_WHITESPACE_FLAGS))
-		return 0;
-
-	i1 = 0;
-	i2 = 0;
-
-	/*
-	 * -w matches everything that matches with -b, and -b in turn
-	 * matches everything that matches with --ignore-space-at-eol,
-	 * which in turn matches everything that matches with --ignore-cr-at-eol.
-	 *
-	 * Each flavor of ignoring needs different logic to skip whitespaces
-	 * while we have both sides to compare.
-	 */
-	if (flags & XDF_IGNORE_WHITESPACE) {
-		goto skip_ws;
-		while (i1 < s1 && i2 < s2) {
-			if (l1[i1++] != l2[i2++])
-				return 0;
-		skip_ws:
-			while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-				i1++;
-			while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-				i2++;
-		}
-	} else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) {
-		while (i1 < s1 && i2 < s2) {
-			if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) {
-				/* Skip matching spaces and try again */
-				while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-					i1++;
-				while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-					i2++;
-				continue;
-			}
-			if (l1[i1++] != l2[i2++])
-				return 0;
-		}
-	} else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
-		while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
-			i1++;
-			i2++;
-		}
-	} else if (flags & XDF_IGNORE_CR_AT_EOL) {
-		/* Find the first difference and see how the line ends */
-		while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
-			i1++;
-			i2++;
-		}
-		return (ends_with_optional_cr(l1, s1, i1) &&
-			ends_with_optional_cr(l2, s2, i2));
-	}
-
-	/*
-	 * After running out of one side, the remaining side must have
-	 * nothing but whitespace for the lines to match.  Note that
-	 * ignore-whitespace-at-eol case may break out of the loop
-	 * while there still are characters remaining on both lines.
-	 */
-	if (i1 < s1) {
-		while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-			i1++;
-		if (s1 != i1)
-			return 0;
-	}
-	if (i2 < s2) {
-		while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-			i2++;
-		return (s2 == i2);
-	}
-	return 1;
-}
-
-static unsigned long xdl_hash_record_with_whitespace(char const **data,
-		char const *top, long flags) {
-	unsigned long ha = 5381;
-	char const *ptr = *data;
-	int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
-
-	for (; ptr < top && *ptr != '\n'; ptr++) {
-		if (cr_at_eol_only) {
-			/* do not ignore CR at the end of an incomplete line */
-			if (*ptr == '\r' &&
-			    (ptr + 1 < top && ptr[1] == '\n'))
-				continue;
-		}
-		else if (XDL_ISSPACE(*ptr)) {
-			const char *ptr2 = ptr;
-			int at_eol;
-			while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
-					&& ptr[1] != '\n')
-				ptr++;
-			at_eol = (top <= ptr + 1 || ptr[1] == '\n');
-			if (flags & XDF_IGNORE_WHITESPACE)
-				; /* already handled */
-			else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
-				 && !at_eol) {
-				ha += (ha << 5);
-				ha ^= (unsigned long) ' ';
-			}
-			else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
-				 && !at_eol) {
-				while (ptr2 != ptr + 1) {
-					ha += (ha << 5);
-					ha ^= (unsigned long) *ptr2;
-					ptr2++;
-				}
-			}
-			continue;
-		}
-		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
-	}
-	*data = ptr < top ? ptr + 1: ptr;
-
-	return ha;
-}
-
-unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
-	unsigned long ha = 5381;
-	char const *ptr = *data;
-
-	if (flags & XDF_WHITESPACE_FLAGS)
-		return xdl_hash_record_with_whitespace(data, top, flags);
-
-	for (; ptr < top && *ptr != '\n'; ptr++) {
-		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
-	}
-	*data = ptr < top ? ptr + 1: ptr;
-
-	return ha;
-}
-
 unsigned int xdl_hashbits(unsigned int size) {
 	unsigned int val = 1, bits = 0;
 
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index fd0bba94e8b4..8f524b72c491 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -33,8 +33,8 @@ void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
 int xdl_blankline(const char *line, long size, long flags);
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record(char const **data, char const *top, long flags);
+u64 xdl_line_hash(u8 const* ptr, usize size, u64 flags);
+bool xdl_line_equal(u8 const* lhs, usize lhs_len, u8 const* rhs, usize rhs_len, u64 flags);
 unsigned int xdl_hashbits(unsigned int size);
 int xdl_num_out(char *out, long val);
 int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
-- 
gitgitgadget




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux