Commit Diff


commit - f374e91343146fc0584d53f4b767a3ebfe7bc49e
commit + 85ab45596727cfd0254c6d5b6f0c5705b7b6e89e
blob - 6c36ecdb31c5271095adb923f9de7efe30eb74a9
blob + 701fe6fdba14ea9bae8e9e6b929831a7bc068dc8
--- include/diff/diff_main.h
+++ include/diff/diff_main.h
@@ -15,77 +15,18 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
-#ifndef MAX
-#define MAX(A,B) ((A)>(B)?(A):(B))
-#endif
-#ifndef MIN
-#define MIN(A,B) ((A)<(B)?(A):(B))
-#endif
-
 struct diff_range {
 	int start;
 	int end;
 };
 
-static inline bool
-diff_range_empty(const struct diff_range *r)
-{
-	return r->start == r->end;
-}
-
-static inline bool
-diff_ranges_touch(const struct diff_range *a, const struct diff_range *b)
-{
-	return (a->end >= b->start) && (a->start <= b->end);
-}
-
-static inline void
-diff_ranges_merge(struct diff_range *a, const struct diff_range *b)
-{
-	*a = (struct diff_range){
-		.start = MIN(a->start, b->start),
-		.end = MAX(a->end, b->end),
-	};
-}
-
-static inline int
-diff_range_len(const struct diff_range *r)
-{
-	if (!r)
-		return 0;
-	return r->end - r->start;
-}
-
 /* List of all possible return codes of a diff invocation. */
 #define DIFF_RC_USE_DIFF_ALGO_FALLBACK	-1
 #define DIFF_RC_OK			0
 /* Any positive return values are errno values from sys/errno.h */
 
-struct diff_data;
+struct diff_atom;
 
-struct diff_atom {
-	struct diff_data *d; /* back pointer to associated diff data */
-
-	off_t pos;		/* if not memory-mapped */
-	const uint8_t *at;	/* if memory-mapped */
-	off_t len;
-
-	/* This hash is just a very cheap speed up for finding *mismatching*
-	 * atoms. When hashes match, we still need to compare entire atoms to
-	 * find out whether they are indeed identical or not. */
-	unsigned int hash;
-
-	/* State for the Patience Diff algorithm */
-	/* TODO: keep a separate array for the patience state */
-	struct {
-		bool unique_here;
-		bool unique_in_both;
-		struct diff_atom *pos_in_other;
-		struct diff_atom *prev_stack;
-		struct diff_range identical_lines;
-	} patience;
-};
-
 /* For each file, there is a "root" struct diff_data referencing the entire
  * file, which the atoms are parsed from. In recursion of diff algorithm, there
  * may be "child" struct diff_data only referencing a subsection of the file,
@@ -107,114 +48,9 @@ struct diff_data {
 
 void diff_data_free(struct diff_data *diff_data);
 
-int
-diff_atom_cmp(int *cmp,
-	      const struct diff_atom *left,
-	      const struct diff_atom *right);
-
-/* Indicate whether two given diff atoms match. */
-int
-diff_atom_same(bool *same,
-	       const struct diff_atom *left,
-	       const struct diff_atom *right);
-
-/* The atom's index in the entire file. For atoms divided by lines of text, this
- * yields the line number (starting with 0). Also works for diff_data that
- * reference only a subsection of a file, always reflecting the global position
- * in the file (and not the relative position within the subsection). */
-#define diff_atom_root_idx(DIFF_DATA, ATOM) \
-	((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
-	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
-	 : (DIFF_DATA)->root->atoms.len)
-
-/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
- * yields the line number (starting with 0). */
-#define diff_atom_idx(DIFF_DATA, ATOM) \
-	((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
-	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
-	 : (DIFF_DATA)->atoms.len)
-
-#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
-	for ((ATOM) = (FIRST_ATOM); \
-	     (ATOM) \
-	     && ((ATOM) >= (FIRST_ATOM)) \
-	     && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
-	     (ATOM)++)
-
-#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
-	foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
-
-#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
-	for ((ATOM) = (FROM); \
-	     (ATOM) \
-	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
-	     && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
-	     (ATOM)++)
-
-/* A diff chunk represents a set of atoms on the left and/or a set of atoms on
- * the right.
- *
- * If solved == false:
- * The diff algorithm has divided the source file, and this is a chunk that the
- * inner_algo should run on next.
- * The lines on the left should be diffed against the lines on the right.
- * (If there are no left lines or no right lines, it implies solved == true,
- * because there is nothing to diff.)
- *
- * If solved == true:
- * If there are only left atoms, it is a chunk removing atoms from the left ("a
- * minus chunk").
- * If there are only right atoms, it is a chunk adding atoms from the right ("a
- * plus chunk").
- * If there are both left and right lines, it is a chunk of equal content on
- * both sides, and left_count == right_count:
- *
- * - foo  }
- * - bar  }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
- * - baz  }               right_start = NULL, right_count = 0 }
- *   moo    }
- *   goo    }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
- *   zoo    }              right_start = &right.atoms.head[0], right_count = 3 }
- *  +loo      }
- *  +roo      }-- diff_chunk{ left_start = NULL, left_count = 0,
- *  +too      }            right_start = &right.atoms.head[3], right_count = 3 }
- *
- */
-struct diff_chunk {
-	bool solved;
-	struct diff_atom *left_start;
-	unsigned int left_count;
-	struct diff_atom *right_start;
-	unsigned int right_count;
-};
-
+struct diff_chunk;
 typedef ARRAYLIST(struct diff_chunk) diff_chunk_arraylist_t;
-#define DIFF_RESULT_ALLOC_BLOCKSIZE 128
 
-enum diff_chunk_type {
-	CHUNK_EMPTY,
-	CHUNK_PLUS,
-	CHUNK_MINUS,
-	CHUNK_SAME,
-	CHUNK_ERROR,
-};
-
-static inline enum diff_chunk_type
-diff_chunk_type(const struct diff_chunk *chunk)
-{
-	if (!chunk->left_count && !chunk->right_count)
-		return CHUNK_EMPTY;
-	if (!chunk->solved)
-		return CHUNK_ERROR;
-	if (!chunk->right_count)
-		return CHUNK_MINUS;
-	if (!chunk->left_count)
-		return CHUNK_PLUS;
-	if (chunk->left_count != chunk->right_count)
-		return CHUNK_ERROR;
-	return CHUNK_SAME;
-}
-
 struct diff_result {
 	int rc;
 	struct diff_data left;
@@ -222,22 +58,8 @@ struct diff_result {
 	diff_chunk_arraylist_t chunks;
 };
 
-struct diff_state {
-	/* The final result passed to the original diff caller. */
-	struct diff_result *result;
+struct diff_state;
 
-	/* The root diff_data is in result->left,right, these are (possibly)
-	 * subsections of the root data. */
-	struct diff_data left;
-	struct diff_data right;
-
-	unsigned int recursion_depth_left;
-
-	/* Remaining chunks from one diff algorithm pass, if any solved == false
-	 * chunks came up. */
-	diff_chunk_arraylist_t temp_result;
-};
-
 struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
 					struct diff_atom *left_start,
 					unsigned int left_count,
blob - e0bebd8db4f63c98c197cd5b1b64ddd23aeee754
blob + 882f94d7166ec134f12b1d70b4a076a0f17877a7
--- lib/diff_atomize_text.c
+++ lib/diff_atomize_text.c
@@ -25,6 +25,7 @@
 #include <diff/arraylist.h>
 #include <diff/diff_main.h>
 #include "diff_debug.h"
+#include "diff_internal.h"
 
 static int
 diff_data_atomize_text_lines_fd(struct diff_data *d)
blob - e47921b35f615024740d645645eb9ebc74225ee1
blob + c31b9e6e33142ba75f981720737dd4b978c52a07
--- lib/diff_main.c
+++ lib/diff_main.c
@@ -33,6 +33,7 @@
 #include <diff/diff_main.h>
 
 #include "diff_debug.h"
+#include "diff_internal.h"
 
 static int
 read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len)
blob - /dev/null
blob + eda757a32c6d6cb95825ee2b4489c06e6bcc620f (mode 644)
--- /dev/null
+++ lib/diff_internal.h
@@ -0,0 +1,211 @@
+/* Generic infrastructure to implement various diff algorithms. */
+/*
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef MAX
+#define MAX(A,B) ((A)>(B)?(A):(B))
+#endif
+#ifndef MIN
+#define MIN(A,B) ((A)<(B)?(A):(B))
+#endif
+
+static inline bool
+diff_range_empty(const struct diff_range *r)
+{
+	return r->start == r->end;
+}
+
+static inline bool
+diff_ranges_touch(const struct diff_range *a, const struct diff_range *b)
+{
+	return (a->end >= b->start) && (a->start <= b->end);
+}
+
+static inline void
+diff_ranges_merge(struct diff_range *a, const struct diff_range *b)
+{
+	*a = (struct diff_range){
+		.start = MIN(a->start, b->start),
+		.end = MAX(a->end, b->end),
+	};
+}
+
+static inline int
+diff_range_len(const struct diff_range *r)
+{
+	if (!r)
+		return 0;
+	return r->end - r->start;
+}
+
+/* List of all possible return codes of a diff invocation. */
+#define DIFF_RC_USE_DIFF_ALGO_FALLBACK	-1
+#define DIFF_RC_OK			0
+/* Any positive return values are errno values from sys/errno.h */
+
+struct diff_data;
+
+struct diff_atom {
+	struct diff_data *d; /* back pointer to associated diff data */
+
+	off_t pos;		/* if not memory-mapped */
+	const uint8_t *at;	/* if memory-mapped */
+	off_t len;
+
+	/* This hash is just a very cheap speed up for finding *mismatching*
+	 * atoms. When hashes match, we still need to compare entire atoms to
+	 * find out whether they are indeed identical or not. */
+	unsigned int hash;
+
+	/* State for the Patience Diff algorithm */
+	/* TODO: keep a separate array for the patience state */
+	struct {
+		bool unique_here;
+		bool unique_in_both;
+		struct diff_atom *pos_in_other;
+		struct diff_atom *prev_stack;
+		struct diff_range identical_lines;
+	} patience;
+};
+
+int
+diff_atom_cmp(int *cmp,
+	      const struct diff_atom *left,
+	      const struct diff_atom *right);
+
+/* Indicate whether two given diff atoms match. */
+int
+diff_atom_same(bool *same,
+	       const struct diff_atom *left,
+	       const struct diff_atom *right);
+
+/* The atom's index in the entire file. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). Also works for diff_data that
+ * reference only a subsection of a file, always reflecting the global position
+ * in the file (and not the relative position within the subsection). */
+#define diff_atom_root_idx(DIFF_DATA, ATOM) \
+	((ATOM) && ((ATOM) >= (DIFF_DATA)->root->atoms.head) \
+	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->root->atoms.head)) \
+	 : (DIFF_DATA)->root->atoms.len)
+
+/* The atom's index within DIFF_DATA. For atoms divided by lines of text, this
+ * yields the line number (starting with 0). */
+#define diff_atom_idx(DIFF_DATA, ATOM) \
+	((ATOM) && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+	 ? (unsigned int)((ATOM) - ((DIFF_DATA)->atoms.head)) \
+	 : (DIFF_DATA)->atoms.len)
+
+#define foreach_diff_atom(ATOM, FIRST_ATOM, COUNT) \
+	for ((ATOM) = (FIRST_ATOM); \
+	     (ATOM) \
+	     && ((ATOM) >= (FIRST_ATOM)) \
+	     && ((ATOM) - (FIRST_ATOM) < (COUNT)); \
+	     (ATOM)++)
+
+#define diff_data_foreach_atom(ATOM, DIFF_DATA) \
+	foreach_diff_atom(ATOM, (DIFF_DATA)->atoms.head, (DIFF_DATA)->atoms.len)
+
+#define diff_data_foreach_atom_from(FROM, ATOM, DIFF_DATA) \
+	for ((ATOM) = (FROM); \
+	     (ATOM) \
+	     && ((ATOM) >= (DIFF_DATA)->atoms.head) \
+	     && ((ATOM) - (DIFF_DATA)->atoms.head < (DIFF_DATA)->atoms.len); \
+	     (ATOM)++)
+
+/* A diff chunk represents a set of atoms on the left and/or a set of atoms on
+ * the right.
+ *
+ * If solved == false:
+ * The diff algorithm has divided the source file, and this is a chunk that the
+ * inner_algo should run on next.
+ * The lines on the left should be diffed against the lines on the right.
+ * (If there are no left lines or no right lines, it implies solved == true,
+ * because there is nothing to diff.)
+ *
+ * If solved == true:
+ * If there are only left atoms, it is a chunk removing atoms from the left ("a
+ * minus chunk").
+ * If there are only right atoms, it is a chunk adding atoms from the right ("a
+ * plus chunk").
+ * If there are both left and right lines, it is a chunk of equal content on
+ * both sides, and left_count == right_count:
+ *
+ * - foo  }
+ * - bar  }-- diff_chunk{ left_start = &left.atoms.head[0], left_count = 3,
+ * - baz  }               right_start = NULL, right_count = 0 }
+ *   moo    }
+ *   goo    }-- diff_chunk{ left_start = &left.atoms.head[3], left_count = 3,
+ *   zoo    }              right_start = &right.atoms.head[0], right_count = 3 }
+ *  +loo      }
+ *  +roo      }-- diff_chunk{ left_start = NULL, left_count = 0,
+ *  +too      }            right_start = &right.atoms.head[3], right_count = 3 }
+ *
+ */
+struct diff_chunk {
+	bool solved;
+	struct diff_atom *left_start;
+	unsigned int left_count;
+	struct diff_atom *right_start;
+	unsigned int right_count;
+};
+
+#define DIFF_RESULT_ALLOC_BLOCKSIZE 128
+
+enum diff_chunk_type {
+	CHUNK_EMPTY,
+	CHUNK_PLUS,
+	CHUNK_MINUS,
+	CHUNK_SAME,
+	CHUNK_ERROR,
+};
+
+static inline enum diff_chunk_type
+diff_chunk_type(const struct diff_chunk *chunk)
+{
+	if (!chunk->left_count && !chunk->right_count)
+		return CHUNK_EMPTY;
+	if (!chunk->solved)
+		return CHUNK_ERROR;
+	if (!chunk->right_count)
+		return CHUNK_MINUS;
+	if (!chunk->left_count)
+		return CHUNK_PLUS;
+	if (chunk->left_count != chunk->right_count)
+		return CHUNK_ERROR;
+	return CHUNK_SAME;
+}
+
+struct diff_state {
+	/* The final result passed to the original diff caller. */
+	struct diff_result *result;
+
+	/* The root diff_data is in result->left,right, these are (possibly)
+	 * subsections of the root data. */
+	struct diff_data left;
+	struct diff_data right;
+
+	unsigned int recursion_depth_left;
+
+	/* Remaining chunks from one diff algorithm pass, if any solved == false
+	 * chunks came up. */
+	diff_chunk_arraylist_t temp_result;
+};
+
+struct diff_chunk *diff_state_add_chunk(struct diff_state *state, bool solved,
+					struct diff_atom *left_start,
+					unsigned int left_count,
+					struct diff_atom *right_start,
+					unsigned int right_count);
blob - 745a119ea407c711f457c96474762ca5f849ad2b
blob + c10e5e5475cce9bedea908a75e628123a864d339
--- lib/diff_myers.c
+++ lib/diff_myers.c
@@ -28,6 +28,7 @@
 #include <diff/diff_main.h>
 
 #include "diff_debug.h"
+#include "diff_internal.h"
 
 /* Myers' diff algorithm [1] is nicely explained in [2].
  * [1] http://www.xmailserver.org/diff2.pdf
blob - e9dd89fa1226d66603de9f1e80fd72bca21fefcb
blob + 03a4042d7c614b8d5565ce69f13d39b81da33884
--- lib/diff_output.c
+++ lib/diff_output.c
@@ -26,6 +26,8 @@
 #include <diff/diff_main.h>
 #include <diff/diff_output.h>
 
+#include "diff_internal.h"
+
 static int
 get_atom_byte(int *ch, struct diff_atom *atom, off_t off)
 {
blob - 38318db6a210f599103e1fc40e5d5f989ea6dc0b
blob + b79b53f3bb025ac548d288c82aa34658347b35a0
--- lib/diff_output_plain.c
+++ lib/diff_output_plain.c
@@ -25,6 +25,7 @@
 #include <diff/diff_main.h>
 #include <diff/diff_output.h>
 
+#include "diff_internal.h"
 
 int
 diff_output_plain(FILE *dest, const struct diff_input_info *info,
blob - 63d712dc89458bd762fad94e1d01c418d4887d19
blob + 189d9d7583c852336b9e70c6a20d6969b4f6726c
--- lib/diff_output_unidiff.c
+++ lib/diff_output_unidiff.c
@@ -26,6 +26,7 @@
 #include <diff/diff_output.h>
 
 #include "diff_debug.h"
+#include "diff_internal.h"
 
 static bool
 chunk_context_empty(const struct diff_chunk_context *cc)
blob - ad4072149dfdef088e99354b9c276ddd4fcd24dc
blob + 80af37f56e32b7c6d1fbf33b2d1dc07db12346c3
--- lib/diff_patience.c
+++ lib/diff_patience.c
@@ -27,6 +27,7 @@
 #include <diff/diff_main.h>
 
 #include "diff_debug.h"
+#include "diff_internal.h"
 
 /* Set unique_here = true for all atoms that exist exactly once in this list. */
 static int