commit - 926387ed74169349bdfa40445192f2b23d815244
commit + ca6fcbdce0110845f1195d2d1f5a14ed52a45baf
blob - ab51ce7a36d7cfb1b9e50d047239e9b760dba83b
blob + 7dcfd74d9e824db631a9d3bc775f8e4ff365720c
--- lib/diff_patience.c
+++ lib/diff_patience.c
#include "diff_internal.h"
#include "diff_debug.h"
+/* Algorithm to find unique lines:
+ * 0: stupidly iterate atoms
+ * 1: qsort
+ * 2: mergesort
+ */
+#define UNIQUE_STRATEGY 2
+
/* Per-atom state for the Patience Diff algorithm */
struct atom_patience {
+#if UNIQUE_STRATEGY == 0
+ bool unique_here;
+#endif
bool unique_in_both;
struct diff_atom *pos_in_other;
struct diff_atom *prev_stack;
(((struct atom_patience*)((ATOM)->root->current->algo_data))\
[diff_atom_idx((ATOM)->root->current, ATOM)])
-int diff_atoms_mergesort_compar(const void *_a, const void *_b)
+#if UNIQUE_STRATEGY == 0
+
+/* Stupid iteration and comparison of all atoms */
+static int
+diff_atoms_mark_unique(struct diff_data *d, unsigned int *unique_count)
+{
+ struct diff_atom *i;
+ unsigned int count = 0;
+ diff_data_foreach_atom(i, d) {
+ PATIENCE(i).unique_here = true;
+ PATIENCE(i).unique_in_both = true;
+ count++;
+ }
+ diff_data_foreach_atom(i, d) {
+ struct diff_atom *j;
+
+ if (!PATIENCE(i).unique_here)
+ continue;
+
+ diff_data_foreach_atom_from(i + 1, j, d) {
+ bool same;
+ int r = diff_atom_same(&same, i, j);
+ if (r)
+ return r;
+ if (!same)
+ continue;
+ if (PATIENCE(i).unique_here) {
+ PATIENCE(i).unique_here = false;
+ PATIENCE(i).unique_in_both = false;
+ count--;
+ }
+ PATIENCE(j).unique_here = false;
+ PATIENCE(j).unique_in_both = false;
+ count--;
+ }
+ }
+ if (unique_count)
+ *unique_count = count;
+ return 0;
+}
+
+/* Mark those lines as PATIENCE(atom).unique_in_both = true that appear exactly
+ * once in each side. */
+static int
+diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right,
+ unsigned int *unique_in_both_count)
+{
+ /* Derive the final unique_in_both count without needing an explicit
+ * iteration. So this is just some optimiziation to save one iteration
+ * in the end. */
+ unsigned int unique_in_both;
+ int r;
+
+ r = diff_atoms_mark_unique(left, &unique_in_both);
+ if (r)
+ return r;
+ r = diff_atoms_mark_unique(right, NULL);
+ if (r)
+ return r;
+
+ debug("unique_in_both %u\n", unique_in_both);
+
+ struct diff_atom *i;
+ diff_data_foreach_atom(i, left) {
+ if (!PATIENCE(i).unique_here)
+ continue;
+ struct diff_atom *j;
+ int found_in_b = 0;
+ diff_data_foreach_atom(j, right) {
+ bool same;
+ int r = diff_atom_same(&same, i, j);
+ if (r)
+ return r;
+ if (!same)
+ continue;
+ if (!PATIENCE(j).unique_here) {
+ found_in_b = 2; /* or more */
+ break;
+ } else {
+ found_in_b = 1;
+ PATIENCE(j).pos_in_other = i;
+ PATIENCE(i).pos_in_other = j;
+ }
+ }
+
+ if (found_in_b == 0 || found_in_b > 1) {
+ PATIENCE(i).unique_in_both = false;
+ unique_in_both--;
+ debug("unique_in_both %u (%d) ", unique_in_both,
+ found_in_b);
+ debug_dump_atom(left, NULL, i);
+ }
+ }
+
+ /* Still need to unmark right[*]->patience.unique_in_both for atoms that
+ * don't exist in left */
+ diff_data_foreach_atom(i, right) {
+ if (!PATIENCE(i).unique_here
+ || !PATIENCE(i).unique_in_both)
+ continue;
+ struct diff_atom *j;
+ bool found_in_a = false;
+ diff_data_foreach_atom(j, left) {
+ bool same;
+ int r;
+ if (!PATIENCE(j).unique_in_both)
+ continue;
+ r = diff_atom_same(&same, i, j);
+ if (r)
+ return r;
+ if (!same)
+ continue;
+ found_in_a = true;
+ break;
+ }
+
+ if (!found_in_a)
+ PATIENCE(i).unique_in_both = false;
+ }
+
+ if (unique_in_both_count)
+ *unique_in_both_count = unique_in_both;
+ return 0;
+}
+
+#else /* UNIQUE_STRATEGY != 0 */
+
+/* Use an optimized sorting algorithm (qsort, mergesort) to find unique lines */
+
+int diff_atoms_compar(const void *_a, const void *_b)
{
const struct diff_atom *a = *(struct diff_atom**)_a;
const struct diff_atom *b = *(struct diff_atom**)_b;
int rc = 0;
/* If there's been an error (e.g. I/O error) in a previous compar, we
- * have no way to abort the mergesort but just report the rc and stop
+ * have no way to abort the sort but just report the rc and stop
* comparing. Make sure to catch errors on either side. If atoms are
* from more than one diff_data, make sure the error, if any, spreads
* to all of them, so we can cut short all future comparisons. */
/* Sort an array of struct diff_atom* in-place. */
static int diff_atoms_sort(struct diff_atom *atoms[],
- size_t atoms_count)
+ size_t atoms_count)
{
- /*
- * Use mergesort(3) bcause it provides stable ordering
- * even if two or more elements are equal.
- */
+#if UNIQUE_STRATEGY == 1
+ qsort(atoms, atoms_count, sizeof(struct diff_atom*), diff_atoms_compar);
+#else
mergesort(atoms, atoms_count, sizeof(struct diff_atom*),
- diff_atoms_mergesort_compar);
+ diff_atoms_compar);
+#endif
return atoms[0]->root->err;
}
free(all_atoms);
return rc;
}
+#endif /* UNIQUE_STRATEGY != 0 */
static int
diff_atoms_swallow_identical_neighbors(struct diff_data *left,