commit - ad5b3f855591bc548f15e09ae4b7fdf674f16245
commit + 60b6e08bf7113461bbe0ce511caf96a31121b143
blob - 375e1b98cb5964fd4d7c3d4c3cd4e1497d56fd89
blob + 9dc641cf345f7bda61a2e56012332d68e357bddb
--- include/diff_main.h
+++ include/diff_main.h
struct diff_data *root;
int diff_flags;
+
+ int err;
};
#define DIFF_FLAG_IGNORE_WHITESPACE 0x00000001
blob - bb2d2b2222005a0962cc5edc2a2573fbaa3ab2a8
blob + e0580aa5e5f76cdaee48726754ed1af7595dfbe3
--- lib/diff_debug.h
+++ lib/diff_debug.h
diff_atom_root_idx(right, atom->patience.pos_in_other));
print(" %s%s '",
- atom->patience.unique_here ? "u" : " ",
- atom->patience.unique_in_both ? "c" : " ");
+ atom->patience.unique_in_both ? "u" : " ");
if (atom->at == NULL) {
off_t remain = atom->len;
if (fseek(atom->d->root->f, atom->pos, SEEK_SET) == -1)
blob - d8950f01de74482510174dd49af87a8d3f8b13f6
blob + e9088e91a37fcda61b2cf9cd6487f3abe30adee4
--- lib/diff_internal.h
+++ lib/diff_internal.h
/* State for the Patience Diff algorithm */
/* TODO: keep a separate array for the patience state */
struct {
- bool unique_here;
bool unique_in_both;
struct diff_atom *pos_in_other;
struct diff_atom *prev_stack;
blob - cccfafd597fa145d26f94990a65acb0a25287205
blob + e7f728f1729206b8262ad23227da9f0d7d0dff66
--- lib/diff_patience.c
+++ lib/diff_patience.c
#include "diff_internal.h"
#include "diff_debug.h"
-/* Set unique_here = true for all atoms that exist exactly once in this list. */
-static int
-diff_atoms_mark_unique(struct diff_data *d, unsigned int *unique_count)
+int diff_atoms_qsort_compar(const void *_a, const void *_b)
{
- struct diff_atom *i;
- unsigned int count = 0;
- diff_data_foreach_atom(i, d) {
- i->patience.unique_here = true;
- i->patience.unique_in_both = true;
- count++;
+ const struct diff_atom *a = *(struct diff_atom**)_a;
+ const struct diff_atom *b = *(struct diff_atom**)_b;
+ int cmp;
+ int rc;
+
+ if (a->root->err || b->root->err) {
+ /* If atoms are from more than one diff_data, make sure the
+ * error, if any, spreads to all of them. */
+ a->root->err = rc;
+ b->root->err = rc;
+ return 0;
}
- diff_data_foreach_atom(i, d) {
- struct diff_atom *j;
- if (!i->patience.unique_here)
- continue;
+ /* Sort by the simplistic hash */
+ if (a->hash < b->hash)
+ return -1;
+ if (a->hash > b->hash)
+ return 1;
- diff_data_foreach_atom_from(i + 1, j, d) {
- bool same;
- int r = diff_atom_same(&same, i, j);
- if (r)
- return r;
- if (!same)
- continue;
- if (i->patience.unique_here) {
- i->patience.unique_here = false;
- i->patience.unique_in_both = false;
- count--;
- }
- j->patience.unique_here = false;
- j->patience.unique_in_both = false;
- count--;
- }
+ /* If hashes are the same, the lines may still differ. Do a full cmp. */
+ rc = diff_atom_cmp(&cmp, a, b);
+
+ if (rc) {
+ /* Mark the I/O error so that the caller can find out about it.
+ * For the case atoms are from more than one diff_data, mark in
+ * both. */
+ a->root->err = rc;
+ if (a->root != b->root)
+ b->root->err = rc;
+ return 0;
}
- if (unique_count)
- *unique_count = count;
- return 0;
+
+ return cmp;
}
-/* Mark those lines as atom->patience.unique_in_both = true that appear exactly
- * once in each side. */
-static int
-diff_atoms_mark_unique_in_both(struct diff_data *left, struct diff_data *right,
- unsigned int *unique_in_both_count)
+/* Sort an array of struct diff_atom* in-place. */
+static int diff_atoms_qsort(struct diff_atom *atoms[],
+ size_t atoms_count)
{
- /* Derive the final unique_in_both count without needing an explicit
- * iteration. So this is just some optimiziation to save one iteration
- * in the end. */
- unsigned int unique_in_both;
- int r;
+ qsort(atoms, atoms_count, sizeof(struct diff_atom*),
+ diff_atoms_qsort_compar);
+ return atoms[0]->root->err;
+}
- r = diff_atoms_mark_unique(left, &unique_in_both);
- if (r)
- return r;
- r = diff_atoms_mark_unique(right, NULL);
- if (r)
- return r;
-
- debug("unique_in_both %u\n", unique_in_both);
-
- struct diff_atom *i;
- diff_data_foreach_atom(i, left) {
- if (!i->patience.unique_here)
- continue;
- struct diff_atom *j;
- int found_in_b = 0;
- diff_data_foreach_atom(j, right) {
- bool same;
- int r = diff_atom_same(&same, i, j);
- if (r)
- return r;
- if (!same)
- continue;
- if (!j->patience.unique_here) {
- found_in_b = 2; /* or more */
+static int
+diff_atoms_mark_unique_in_both_by_qsort(struct diff_data *left,
+ struct diff_data *right,
+ unsigned int *unique_in_both_count_p)
+{
+ struct diff_atom *a;
+ struct diff_atom *b;
+ struct diff_atom **all_atoms =
+ malloc((left->atoms.len + right->atoms.len)
+ * sizeof(struct diff_atom*));
+ unsigned int len = 0;
+ unsigned int i;
+ unsigned int unique_in_both_count = 0;
+ int rc;
+ left->root->err = 0;
+ right->root->err = 0;
+ diff_data_foreach_atom(a, left) {
+ all_atoms[len++] = a;
+ }
+ diff_data_foreach_atom(b, right) {
+ all_atoms[len++] = b;
+ }
+
+ rc = diff_atoms_qsort(all_atoms, len);
+ if (rc)
+ goto free_and_exit;
+
+ /* Now we have a sorted array of atom pointers. All similar lines are
+ * adjacent. Walk through the array and mark those that are unique on
+ * each side, but exist once in both sources. */
+ for (i = 0; i < len; i++) {
+ bool same;
+ unsigned int j;
+ unsigned int count_first_side = 1;
+ unsigned int count_other_side = 0;
+ a = all_atoms[i];
+
+ for (j = i+1; j < len; j++) {
+ b = all_atoms[j];
+ rc = diff_atom_same(&same, a, b);
+ if (rc)
+ goto free_and_exit;
+ if (!same)
break;
- } else {
- found_in_b = 1;
- j->patience.pos_in_other = i;
- i->patience.pos_in_other = j;
- }
+ /* A following atom is the same. See on which side the
+ * repetition counts. */
+ if (a->root == b->root)
+ count_first_side ++;
+ else
+ count_other_side ++;
}
- if (found_in_b == 0 || found_in_b > 1) {
- i->patience.unique_in_both = false;
- unique_in_both--;
- debug("unique_in_both %u (%d) ", unique_in_both,
- found_in_b);
- debug_dump_atom(left, NULL, i);
+ /* Counted a section of similar atoms, put the results back to
+ * the atoms. */
+ if ((count_first_side == 1)
+ && (count_other_side == 1)) {
+ b = all_atoms[i+1];
+ a->patience.unique_in_both = true;
+ a->patience.pos_in_other = b;
+ b->patience.unique_in_both = true;
+ b->patience.pos_in_other = a;
+ unique_in_both_count++;
}
}
-
- /* Still need to unmark right[*]->patience.unique_in_both for atoms that
- * don't exist in left */
- diff_data_foreach_atom(i, right) {
- if (!i->patience.unique_here
- || !i->patience.unique_in_both)
- continue;
- struct diff_atom *j;
- bool found_in_a = false;
- diff_data_foreach_atom(j, left) {
- bool same;
- int r;
- if (!j->patience.unique_in_both)
- continue;
- r = diff_atom_same(&same, i, j);
- if (r)
- return r;
- if (!same)
- continue;
- found_in_a = true;
- break;
- }
-
- if (!found_in_a)
- i->patience.unique_in_both = false;
- }
-
- if (unique_in_both_count)
- *unique_in_both_count = unique_in_both;
- return 0;
+ *unique_in_both_count_p = unique_in_both_count;
+ rc = 0;
+free_and_exit:
+ free(all_atoms);
+ return rc;
}
static int
/* Find those lines that appear exactly once in 'left' and exactly once
* in 'right'. */
- rc = diff_atoms_mark_unique_in_both(left, right, &unique_in_both_count);
+ rc = diff_atoms_mark_unique_in_both_by_qsort(left, right,
+ &unique_in_both_count);
if (rc)
return rc;