commit - 9f9e0ab43b25a75c30b677d0642dddfe868f4f34
commit + a5de2633143c41b5d79b996d2b8f64df0808eaeb
blob - c74bcf7a8413d667ee4804a41a38fffdbcf1f714
blob + 0c983c45b9f23e0e1e04094533004da12a66192a
--- lib/diff_patience.c
+++ lib/diff_patience.c
/* Implementation of the Patience Diff algorithm invented by Bram Cohen:
- * Divide a diff problem into smaller chunks by an LCS of common-unique lines. */
+ * Divide a diff problem into smaller chunks by an LCS (Longest Common Sequence)
+ * of common-unique lines. */
/*
* Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
*
/* Swallow upwards.
* Each common-unique line swallows identical lines upwards and
* downwards.
- * All common-unique lines that were part of the identical lines
- * following below were already swallowed in the previous
- * iteration, so we will never hit another common-unique line
- * above. */
+ * Will never hit another identical common-unique line above on
+ * the left, because those would already have swallowed this
+ * common-unique line in a previous iteration.
+ */
for (identical_l.start = l_idx, identical_r.start = r_idx;
identical_l.start > l_min && identical_r.start > r_min;
identical_l.start--, identical_r.start--) {
break;
}
- /* Swallow downwards */
+ /* Swallow downwards. Join any common-unique lines in a block of
+ * matching L/R lines with this one. */
for (identical_l.end = l_idx + 1, identical_r.end = r_idx + 1;
identical_l.end < left->atoms.len
&& identical_r.end < right->atoms.len;
r_end = &right->atoms.head[identical_r.end];
if (!PATIENCE(l_end).unique_in_both)
continue;
- /* Part of a chunk of identical lines, remove from
- * listing of unique_in_both lines */
+ /* A unique_in_both atom is joined with a preceding
+ * unique_in_both atom, remove the joined atom from
+ * listing of unique_in_both atoms */
PATIENCE(l_end).unique_in_both = false;
PATIENCE(r_end).unique_in_both = false;
(*unique_in_both_count)--;
* look at each section in turn, trying to again find common-unique
* lines in those smaller sections. As soon as no more are found, the
* remaining smaller sections are solved by Myers. */
+ /* left_pos and right_pos are indexes in left/right->atoms.head until
+ * which the atoms are already handled (added to result chunks). */
unsigned int left_pos = 0;
unsigned int right_pos = 0;
for (i = 0; i <= lcs_count; i++) {
struct diff_atom *atom;
struct diff_atom *atom_r;
+ /* left_idx and right_idx are indexes of the start of this
+ * section of identical lines on both sides.
+ * left_pos marks the index of the first still unhandled line,
+ * left_idx is the start of an identical section some way
+ * further down, and this loop adds an unsolved chunk of
+ * [left_pos..left_idx[ and a solved chunk of
+ * [left_idx..identical_lines.end[. */
unsigned int left_idx;
unsigned int right_idx;
PATIENCE(atom).identical_lines.start, PATIENCE(atom).identical_lines.end,
PATIENCE(atom_r).identical_lines.start, PATIENCE(atom_r).identical_lines.end);
} else {
+ /* There are no more identical lines until the end of
+ * left and right. */
atom = NULL;
atom_r = NULL;
left_idx = left->atoms.len;
right_idx = right->atoms.len;
}
- /* 'atom' now marks an atom that matches on both sides according
- * to patience-diff (a common-unique identical atom in both
- * files).
+ /* 'atom' (if not NULL) now marks an atom that matches on both
+ * sides according to patience-diff (a common-unique identical
+ * atom in both files).
* Handle the section before and the atom itself; the section
* after will be handled by the next loop iteration -- note that
* i loops to last element + 1 ("i <= lcs_count"), so that there
* will be another final iteration to pick up the last remaining
* items after the last LCS atom.
- * The sections before might also be empty on left and/or right.
- * left_pos and right_pos mark the indexes of the first atoms
- * that have not yet been handled in the previous loop
- * iteration. left_idx and right_idx mark the indexes of the
- * matching atom on left and right, respectively. */
+ */
debug("iteration %u left_pos %u left_idx %u"
" right_pos %u right_idx %u\n",