commit 984ca65b1b715a9fdc86bfff64e90a485d008058 from: Neels Hofmeyr date: Sun Oct 11 05:31:05 2020 UTC myers_divide: fix "inifite" looping over same box commit - 19fad31f2f7cceb234a814c262dbba796b36af97 commit + 984ca65b1b715a9fdc86bfff64e90a485d008058 blob - 8669ed2d4c4cc6546d4d8febdcaad9c408054d47 blob + 6e754fa0fcc088feab24c22e8d57cd3010845154 --- lib/diff_myers.c +++ lib/diff_myers.c @@ -217,6 +217,9 @@ diff_divide_myers_forward(bool *found_midpoint, int delta = (int)right->atoms.len - (int)left->atoms.len; int k; int x; + int prev_x; + int prev_y; + int x_before_slide; *found_midpoint = false; debug("-- %s d=%d\n", __func__, d); @@ -282,7 +285,8 @@ diff_divide_myers_forward(bool *found_midpoint, * graph: x += 1. */ int prev_k = k - 1; - int prev_x = kd_forward[prev_k]; + prev_x = kd_forward[prev_k]; + prev_y = xk_to_y(prev_x, prev_k); x = prev_x + 1; } else { /* The bottom most one. @@ -293,11 +297,12 @@ diff_divide_myers_forward(bool *found_midpoint, * (since we're deriving y from y = x - k). */ int prev_k = k + 1; - int prev_x = kd_forward[prev_k]; + prev_x = kd_forward[prev_k]; + prev_y = xk_to_y(prev_x, prev_k); x = prev_x; } - int x_before_slide = x; + x_before_slide = x; /* Slide down any snake that we might find here. */ while (x < left->atoms.len && xk_to_y(x, k) < right->atoms.len) { bool same; @@ -449,13 +454,29 @@ diff_divide_myers_forward(bool *found_midpoint, k, c, x, xk_to_y(x, k), backward_x, xc_to_y(backward_x, c, delta)); if (x >= backward_x) { - *meeting_snake = (struct diff_box){ - .left_start = x_before_slide, - .left_end = x, - .right_start = xc_to_y(x_before_slide, - c, delta), - .right_end = xk_to_y(x, k), - }; + if (x_before_slide != x) { + /* met after sliding up a mid-snake */ + *meeting_snake = (struct diff_box){ + .left_start = x_before_slide, + .left_end = x, + .right_start = xc_to_y(x_before_slide, + c, delta), + .right_end = xk_to_y(x, k), + }; + } else { + /* met after a side step, non-identical + * line. Mark that as box divider + * instead. This makes sure that + * myers_divide never returns the same + * box that came as input, avoiding + * "infinite" looping. */ + *meeting_snake = (struct diff_box){ + .left_start = prev_x, + .left_end = x, + .right_start = prev_y, + .right_end = xk_to_y(x, k), + }; + } debug("HIT x=(%u,%u) - y=(%u,%u)\n", meeting_snake->left_start, meeting_snake->right_start, @@ -506,6 +527,9 @@ diff_divide_myers_backward(bool *found_midpoint, int delta = (int)right->atoms.len - (int)left->atoms.len; int c; int x; + int prev_x; + int prev_y; + int x_before_slide; *found_midpoint = false; @@ -575,7 +599,8 @@ diff_divide_myers_backward(bool *found_midpoint, * y = x - c + delta). */ int prev_c = c - 1; - int prev_x = kd_backward[prev_c]; + prev_x = kd_backward[prev_c]; + prev_y = xc_to_y(prev_x, prev_c, delta); x = prev_x; } else { /* The bottom most one. @@ -583,7 +608,8 @@ diff_divide_myers_backward(bool *found_midpoint, * graph: x -= 1. */ int prev_c = c + 1; - int prev_x = kd_backward[prev_c]; + prev_x = kd_backward[prev_c]; + prev_y = xc_to_y(prev_x, prev_c, delta); x = prev_x - 1; } @@ -601,7 +627,7 @@ diff_divide_myers_backward(bool *found_midpoint, debug_dump_atom(right, left, &right->atoms.head[xc_to_y(x, c, delta)-1]); } - int x_before_slide = x; + x_before_slide = x; while (x > 0 && xc_to_y(x, c, delta) > 0) { bool same; int r = diff_atom_same(&same, @@ -729,12 +755,28 @@ diff_divide_myers_backward(bool *found_midpoint, k, c, forward_x, xk_to_y(forward_x, k), x, xc_to_y(x, c, delta)); if (forward_x >= x) { - *meeting_snake = (struct diff_box){ - .left_start = x, - .left_end = x_before_slide, - .right_start = xc_to_y(x, c, delta), - .right_end = xk_to_y(x_before_slide, k), - }; + if (x_before_slide != x) { + /* met after sliding down a mid-snake */ + *meeting_snake = (struct diff_box){ + .left_start = x, + .left_end = x_before_slide, + .right_start = xc_to_y(x, c, delta), + .right_end = xk_to_y(x_before_slide, k), + }; + } else { + /* met after a side step, non-identical + * line. Mark that as box divider + * instead. This makes sure that + * myers_divide never returns the same + * box that came as input, avoiding + * "infinite" looping. */ + *meeting_snake = (struct diff_box){ + .left_start = x, + .left_end = prev_x, + .right_start = xc_to_y(x, c, delta), + .right_end = prev_y, + }; + } debug("HIT x=%u,%u - y=%u,%u\n", meeting_snake->left_start, meeting_snake->right_start, @@ -974,10 +1016,11 @@ diff_algo_myers_divide(const struct diff_algo_config * /* else: left_section_len == 0 and right_section_len == 0, i.e. * nothing before the mid-snake. */ - if (mid_snake.left_end > mid_snake.left_start) { - /* The midpoint is a "snake", i.e. on a section of - * identical data on both sides: that section - * immediately becomes a solved diff chunk. */ + if (mid_snake.left_end > mid_snake.left_start + || mid_snake.right_end > mid_snake.right_start) { + /* The midpoint is a section of identical data on both + * sides, or a certain differing line: that section + * immediately becomes a solved chunk. */ debug("the mid-snake\n"); if (!diff_state_add_chunk(state, true, &left->atoms.head[mid_snake.left_start],