1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
67 #include <sys/types.h>
70 #include <sys/queue.h>
89 #include "got_error.h"
90 #include "got_object.h"
93 #include "got_lib_diff.h"
95 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
96 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
99 * diff - compare two files.
103 * Uses an algorithm due to Harold Stone, which finds
104 * a pair of longest identical subsequences in the two
107 * The major goal is to generate the match vector J.
108 * J[i] is the index of the line in file1 corresponding
109 * to line i file0. J[i] = 0 if there is no
110 * such line in file1.
112 * Lines are hashed so as to work in core. All potential
113 * matches are located by sorting the lines of each file
114 * on the hash (called ``value''). In particular, this
115 * collects the equivalence classes in file1 together.
116 * Subroutine equiv replaces the value of each line in
117 * file0 by the index of the first element of its
118 * matching equivalence in (the reordered) file1.
119 * To save space equiv squeezes file1 into a single
120 * array member in which the equivalence classes
121 * are simply concatenated, except that their first
122 * members are flagged by changing sign.
124 * Next the indices that point into member are unsorted into
125 * array class according to the original order of file0.
127 * The cleverness lies in routine stone. This marches
128 * through the lines of file0, developing a vector klist
129 * of "k-candidates". At step i a k-candidate is a matched
130 * pair of lines x,y (x in file0 y in file1) such that
131 * there is a common subsequence of length k
132 * between the first i lines of file0 and the first y
133 * lines of file1, but there is no such subsequence for
134 * any smaller y. x is the earliest possible mate to y
135 * that occurs in such a subsequence.
137 * Whenever any of the members of the equivalence class of
138 * lines in file1 matable to a line in file0 has serial number
139 * less than the y of some k-candidate, that k-candidate
140 * with the smallest such y is replaced. The new
141 * k-candidate is chained (via pred) to the current
142 * k-1 candidate so that the actual subsequence can
143 * be recovered. When a member has serial number greater
144 * that the y of all k-candidates, the klist is extended.
145 * At the end, the longest subsequence is pulled out
146 * and placed in the array J by unravel
148 * With J in hand, the matches there recorded are
149 * check'ed against reality to assure that no spurious
150 * matches have crept in due to hashing. If they have,
151 * they are broken, and "jackpot" is recorded--a harmless
152 * matter except that a true match for a spuriously
153 * mated line may now be unnecessarily reported as a change.
155 * Much of the complexity of the program comes simply
156 * from trying to minimize core utilization and
157 * maximize the range of doable problems by dynamically
158 * allocating what is needed and reusing what is not.
159 * The core requirements for problems larger than somewhat
160 * are (in words) 2*length(file0) + length(file1) +
161 * 3*(number of k-candidates installed), typically about
162 * 6n words for files of length n.
176 static void diff_output(FILE *, const char *, ...);
177 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
178 static void check(struct got_diff_state *, FILE *, FILE *, int);
179 static void range(FILE *, int, int, char *);
180 static void uni_range(FILE *, int, int);
181 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
182 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
183 static void prune(struct got_diff_state *);
184 static void equiv(struct line *, int, struct line *, int, int *);
185 static void unravel(struct got_diff_state *, int);
186 static int unsort(struct line *, int, int *);
187 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
188 static void sort(struct line *, int);
189 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
190 static int asciifile(FILE *);
191 static void fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int);
192 static int newcand(struct got_diff_state *, int, int, int, int *);
193 static int search(struct got_diff_state *, int *, int, int);
194 static int skipline(FILE *);
195 static int isqrt(int);
196 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
197 static int readhash(struct got_diff_state *, FILE *, int);
198 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
199 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
202 * chrtran points to one of 2 translation tables: cup2low if folding upper to
203 * lower case clow2low if not folding case
205 u_char clow2low[256] = {
206 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
207 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
208 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
209 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
210 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
211 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
212 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
213 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
214 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
215 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
216 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
217 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
218 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
219 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
220 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
221 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
222 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
223 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
224 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
225 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
226 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
227 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
228 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
232 u_char cup2low[256] = {
233 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
234 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
235 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
236 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
237 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
238 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
239 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
240 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
241 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
242 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
243 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
244 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
245 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
246 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
247 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
248 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
249 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
250 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
251 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
252 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
253 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
254 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
255 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
260 diff_output(FILE *outfile, const char *fmt, ...)
268 vfprintf(outfile, fmt, ap);
273 got_diff_state_free(struct got_diff_state *ds)
284 const struct got_error *
285 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
286 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
287 struct got_diff_changes *changes)
289 const struct got_error *err = NULL;
296 ds->lastmatchline = 0;
297 ds->context_vec_ptr = ds->context_vec_start - 1;
298 ds->max_context = GOT_DIFF_MAX_CONTEXT;
299 if (flags & D_IGNORECASE)
300 ds->chrtran = cup2low;
302 ds->chrtran = clow2low;
303 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
304 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 if (flags & D_EMPTY1) {
308 f1 = fopen(_PATH_DEVNULL, "r");
310 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
314 else if (f1 == NULL) {
319 if (flags & D_EMPTY2) {
320 f2 = fopen(_PATH_DEVNULL, "r");
322 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
325 } else if (f2 == NULL) {
330 switch (files_differ(ds, f1, f2, flags)) {
341 if ((flags & D_FORCEASCII) == 0 &&
342 (!asciifile(f1) || !asciifile(f2))) {
347 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
348 err = got_error_from_errno("prepare");
351 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
352 err = got_error_from_errno("prepare");
357 sort(ds->sfile[0], ds->slen[0]);
358 sort(ds->sfile[1], ds->slen[1]);
360 ds->member = (int *)ds->file[1];
361 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
362 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
364 err = got_error_from_errno("reallocarray");
369 ds->class = (int *)ds->file[0];
370 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
371 err = got_error_from_errno("unsort");
374 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
376 err = got_error_from_errno("reallocarray");
381 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
382 if (ds->klist == NULL) {
383 err = got_error_from_errno("calloc");
388 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
389 if (ds->clist == NULL) {
390 err = got_error_from_errno("calloc");
393 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
395 err = got_error_from_errno("stone");
399 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
401 err = got_error_from_errno("reallocarray");
405 unravel(ds, ds->klist[i]);
407 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
409 err = got_error_from_errno("reallocarray");
413 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
415 err = got_error_from_errno("reallocarray");
419 check(ds, f1, f2, flags);
420 if (output(outfile, changes, ds, args, args->label[0], f1,
421 args->label[1], f2, flags))
422 err = got_error_from_errno("output");
429 if ((flags & D_EMPTY1) && f1) {
430 if (fclose(f1) != 0 && err == NULL)
431 err = got_error_from_errno("fclose");
433 if ((flags & D_EMPTY2) && f2) {
434 if (fclose(f2) != 0 && err == NULL)
435 err = got_error_from_errno("fclose");
441 * Check to see if the given files differ.
442 * Returns 0 if they are the same, 1 if different, and -1 on error.
443 * XXX - could use code from cmp(1) [faster]
446 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
448 char buf1[BUFSIZ], buf2[BUFSIZ];
451 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
452 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
455 i = fread(buf1, 1, sizeof(buf1), f1);
456 j = fread(buf2, 1, sizeof(buf2), f2);
457 if ((!i && ferror(f1)) || (!j && ferror(f2)))
463 if (memcmp(buf1, buf2, i) != 0)
469 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
477 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
481 p = calloc(sz + 3, sizeof(*p));
484 for (j = 0; (h = readhash(ds, fd, flags));) {
487 q = reallocarray(p, sz + 3, sizeof(*p));
503 prune(struct got_diff_state *ds)
507 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
508 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
511 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
512 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
515 for (j = 0; j < 2; j++) {
516 ds->sfile[j] = ds->file[j] + ds->pref;
517 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
518 for (i = 0; i <= ds->slen[j]; i++)
519 ds->sfile[j][i].serial = i;
524 equiv(struct line *a, int n, struct line *b, int m, int *c)
529 while (i <= n && j <= m) {
530 if (a[i].value < b[j].value)
532 else if (a[i].value == b[j].value)
543 while (b[j + 1].value == b[j].value) {
551 /* Code taken from ping.c */
560 do { /* newton was a stinker */
565 } while ((x - y) > 1 || (x - y) < -1);
571 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
574 int oldc, tc, oldl, sq;
575 u_int numtries, bound;
578 if (flags & D_MINIMAL)
582 bound = MAXIMUM(256, sq);
586 c[0] = newcand(ds, 0, 0, 0, &error);
589 for (i = 1; i <= n; i++) {
598 if (y <= ds->clist[oldc].y)
600 l = search(ds, c, k, y);
604 if (ds->clist[c[l]].y <= y)
607 c[l] = newcand(ds, i, y, oldc, &error);
614 c[l] = newcand(ds, i, y, oldc, &error);
620 } while ((y = b[++j]) > 0 && numtries < bound);
626 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
630 if (ds->clen == ds->clistlen) {
631 ds->clistlen = ds->clistlen * 11 / 10;
632 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
641 q = ds->clist + ds->clen;
650 search(struct got_diff_state *ds, int *c, int k, int y)
654 if (ds->clist[c[k]].y < y) /* quick look for typical case */
662 t = ds->clist[c[l]].y;
674 unravel(struct got_diff_state *ds, int p)
679 for (i = 0; i <= ds->len[0]; i++)
680 ds->J[i] = i <= ds->pref ? i :
681 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
682 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
683 ds->J[q->x + ds->pref] = q->y + ds->pref;
687 * Check does double duty:
688 * 1. ferret out any fortuitous correspondences due
689 * to confounding by hashing (which result in "jackpot")
690 * 2. collect random access indexes to the two files
693 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
695 int i, j, jackpot, c, d;
701 ds->ixold[0] = ds->ixnew[0] = 0;
704 for (i = 1; i <= ds->len[0]; i++) {
706 ds->ixold[i] = ctold += skipline(f1);
709 while (j < ds->J[i]) {
710 ds->ixnew[j] = ctnew += skipline(f2);
713 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
718 * GNU diff ignores a missing newline
719 * in one file for -b or -w.
721 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
722 if (c == EOF && d == '\n') {
725 } else if (c == '\n' && d == EOF) {
732 if ((flags & D_FOLDBLANKS) && isspace(c) &&
738 } while (isspace(c = getc(f1)));
743 } while (isspace(d = getc(f2)));
744 } else if ((flags & D_IGNOREBLANKS)) {
745 while (isspace(c) && c != '\n') {
749 while (isspace(d) && d != '\n') {
754 if (ds->chrtran[c] != ds->chrtran[d]) {
757 if (c != '\n' && c != EOF)
758 ctold += skipline(f1);
759 if (d != '\n' && c != EOF)
760 ctnew += skipline(f2);
763 if (c == '\n' || c == EOF)
770 if ((c = getc(f1)) != (d = getc(f2))) {
773 if (c != '\n' && c != EOF)
774 ctold += skipline(f1);
775 if (d != '\n' && c != EOF)
776 ctnew += skipline(f2);
779 if (c == '\n' || c == EOF)
783 ds->ixold[i] = ctold;
784 ds->ixnew[j] = ctnew;
787 for (; j <= ds->len[1]; j++)
788 ds->ixnew[j] = ctnew += skipline(f2);
791 * fprintf(stderr, "jackpot\n");
795 /* shellsort CACM #201 */
797 sort(struct line *a, int n)
799 struct line *ai, *aim, w;
804 for (j = 1; j <= n; j *= 2)
806 for (m /= 2; m != 0; m /= 2) {
808 for (j = 1; j <= k; j++) {
809 for (ai = &a[j]; ai > a; ai -= m) {
812 break; /* wraparound */
813 if (aim->value > ai[0].value ||
814 (aim->value == ai[0].value &&
815 aim->serial > ai[0].serial))
817 w.value = ai[0].value;
818 ai[0].value = aim->value;
819 aim->value = w.value;
820 w.serial = ai[0].serial;
821 ai[0].serial = aim->serial;
822 aim->serial = w.serial;
829 unsort(struct line *f, int l, int *b)
833 a = calloc(l + 1, sizeof(*a));
836 for (i = 1; i <= l; i++)
837 a[f[i].serial] = f[i].value;
838 for (i = 1; i <= l; i++)
850 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
856 output(FILE *outfile, struct got_diff_changes *changes,
857 struct got_diff_state *ds, struct got_diff_args *args,
858 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
860 int m, i0, i1, j0, j1;
867 ds->J[m + 1] = ds->len[1] + 1;
868 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
869 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
871 j0 = ds->J[i0 - 1] + 1;
873 while (i1 < m && ds->J[i1 + 1] == 0)
875 j1 = ds->J[i1 + 1] - 1;
877 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
878 i0, i1, j0, j1, &flags);
883 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
884 1, 0, 1, ds->len[1], &flags);
888 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
889 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
895 range(FILE *outfile, int a, int b, char *separator)
897 diff_output(outfile, "%d", a > b ? b : a);
899 diff_output(outfile, "%s%d", separator, b);
903 uni_range(FILE *outfile, int a, int b)
906 diff_output(outfile, "%d,%d", a, b - a + 1);
908 diff_output(outfile, "%d", b);
910 diff_output(outfile, "%d,0", b);
914 * Indicate that there is a difference between lines a and b of the from file
915 * to get to lines c to d of the to file. If a is greater then b then there
916 * are no lines in the from file involved and this means that there were
917 * lines appended (beginning at b). If c is greater than d then there are
918 * lines missing from the to file.
921 change(FILE *outfile, struct got_diff_changes *changes,
922 struct got_diff_state *ds, struct got_diff_args *args,
923 const char *file1, FILE *f1, const char *file2, FILE *f2,
924 int a, int b, int c, int d, int *pflags)
929 if (*pflags & D_HEADER) {
930 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
931 *pflags &= ~D_HEADER;
933 if (args->diff_format == D_UNIFIED) {
935 * Allocate change records as needed.
937 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
938 struct context_vec *cvp;
940 offset = ds->context_vec_ptr - ds->context_vec_start;
941 ds->max_context <<= 1;
942 cvp = reallocarray(ds->context_vec_start,
943 ds->max_context, sizeof(*ds->context_vec_start));
945 free(ds->context_vec_start);
948 ds->context_vec_start = cvp;
949 ds->context_vec_end = ds->context_vec_start +
951 ds->context_vec_ptr = ds->context_vec_start + offset;
953 if (ds->anychange == 0) {
955 * Print the context/unidiff header first time through.
957 print_header(outfile, ds, args, file1, file2);
959 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
960 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
962 * If this change is more than 'diff_context' lines from the
963 * previous change, dump the record and reset it.
965 dump_unified_vec(outfile, changes, ds, args, f1, f2,
968 ds->context_vec_ptr++;
969 ds->context_vec_ptr->a = a;
970 ds->context_vec_ptr->b = b;
971 ds->context_vec_ptr->c = c;
972 ds->context_vec_ptr->d = d;
975 if (ds->anychange == 0)
977 if (args->diff_format == D_BRIEF)
979 if (args->diff_format == D_NORMAL) {
980 range(outfile, a, b, ",");
981 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
982 range(outfile, c, d, ",");
983 diff_output(outfile, "\n");
984 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', *pflags);
985 if (a <= b && c <= d)
986 diff_output(outfile, "---\n");
988 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
989 args->diff_format == D_NORMAL ? '>' : '\0', *pflags);
994 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
995 long *f, int a, int b, FILE *lb, int ch, int flags)
997 int i, j, c, col, nc;
1001 for (i = a; i <= b; i++) {
1002 fseek(lb, f[i - 1], SEEK_SET);
1003 nc = f[i] - f[i - 1];
1005 diff_output(outfile, "%c", ch);
1006 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1007 args->diff_format == D_NORMAL))
1008 diff_output(outfile, "\t");
1009 else if (args->diff_format != D_UNIFIED)
1010 diff_output(outfile, " ");
1013 for (j = 0; j < nc; j++) {
1014 if ((c = getc(lb)) == EOF) {
1015 diff_output(outfile, "\n\\ No newline at end of "
1019 if (c == '\t' && (flags & D_EXPANDTABS)) {
1021 diff_output(outfile, " ");
1022 } while (++col & 7);
1024 diff_output(outfile, "%c", c);
1032 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1035 readhash(struct got_diff_state *ds, FILE *f, int flags)
1042 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1043 if (flags & D_IGNORECASE)
1044 for (i = 0; (t = getc(f)) != '\n'; i++) {
1050 sum = sum * 127 + ds->chrtran[t];
1053 for (i = 0; (t = getc(f)) != '\n'; i++) {
1059 sum = sum * 127 + t;
1063 switch (t = getc(f)) {
1072 if (space && (flags & D_IGNOREBLANKS) == 0) {
1076 sum = sum * 127 + ds->chrtran[t];
1090 * There is a remote possibility that we end up with a zero sum.
1091 * Zero is used as an EOF marker, so return 1 instead.
1093 return (sum == 0 ? 1 : sum);
1099 unsigned char buf[BUFSIZ];
1106 cnt = fread(buf, 1, sizeof(buf), f);
1107 return (memchr(buf, '\0', cnt) == NULL);
1110 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1113 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1115 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1117 int last = ds->lastline;
1121 while (pos > last) {
1122 fseek(fp, f[pos - 1], SEEK_SET);
1123 nc = f[pos] - f[pos - 1];
1124 if (nc >= sizeof(buf))
1125 nc = sizeof(buf) - 1;
1126 nc = fread(buf, 1, nc, fp);
1129 buf[strcspn(buf, "\n")] = '\0';
1130 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1131 if (begins_with(buf, "private:")) {
1133 state = " (private)";
1134 } else if (begins_with(buf, "protected:")) {
1136 state = " (protected)";
1137 } else if (begins_with(buf, "public:")) {
1139 state = " (public)";
1141 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1143 strlcat(ds->lastbuf, state,
1144 sizeof ds->lastbuf);
1145 ds->lastmatchline = pos;
1152 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1155 /* dump accumulated "unified" diff changes */
1157 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1158 struct got_diff_state *ds, struct got_diff_args *args,
1159 FILE *f1, FILE *f2, int flags)
1161 struct context_vec *cvp = ds->context_vec_start;
1162 int lowa, upb, lowc, upd;
1166 if (ds->context_vec_start > ds->context_vec_ptr)
1169 b = d = 0; /* gcc */
1170 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1171 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1172 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1173 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1175 diff_output(outfile, "@@ -");
1176 uni_range(outfile, lowa, upb);
1177 diff_output(outfile, " +");
1178 uni_range(outfile, lowc, upd);
1179 diff_output(outfile, " @@");
1180 if ((flags & D_PROTOTYPE)) {
1181 f = match_function(ds, ds->ixold, lowa-1, f1);
1183 diff_output(outfile, " %s", f);
1185 diff_output(outfile, "\n");
1188 * Output changes in "unified" diff format--the old and new lines
1189 * are printed together.
1191 for (; cvp <= ds->context_vec_ptr; cvp++) {
1193 struct got_diff_change *change;
1194 change = calloc(1, sizeof(*change));
1196 memcpy(&change->cv, cvp, sizeof(change->cv));
1197 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1199 changes->nchanges++;
1209 * c: both new and old changes
1210 * d: only changes in the old file
1211 * a: only changes in the new file
1213 if (a <= b && c <= d)
1216 ch = (a <= b) ? 'd' : 'a';
1220 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1221 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1222 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1225 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1226 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1229 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', flags);
1230 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1236 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', flags);
1238 ds->context_vec_ptr = ds->context_vec_start - 1;
1242 got_diff_dump_change(FILE *outfile, struct got_diff_change *change,
1243 struct got_diff_state *ds, struct got_diff_args *args,
1244 FILE *f1, FILE *f2, int diff_flags)
1246 ds->context_vec_ptr = &change->cv;
1247 ds->context_vec_start = &change->cv;
1248 ds->context_vec_end = &change->cv;
1250 /* XXX TODO needs error checking */
1251 dump_unified_vec(outfile, NULL, ds, args, f1, f2, diff_flags);
1255 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1256 const char *file1, const char *file2)
1258 if (args->label[0] != NULL)
1259 diff_output(outfile, "--- %s\n", args->label[0]);
1261 diff_output(outfile, "--- %s\t%s", file1,
1262 ctime(&ds->stb1.st_mtime));
1263 if (args->label[1] != NULL)
1264 diff_output(outfile, "+++ %s\n", args->label[1]);
1266 diff_output(outfile, "+++ %s\t%s", file2,
1267 ctime(&ds->stb2.st_mtime));