1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
87 #include "got_error.h"
88 #include "got_object.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 static void diff_output(FILE *, const char *, ...);
187 static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
188 static void check(struct got_diff_state *, FILE *, FILE *, int);
189 static void uni_range(FILE *, int, int);
190 static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
191 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
192 static void prune(struct got_diff_state *);
193 static void equiv(struct line *, int, struct line *, int, int *);
194 static void unravel(struct got_diff_state *, int);
195 static int unsort(struct line *, int, int *);
196 static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
197 static void sort(struct line *, int);
198 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
199 static int asciifile(FILE *);
200 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
201 static int newcand(struct got_diff_state *, int, int, int, int *);
202 static int search(struct got_diff_state *, int *, int, int);
203 static int skipline(FILE *);
204 static int isqrt(int);
205 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
206 static int readhash(struct got_diff_state *, FILE *, int);
207 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
208 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
211 * chrtran points to one of 2 translation tables: cup2low if folding upper to
212 * lower case clow2low if not folding case
214 u_char clow2low[256] = {
215 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
216 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
217 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
218 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
219 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
220 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
221 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
222 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
223 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
224 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
225 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
226 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
227 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
228 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
229 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
230 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
231 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
232 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
233 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
234 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
235 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
236 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
237 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
241 u_char cup2low[256] = {
242 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
243 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
244 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
245 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
246 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
247 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
248 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
249 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
250 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
251 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
252 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
253 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
254 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
255 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
256 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
257 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
258 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
259 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
260 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
261 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
262 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
263 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
264 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
269 diff_output(FILE *outfile, const char *fmt, ...)
274 vfprintf(outfile, fmt, ap);
278 const struct got_error *
279 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
280 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
282 const struct got_error *err = NULL;
289 ds->lastmatchline = 0;
290 ds->context_vec_ptr = ds->context_vec_start - 1;
291 ds->max_context = 64;
292 if (flags & D_IGNORECASE)
293 ds->chrtran = cup2low;
295 ds->chrtran = clow2low;
296 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
297 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
300 if (flags & D_EMPTY1) {
301 f1 = fopen(_PATH_DEVNULL, "r");
303 err = got_error_from_errno();
307 else if (f1 == NULL) {
312 if (flags & D_EMPTY2) {
313 f2 = fopen(_PATH_DEVNULL, "r");
315 err = got_error_from_errno();
318 } else if (f2 == NULL) {
323 switch (files_differ(ds, f1, f2, flags)) {
334 if ((flags & D_FORCEASCII) == 0 &&
335 (!asciifile(f1) || !asciifile(f2))) {
340 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
341 err = got_error_from_errno();
344 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
345 err = got_error_from_errno();
350 sort(ds->sfile[0], ds->slen[0]);
351 sort(ds->sfile[1], ds->slen[1]);
353 ds->member = (int *)ds->file[1];
354 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
355 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
357 err = got_error_from_errno();
362 ds->class = (int *)ds->file[0];
363 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
364 err = got_error_from_errno();
367 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
369 err = got_error_from_errno();
374 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
375 if (ds->klist == NULL) {
376 err = got_error_from_errno();
381 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
382 if (ds->clist == NULL) {
383 err = got_error_from_errno();
386 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
388 err = got_error_from_errno();
392 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
394 err = got_error_from_errno();
398 unravel(ds, ds->klist[i]);
400 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
402 err = got_error_from_errno();
406 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
408 err = got_error_from_errno();
412 check(ds, f1, f2, flags);
413 if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2,
415 err = got_error_from_errno();
438 * Check to see if the given files differ.
439 * Returns 0 if they are the same, 1 if different, and -1 on error.
440 * XXX - could use code from cmp(1) [faster]
443 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
445 char buf1[BUFSIZ], buf2[BUFSIZ];
448 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
449 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
452 i = fread(buf1, 1, sizeof(buf1), f1);
453 j = fread(buf2, 1, sizeof(buf2), f2);
454 if ((!i && ferror(f1)) || (!j && ferror(f2)))
460 if (memcmp(buf1, buf2, i) != 0)
466 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
474 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
478 p = calloc(sz + 3, sizeof(*p));
481 for (j = 0; (h = readhash(ds, fd, flags));) {
484 q = reallocarray(p, sz + 3, sizeof(*p));
500 prune(struct got_diff_state *ds)
504 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
505 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
508 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
509 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
512 for (j = 0; j < 2; j++) {
513 ds->sfile[j] = ds->file[j] + ds->pref;
514 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
515 for (i = 0; i <= ds->slen[j]; i++)
516 ds->sfile[j][i].serial = i;
521 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 while (i <= n && j <= m) {
527 if (a[i].value < b[j].value)
529 else if (a[i].value == b[j].value)
540 while (b[j + 1].value == b[j].value) {
548 /* Code taken from ping.c */
557 do { /* newton was a stinker */
562 } while ((x - y) > 1 || (x - y) < -1);
568 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
571 int oldc, tc, oldl, sq;
572 u_int numtries, bound;
575 if (flags & D_MINIMAL)
579 bound = MAXIMUM(256, sq);
583 c[0] = newcand(ds, 0, 0, 0, &error);
586 for (i = 1; i <= n; i++) {
595 if (y <= ds->clist[oldc].y)
597 l = search(ds, c, k, y);
601 if (ds->clist[c[l]].y <= y)
604 c[l] = newcand(ds, i, y, oldc, &error);
611 c[l] = newcand(ds, i, y, oldc, &error);
617 } while ((y = b[++j]) > 0 && numtries < bound);
623 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
627 if (ds->clen == ds->clistlen) {
628 ds->clistlen = ds->clistlen * 11 / 10;
629 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
638 q = ds->clist + ds->clen;
647 search(struct got_diff_state *ds, int *c, int k, int y)
651 if (ds->clist[c[k]].y < y) /* quick look for typical case */
659 t = ds->clist[c[l]].y;
671 unravel(struct got_diff_state *ds, int p)
676 for (i = 0; i <= ds->len[0]; i++)
677 ds->J[i] = i <= ds->pref ? i :
678 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
679 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
680 ds->J[q->x + ds->pref] = q->y + ds->pref;
684 * Check does double duty:
685 * 1. ferret out any fortuitous correspondences due
686 * to confounding by hashing (which result in "jackpot")
687 * 2. collect random access indexes to the two files
690 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
692 int i, j, jackpot, c, d;
698 ds->ixold[0] = ds->ixnew[0] = 0;
701 for (i = 1; i <= ds->len[0]; i++) {
703 ds->ixold[i] = ctold += skipline(f1);
706 while (j < ds->J[i]) {
707 ds->ixnew[j] = ctnew += skipline(f2);
710 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
715 * GNU diff ignores a missing newline
716 * in one file for -b or -w.
718 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
719 if (c == EOF && d == '\n') {
722 } else if (c == '\n' && d == EOF) {
729 if ((flags & D_FOLDBLANKS) && isspace(c) &&
735 } while (isspace(c = getc(f1)));
740 } while (isspace(d = getc(f2)));
741 } else if ((flags & D_IGNOREBLANKS)) {
742 while (isspace(c) && c != '\n') {
746 while (isspace(d) && d != '\n') {
751 if (ds->chrtran[c] != ds->chrtran[d]) {
754 if (c != '\n' && c != EOF)
755 ctold += skipline(f1);
756 if (d != '\n' && c != EOF)
757 ctnew += skipline(f2);
760 if (c == '\n' || c == EOF)
767 if ((c = getc(f1)) != (d = getc(f2))) {
770 if (c != '\n' && c != EOF)
771 ctold += skipline(f1);
772 if (d != '\n' && c != EOF)
773 ctnew += skipline(f2);
776 if (c == '\n' || c == EOF)
780 ds->ixold[i] = ctold;
781 ds->ixnew[j] = ctnew;
784 for (; j <= ds->len[1]; j++)
785 ds->ixnew[j] = ctnew += skipline(f2);
788 * fprintf(stderr, "jackpot\n");
792 /* shellsort CACM #201 */
794 sort(struct line *a, int n)
796 struct line *ai, *aim, w;
801 for (j = 1; j <= n; j *= 2)
803 for (m /= 2; m != 0; m /= 2) {
805 for (j = 1; j <= k; j++) {
806 for (ai = &a[j]; ai > a; ai -= m) {
809 break; /* wraparound */
810 if (aim->value > ai[0].value ||
811 (aim->value == ai[0].value &&
812 aim->serial > ai[0].serial))
814 w.value = ai[0].value;
815 ai[0].value = aim->value;
816 aim->value = w.value;
817 w.serial = ai[0].serial;
818 ai[0].serial = aim->serial;
819 aim->serial = w.serial;
826 unsort(struct line *f, int l, int *b)
830 a = calloc(l + 1, sizeof(*a));
833 for (i = 1; i <= l; i++)
834 a[f[i].serial] = f[i].value;
835 for (i = 1; i <= l; i++)
847 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
854 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
856 int m, i0, i1, j0, j1;
863 ds->J[m + 1] = ds->len[1] + 1;
864 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
867 j0 = ds->J[i0 - 1] + 1;
869 while (i1 < m && ds->J[i1 + 1] == 0)
871 j1 = ds->J[i1 + 1] - 1;
873 error = change(outfile, ds, args, file1, f1, file2, f2,
874 i0, i1, j0, j1, &flags);
879 error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0,
880 1, ds->len[1], &flags);
884 if (ds->anychange != 0)
885 dump_unified_vec(outfile, ds, args, f1, f2, flags);
891 uni_range(FILE *outfile, int a, int b)
894 diff_output(outfile, "%d,%d", a, b - a + 1);
896 diff_output(outfile, "%d", b);
898 diff_output(outfile, "%d,0", b);
902 * Indicate that there is a difference between lines a and b of the from file
903 * to get to lines c to d of the to file. If a is greater then b then there
904 * are no lines in the from file involved and this means that there were
905 * lines appended (beginning at b). If c is greater than d then there are
906 * lines missing from the to file.
909 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
910 const char *file1, FILE *f1, const char *file2, FILE *f2,
911 int a, int b, int c, int d, int *pflags)
918 if (*pflags & D_HEADER) {
919 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
920 *pflags &= ~D_HEADER;
922 if (args->diff_format == D_UNIFIED) {
924 * Allocate change records as needed.
926 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
927 struct context_vec *cvp;
929 offset = ds->context_vec_ptr - ds->context_vec_start;
930 ds->max_context <<= 1;
931 ds->context_vec_start =
932 cvp = reallocarray(ds->context_vec_start,
933 ds->max_context, sizeof(*ds->context_vec_start));
935 free(ds->context_vec_start);
938 ds->context_vec_start = cvp;
939 ds->context_vec_end = ds->context_vec_start +
941 ds->context_vec_ptr = ds->context_vec_start + offset;
943 if (ds->anychange == 0) {
945 * Print the context/unidiff header first time through.
947 print_header(outfile, ds, args, file1, file2);
949 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
950 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
952 * If this change is more than 'diff_context' lines from the
953 * previous change, dump the record and reset it.
955 dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
957 ds->context_vec_ptr++;
958 ds->context_vec_ptr->a = a;
959 ds->context_vec_ptr->b = b;
960 ds->context_vec_ptr->c = c;
961 ds->context_vec_ptr->d = d;
964 if (ds->anychange == 0)
966 if (args->diff_format == D_BRIEF)
968 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
973 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
974 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
976 int i, j, c, lastc, col, nc;
980 for (i = a; i <= b; i++) {
981 fseek(lb, f[i - 1], SEEK_SET);
982 nc = f[i] - f[i - 1];
984 diff_output(outfile, "%c", ch);
985 if (args->Tflag && args->diff_format == D_UNIFIED)
986 diff_output(outfile, "\t");
987 else if (args->diff_format != D_UNIFIED)
988 diff_output(outfile, " ");
991 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
992 if ((c = getc(lb)) == EOF) {
993 diff_output(outfile, "\n\\ No newline at end of "
997 if (c == '\t' && (flags & D_EXPANDTABS)) {
999 diff_output(outfile, " ");
1000 } while (++col & 7);
1002 diff_output(outfile, "%c", c);
1011 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1014 readhash(struct got_diff_state *ds, FILE *f, int flags)
1021 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1022 if (flags & D_IGNORECASE)
1023 for (i = 0; (t = getc(f)) != '\n'; i++) {
1029 sum = sum * 127 + ds->chrtran[t];
1032 for (i = 0; (t = getc(f)) != '\n'; i++) {
1038 sum = sum * 127 + t;
1042 switch (t = getc(f)) {
1051 if (space && (flags & D_IGNOREBLANKS) == 0) {
1055 sum = sum * 127 + ds->chrtran[t];
1069 * There is a remote possibility that we end up with a zero sum.
1070 * Zero is used as an EOF marker, so return 1 instead.
1072 return (sum == 0 ? 1 : sum);
1078 unsigned char buf[BUFSIZ];
1085 cnt = fread(buf, 1, sizeof(buf), f);
1086 return (memchr(buf, '\0', cnt) == NULL);
1089 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1092 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1094 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1096 int last = ds->lastline;
1100 while (pos > last) {
1101 fseek(fp, f[pos - 1], SEEK_SET);
1102 nc = f[pos] - f[pos - 1];
1103 if (nc >= sizeof(buf))
1104 nc = sizeof(buf) - 1;
1105 nc = fread(buf, 1, nc, fp);
1108 buf[strcspn(buf, "\n")] = '\0';
1109 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1110 if (begins_with(buf, "private:")) {
1112 state = " (private)";
1113 } else if (begins_with(buf, "protected:")) {
1115 state = " (protected)";
1116 } else if (begins_with(buf, "public:")) {
1118 state = " (public)";
1120 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1122 strlcat(ds->lastbuf, state,
1123 sizeof ds->lastbuf);
1124 ds->lastmatchline = pos;
1131 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1134 /* dump accumulated "unified" diff changes */
1136 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1137 FILE *f1, FILE *f2, int flags)
1139 struct context_vec *cvp = ds->context_vec_start;
1140 int lowa, upb, lowc, upd;
1144 if (ds->context_vec_start > ds->context_vec_ptr)
1147 b = d = 0; /* gcc */
1148 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1149 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1150 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1151 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1153 diff_output(outfile, "@@ -");
1154 uni_range(outfile, lowa, upb);
1155 diff_output(outfile, " +");
1156 uni_range(outfile, lowc, upd);
1157 diff_output(outfile, " @@");
1158 if ((flags & D_PROTOTYPE)) {
1159 f = match_function(ds, ds->ixold, lowa-1, f1);
1161 diff_output(outfile, " %s", f);
1163 diff_output(outfile, "\n");
1166 * Output changes in "unified" diff format--the old and new lines
1167 * are printed together.
1169 for (; cvp <= ds->context_vec_ptr; cvp++) {
1176 * c: both new and old changes
1177 * d: only changes in the old file
1178 * a: only changes in the new file
1180 if (a <= b && c <= d)
1183 ch = (a <= b) ? 'd' : 'a';
1187 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1188 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1189 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1192 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1193 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1196 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1197 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1203 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1205 ds->context_vec_ptr = ds->context_vec_start - 1;
1209 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1210 const char *file1, const char *file2)
1212 if (args->label[0] != NULL)
1213 diff_output(outfile, "--- %s\n", args->label[0]);
1215 diff_output(outfile, "--- %s\t%s", file1,
1216 ctime(&ds->stb1.st_mtime));
1217 if (args->label[1] != NULL)
1218 diff_output(outfile, "+++ %s\n", args->label[1]);
1220 diff_output(outfile, "+++ %s\t%s", file2,
1221 ctime(&ds->stb2.st_mtime));