1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
67 #include <sys/types.h>
70 #include <sys/queue.h>
89 #include "got_error.h"
90 #include "got_object.h"
93 #include "got_lib_diff.h"
95 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
96 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
99 * diff - compare two files.
103 * Uses an algorithm due to Harold Stone, which finds
104 * a pair of longest identical subsequences in the two
107 * The major goal is to generate the match vector J.
108 * J[i] is the index of the line in file1 corresponding
109 * to line i file0. J[i] = 0 if there is no
110 * such line in file1.
112 * Lines are hashed so as to work in core. All potential
113 * matches are located by sorting the lines of each file
114 * on the hash (called ``value''). In particular, this
115 * collects the equivalence classes in file1 together.
116 * Subroutine equiv replaces the value of each line in
117 * file0 by the index of the first element of its
118 * matching equivalence in (the reordered) file1.
119 * To save space equiv squeezes file1 into a single
120 * array member in which the equivalence classes
121 * are simply concatenated, except that their first
122 * members are flagged by changing sign.
124 * Next the indices that point into member are unsorted into
125 * array class according to the original order of file0.
127 * The cleverness lies in routine stone. This marches
128 * through the lines of file0, developing a vector klist
129 * of "k-candidates". At step i a k-candidate is a matched
130 * pair of lines x,y (x in file0 y in file1) such that
131 * there is a common subsequence of length k
132 * between the first i lines of file0 and the first y
133 * lines of file1, but there is no such subsequence for
134 * any smaller y. x is the earliest possible mate to y
135 * that occurs in such a subsequence.
137 * Whenever any of the members of the equivalence class of
138 * lines in file1 matable to a line in file0 has serial number
139 * less than the y of some k-candidate, that k-candidate
140 * with the smallest such y is replaced. The new
141 * k-candidate is chained (via pred) to the current
142 * k-1 candidate so that the actual subsequence can
143 * be recovered. When a member has serial number greater
144 * that the y of all k-candidates, the klist is extended.
145 * At the end, the longest subsequence is pulled out
146 * and placed in the array J by unravel
148 * With J in hand, the matches there recorded are
149 * check'ed against reality to assure that no spurious
150 * matches have crept in due to hashing. If they have,
151 * they are broken, and "jackpot" is recorded--a harmless
152 * matter except that a true match for a spuriously
153 * mated line may now be unnecessarily reported as a change.
155 * Much of the complexity of the program comes simply
156 * from trying to minimize core utilization and
157 * maximize the range of doable problems by dynamically
158 * allocating what is needed and reusing what is not.
159 * The core requirements for problems larger than somewhat
160 * are (in words) 2*length(file0) + length(file1) +
161 * 3*(number of k-candidates installed), typically about
162 * 6n words for files of length n.
176 static void diff_output(FILE *, const char *, ...);
177 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
178 static void check(struct got_diff_state *, FILE *, FILE *, int);
179 static void range(FILE *, int, int, char *);
180 static void uni_range(FILE *, int, int);
181 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
182 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
183 static void prune(struct got_diff_state *);
184 static void equiv(struct line *, int, struct line *, int, int *);
185 static void unravel(struct got_diff_state *, int);
186 static int unsort(struct line *, int, int *);
187 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
188 static void sort(struct line *, int);
189 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
190 static int asciifile(FILE *);
191 static void fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int);
192 static int newcand(struct got_diff_state *, int, int, int, int *);
193 static int search(struct got_diff_state *, int *, int, int);
194 static int skipline(FILE *);
195 static int isqrt(int);
196 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
197 static int readhash(struct got_diff_state *, FILE *, int);
198 static int files_differ(struct got_diff_state *, FILE *, FILE *);
199 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
202 * chrtran points to one of 2 translation tables: cup2low if folding upper to
203 * lower case clow2low if not folding case
205 u_char clow2low[256] = {
206 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
207 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
208 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
209 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
210 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
211 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
212 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
213 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
214 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
215 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
216 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
217 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
218 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
219 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
220 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
221 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
222 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
223 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
224 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
225 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
226 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
227 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
228 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
232 u_char cup2low[256] = {
233 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
234 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
235 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
236 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
237 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
238 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
239 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
240 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
241 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
242 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
243 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
244 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
245 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
246 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
247 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
248 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
249 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
250 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
251 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
252 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
253 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
254 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
255 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
260 diff_output(FILE *outfile, const char *fmt, ...)
268 vfprintf(outfile, fmt, ap);
273 got_diff_state_free(struct got_diff_state *ds)
284 const struct got_error *
285 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
286 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
287 struct got_diff_changes *changes)
289 const struct got_error *err = NULL;
296 ds->lastmatchline = 0;
297 ds->context_vec_ptr = ds->context_vec_start - 1;
298 ds->max_context = GOT_DIFF_MAX_CONTEXT;
299 if (flags & D_IGNORECASE)
300 ds->chrtran = cup2low;
302 ds->chrtran = clow2low;
303 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
304 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 if (!(flags & D_EMPTY1) && f1 == NULL) {
312 if (!(flags & D_EMPTY2) && f2 == NULL) {
317 switch (files_differ(ds, f1, f2)) {
328 if ((flags & D_FORCEASCII) == 0 &&
329 (!asciifile(f1) || !asciifile(f2))) {
334 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
335 err = got_error_from_errno("prepare");
338 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
339 err = got_error_from_errno("prepare");
344 sort(ds->sfile[0], ds->slen[0]);
345 sort(ds->sfile[1], ds->slen[1]);
347 ds->member = (int *)ds->file[1];
348 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
349 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
351 err = got_error_from_errno("reallocarray");
356 ds->class = (int *)ds->file[0];
357 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
358 err = got_error_from_errno("unsort");
361 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
363 err = got_error_from_errno("reallocarray");
368 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
369 if (ds->klist == NULL) {
370 err = got_error_from_errno("calloc");
375 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
376 if (ds->clist == NULL) {
377 err = got_error_from_errno("calloc");
380 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
382 err = got_error_from_errno("stone");
386 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
388 err = got_error_from_errno("reallocarray");
392 unravel(ds, ds->klist[i]);
394 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
396 err = got_error_from_errno("reallocarray");
400 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
402 err = got_error_from_errno("reallocarray");
406 check(ds, f1, f2, flags);
407 if (output(outfile, changes, ds, args, args->label[0], f1,
408 args->label[1], f2, flags))
409 err = got_error_from_errno("output");
420 * Check to see if the given files differ.
421 * Returns 0 if they are the same, 1 if different, and -1 on error.
422 * XXX - could use code from cmp(1) [faster]
425 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2)
427 char buf1[BUFSIZ], buf2[BUFSIZ];
430 if (f1 == NULL || f2 == NULL || ds->stb1.st_size != ds->stb2.st_size ||
431 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
434 i = fread(buf1, 1, sizeof(buf1), f1);
435 j = fread(buf2, 1, sizeof(buf2), f2);
436 if ((!i && ferror(f1)) || (!j && ferror(f2)))
442 if (memcmp(buf1, buf2, i) != 0)
448 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
457 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
461 p = calloc(sz + 3, sizeof(*p));
464 for (j = 0; fd != NULL && (h = readhash(ds, fd, flags));) {
467 q = reallocarray(p, sz + 3, sizeof(*p));
483 prune(struct got_diff_state *ds)
487 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
488 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
491 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
492 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
495 for (j = 0; j < 2; j++) {
496 ds->sfile[j] = ds->file[j] + ds->pref;
497 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
498 for (i = 0; i <= ds->slen[j]; i++)
499 ds->sfile[j][i].serial = i;
504 equiv(struct line *a, int n, struct line *b, int m, int *c)
509 while (i <= n && j <= m) {
510 if (a[i].value < b[j].value)
512 else if (a[i].value == b[j].value)
523 while (b[j + 1].value == b[j].value) {
531 /* Code taken from ping.c */
540 do { /* newton was a stinker */
545 } while ((x - y) > 1 || (x - y) < -1);
551 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
554 int oldc, tc, oldl, sq;
555 u_int numtries, bound;
558 if (flags & D_MINIMAL)
562 bound = MAXIMUM(256, sq);
566 c[0] = newcand(ds, 0, 0, 0, &error);
569 for (i = 1; i <= n; i++) {
578 if (y <= ds->clist[oldc].y)
580 l = search(ds, c, k, y);
584 if (ds->clist[c[l]].y <= y)
587 c[l] = newcand(ds, i, y, oldc, &error);
594 c[l] = newcand(ds, i, y, oldc, &error);
600 } while ((y = b[++j]) > 0 && numtries < bound);
606 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
610 if (ds->clen == ds->clistlen) {
611 ds->clistlen = ds->clistlen * 11 / 10;
612 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
621 q = ds->clist + ds->clen;
630 search(struct got_diff_state *ds, int *c, int k, int y)
634 if (ds->clist[c[k]].y < y) /* quick look for typical case */
642 t = ds->clist[c[l]].y;
654 unravel(struct got_diff_state *ds, int p)
659 for (i = 0; i <= ds->len[0]; i++)
660 ds->J[i] = i <= ds->pref ? i :
661 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
662 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
663 ds->J[q->x + ds->pref] = q->y + ds->pref;
667 * Check does double duty:
668 * 1. ferret out any fortuitous correspondences due
669 * to confounding by hashing (which result in "jackpot")
670 * 2. collect random access indexes to the two files
673 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
675 int i, j, jackpot, c, d;
683 ds->ixold[0] = ds->ixnew[0] = 0;
686 for (i = 1; i <= ds->len[0]; i++) {
688 ds->ixold[i] = ctold += skipline(f1);
691 while (j < ds->J[i]) {
692 ds->ixnew[j] = ctnew += skipline(f2);
695 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
697 c = (f1 == NULL ? EOF : getc(f1));
698 d = (f2 == NULL ? EOF : getc(f2));
700 * GNU diff ignores a missing newline
701 * in one file for -b or -w.
703 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
704 if (c == EOF && d == '\n') {
707 } else if (c == '\n' && d == EOF) {
714 if ((flags & D_FOLDBLANKS) && isspace(c) &&
720 } while (f1 && isspace(c = getc(f1)));
725 } while (f2 && isspace(d = getc(f2)));
726 } else if ((flags & D_IGNOREBLANKS)) {
727 while (f1 && isspace(c) && c != '\n') {
731 while (f2 && isspace(d) && d != '\n') {
736 if (ds->chrtran[c] != ds->chrtran[d]) {
739 if (c != '\n' && c != EOF)
740 ctold += skipline(f1);
741 if (d != '\n' && c != EOF)
742 ctnew += skipline(f2);
745 if (c == '\n' || c == EOF)
752 c = (f1 == NULL ? EOF : getc(f1));
753 d = (f2 == NULL ? EOF : getc(f2));
757 if (c != '\n' && c != EOF)
758 ctold += skipline(f1);
759 if (d != '\n' && c != EOF)
760 ctnew += skipline(f2);
763 if (c == '\n' || c == EOF)
767 ds->ixold[i] = ctold;
768 ds->ixnew[j] = ctnew;
771 for (; j <= ds->len[1]; j++)
772 ds->ixnew[j] = ctnew += skipline(f2);
775 * fprintf(stderr, "jackpot\n");
779 /* shellsort CACM #201 */
781 sort(struct line *a, int n)
783 struct line *ai, *aim, w;
788 for (j = 1; j <= n; j *= 2)
790 for (m /= 2; m != 0; m /= 2) {
792 for (j = 1; j <= k; j++) {
793 for (ai = &a[j]; ai > a; ai -= m) {
796 break; /* wraparound */
797 if (aim->value > ai[0].value ||
798 (aim->value == ai[0].value &&
799 aim->serial > ai[0].serial))
801 w.value = ai[0].value;
802 ai[0].value = aim->value;
803 aim->value = w.value;
804 w.serial = ai[0].serial;
805 ai[0].serial = aim->serial;
806 aim->serial = w.serial;
813 unsort(struct line *f, int l, int *b)
817 a = calloc(l + 1, sizeof(*a));
820 for (i = 1; i <= l; i++)
821 a[f[i].serial] = f[i].value;
822 for (i = 1; i <= l; i++)
834 for (i = 1; f != NULL && (c = getc(f)) != '\n' && c != EOF; i++)
840 output(FILE *outfile, struct got_diff_changes *changes,
841 struct got_diff_state *ds, struct got_diff_args *args,
842 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
844 int m, i0, i1, j0, j1;
853 ds->J[m + 1] = ds->len[1] + 1;
854 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
855 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
857 j0 = ds->J[i0 - 1] + 1;
859 while (i1 < m && ds->J[i1 + 1] == 0)
861 j1 = ds->J[i1 + 1] - 1;
863 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
864 i0, i1, j0, j1, &flags);
869 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
870 1, 0, 1, ds->len[1], &flags);
874 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
875 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
881 range(FILE *outfile, int a, int b, char *separator)
883 diff_output(outfile, "%d", a > b ? b : a);
885 diff_output(outfile, "%s%d", separator, b);
889 uni_range(FILE *outfile, int a, int b)
892 diff_output(outfile, "%d,%d", a, b - a + 1);
894 diff_output(outfile, "%d", b);
896 diff_output(outfile, "%d,0", b);
900 * Indicate that there is a difference between lines a and b of the from file
901 * to get to lines c to d of the to file. If a is greater then b then there
902 * are no lines in the from file involved and this means that there were
903 * lines appended (beginning at b). If c is greater than d then there are
904 * lines missing from the to file.
907 change(FILE *outfile, struct got_diff_changes *changes,
908 struct got_diff_state *ds, struct got_diff_args *args,
909 const char *file1, FILE *f1, const char *file2, FILE *f2,
910 int a, int b, int c, int d, int *pflags)
915 if (*pflags & D_HEADER) {
916 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
917 *pflags &= ~D_HEADER;
919 if (args->diff_format == D_UNIFIED) {
921 * Allocate change records as needed.
923 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
924 struct context_vec *cvp;
926 offset = ds->context_vec_ptr - ds->context_vec_start;
927 ds->max_context <<= 1;
928 cvp = reallocarray(ds->context_vec_start,
929 ds->max_context, sizeof(*ds->context_vec_start));
931 free(ds->context_vec_start);
934 ds->context_vec_start = cvp;
935 ds->context_vec_end = ds->context_vec_start +
937 ds->context_vec_ptr = ds->context_vec_start + offset;
939 if (ds->anychange == 0) {
941 * Print the context/unidiff header first time through.
943 print_header(outfile, ds, args, file1, file2);
945 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
946 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
948 * If this change is more than 'diff_context' lines from the
949 * previous change, dump the record and reset it.
951 dump_unified_vec(outfile, changes, ds, args, f1, f2,
954 ds->context_vec_ptr++;
955 ds->context_vec_ptr->a = a;
956 ds->context_vec_ptr->b = b;
957 ds->context_vec_ptr->c = c;
958 ds->context_vec_ptr->d = d;
961 if (ds->anychange == 0)
963 if (args->diff_format == D_BRIEF)
965 if (args->diff_format == D_NORMAL) {
966 range(outfile, a, b, ",");
967 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
968 range(outfile, c, d, ",");
969 diff_output(outfile, "\n");
970 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', *pflags);
971 if (a <= b && c <= d)
972 diff_output(outfile, "---\n");
974 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
975 args->diff_format == D_NORMAL ? '>' : '\0', *pflags);
980 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
981 long *f, int a, int b, FILE *lb, int ch, int flags)
983 int i, j, c, col, nc;
985 if (lb == NULL || a > b)
987 for (i = a; i <= b; i++) {
988 fseek(lb, f[i - 1], SEEK_SET);
989 nc = f[i] - f[i - 1];
991 diff_output(outfile, "%c", ch);
992 if (args->Tflag && (args->diff_format == D_UNIFIED ||
993 args->diff_format == D_NORMAL))
994 diff_output(outfile, "\t");
995 else if (args->diff_format != D_UNIFIED)
996 diff_output(outfile, " ");
999 for (j = 0; j < nc; j++) {
1000 if ((c = getc(lb)) == EOF) {
1001 diff_output(outfile, "\n\\ No newline at end of "
1005 if (c == '\t' && (flags & D_EXPANDTABS)) {
1007 diff_output(outfile, " ");
1008 } while (++col & 7);
1010 diff_output(outfile, "%c", c);
1018 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1021 readhash(struct got_diff_state *ds, FILE *f, int flags)
1028 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1029 if (flags & D_IGNORECASE)
1030 for (i = 0; (t = getc(f)) != '\n'; i++) {
1036 sum = sum * 127 + ds->chrtran[t];
1039 for (i = 0; (t = getc(f)) != '\n'; i++) {
1045 sum = sum * 127 + t;
1049 switch (t = getc(f)) {
1058 if (space && (flags & D_IGNOREBLANKS) == 0) {
1062 sum = sum * 127 + ds->chrtran[t];
1076 * There is a remote possibility that we end up with a zero sum.
1077 * Zero is used as an EOF marker, so return 1 instead.
1079 return (sum == 0 ? 1 : sum);
1085 unsigned char buf[BUFSIZ];
1092 cnt = fread(buf, 1, sizeof(buf), f);
1093 return (memchr(buf, '\0', cnt) == NULL);
1096 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1099 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1101 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1103 int last = ds->lastline;
1107 while (pos > last) {
1108 fseek(fp, f[pos - 1], SEEK_SET);
1109 nc = f[pos] - f[pos - 1];
1110 if (nc >= sizeof(buf))
1111 nc = sizeof(buf) - 1;
1112 nc = fread(buf, 1, nc, fp);
1115 buf[strcspn(buf, "\n")] = '\0';
1116 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1117 if (begins_with(buf, "private:")) {
1119 state = " (private)";
1120 } else if (begins_with(buf, "protected:")) {
1122 state = " (protected)";
1123 } else if (begins_with(buf, "public:")) {
1125 state = " (public)";
1127 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1129 strlcat(ds->lastbuf, state,
1130 sizeof ds->lastbuf);
1131 ds->lastmatchline = pos;
1138 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1141 /* dump accumulated "unified" diff changes */
1143 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1144 struct got_diff_state *ds, struct got_diff_args *args,
1145 FILE *f1, FILE *f2, int flags)
1147 struct context_vec *cvp = ds->context_vec_start;
1148 int lowa, upb, lowc, upd;
1152 if (ds->context_vec_start > ds->context_vec_ptr)
1155 b = d = 0; /* gcc */
1156 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1157 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1158 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1159 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1161 diff_output(outfile, "@@ -");
1162 uni_range(outfile, lowa, upb);
1163 diff_output(outfile, " +");
1164 uni_range(outfile, lowc, upd);
1165 diff_output(outfile, " @@");
1166 if (f1 != NULL && (flags & D_PROTOTYPE)) {
1167 f = match_function(ds, ds->ixold, lowa-1, f1);
1169 diff_output(outfile, " %s", f);
1171 diff_output(outfile, "\n");
1174 * Output changes in "unified" diff format--the old and new lines
1175 * are printed together.
1177 for (; cvp <= ds->context_vec_ptr; cvp++) {
1179 struct got_diff_change *change;
1180 change = calloc(1, sizeof(*change));
1182 memcpy(&change->cv, cvp, sizeof(change->cv));
1183 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1185 changes->nchanges++;
1195 * c: both new and old changes
1196 * d: only changes in the old file
1197 * a: only changes in the new file
1199 if (a <= b && c <= d)
1202 ch = (a <= b) ? 'd' : 'a';
1206 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1207 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1208 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1211 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1212 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1215 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', flags);
1216 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1222 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', flags);
1224 ds->context_vec_ptr = ds->context_vec_start - 1;
1228 got_diff_dump_change(FILE *outfile, struct got_diff_change *change,
1229 struct got_diff_state *ds, struct got_diff_args *args,
1230 FILE *f1, FILE *f2, int diff_flags)
1232 ds->context_vec_ptr = &change->cv;
1233 ds->context_vec_start = &change->cv;
1234 ds->context_vec_end = &change->cv;
1236 /* XXX TODO needs error checking */
1237 dump_unified_vec(outfile, NULL, ds, args, f1, f2, diff_flags);
1241 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1242 const char *file1, const char *file2)
1244 if (args->label[0] != NULL)
1245 diff_output(outfile, "--- %s\n", args->label[0]);
1247 diff_output(outfile, "--- %s\t%s", file1,
1248 ctime(&ds->stb1.st_mtime));
1249 if (args->label[1] != NULL)
1250 diff_output(outfile, "+++ %s\n", args->label[1]);
1252 diff_output(outfile, "+++ %s\t%s", file2,
1253 ctime(&ds->stb2.st_mtime));