Blame


1 3b0f3d61 2020-01-22 neels /* Commandline diff utility to test diff implementations. */
2 3b0f3d61 2020-01-22 neels /*
3 3b0f3d61 2020-01-22 neels * Copyright (c) 2018 Martin Pieuchot
4 3b0f3d61 2020-01-22 neels * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
5 3b0f3d61 2020-01-22 neels *
6 3b0f3d61 2020-01-22 neels * Permission to use, copy, modify, and distribute this software for any
7 3b0f3d61 2020-01-22 neels * purpose with or without fee is hereby granted, provided that the above
8 3b0f3d61 2020-01-22 neels * copyright notice and this permission notice appear in all copies.
9 3b0f3d61 2020-01-22 neels *
10 3b0f3d61 2020-01-22 neels * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 3b0f3d61 2020-01-22 neels * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 3b0f3d61 2020-01-22 neels * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 3b0f3d61 2020-01-22 neels * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 3b0f3d61 2020-01-22 neels * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 3b0f3d61 2020-01-22 neels * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 3b0f3d61 2020-01-22 neels * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 3b0f3d61 2020-01-22 neels */
18 3b0f3d61 2020-01-22 neels
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <arraylist.h>
#include <diff_main.h>
#include <diff_output.h>
35 8ad022d2 2020-05-05 neels
/*
 * Algorithm selection understood by diffreg().  The enumerators rely on
 * implicit numbering, which starts at 0 and counts up by one.
 */
enum diffreg_algo {
	/* Default when no algorithm option is given on the command line. */
	DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE,
	/* Chosen by -Q. */
	DIFFREG_ALGO_MYERS_THEN_PATIENCE,
	/* Chosen by -P. */
	DIFFREG_ALGO_PATIENCE,
	/* Chosen by -T. */
	DIFFREG_ALGO_NONE,
};
42 65a56b16 2020-10-12 neels
43 3b0f3d61 2020-01-22 neels __dead void usage(void);
44 e51ebd83 2020-11-21 stsp int diffreg(char *, char *, enum diffreg_algo, bool, bool, bool,
45 13e2caa3 2020-10-17 stsp int, bool);
46 7a54ad3a 2020-09-20 stsp FILE * openfile(const char *, char **, struct stat *);
47 3b0f3d61 2020-01-22 neels
48 3b0f3d61 2020-01-22 neels __dead void
49 3b0f3d61 2020-01-22 neels usage(void)
50 3b0f3d61 2020-01-22 neels {
51 760fe30e 2020-05-05 neels fprintf(stderr,
52 e51ebd83 2020-11-21 stsp "usage: %s [-apPQTwe] [-U n] file1 file2\n"
53 760fe30e 2020-05-05 neels "\n"
54 e51ebd83 2020-11-21 stsp " -a Treat input as ASCII even if binary data is detected\n"
55 13e2caa3 2020-10-17 stsp " -p Show function prototypes in hunk headers\n"
56 65a56b16 2020-10-12 neels " -P Use Patience Diff (slower but often nicer)\n"
57 65a56b16 2020-10-12 neels " -Q Use forward-Myers for small files, otherwise Patience\n"
58 65a56b16 2020-10-12 neels " -T Trivial algo: detect similar start and end only\n"
59 732e8ee0 2020-09-20 stsp " -w Ignore Whitespace\n"
60 0c9a7e9d 2020-10-07 stsp " -U n Number of Context Lines\n"
61 0c9a7e9d 2020-10-07 stsp " -e Produce ed script output\n"
62 760fe30e 2020-05-05 neels , getprogname());
63 3b0f3d61 2020-01-22 neels exit(1);
64 3b0f3d61 2020-01-22 neels }
65 3b0f3d61 2020-01-22 neels
66 3b0f3d61 2020-01-22 neels int
67 3b0f3d61 2020-01-22 neels main(int argc, char *argv[])
68 3b0f3d61 2020-01-22 neels {
69 3e6cba3a 2020-08-13 stsp int ch, rc;
70 e51ebd83 2020-11-21 stsp bool force_text = false;
71 65a56b16 2020-10-12 neels bool ignore_whitespace = false;
72 13e2caa3 2020-10-17 stsp bool show_function_prototypes = false;
73 b7ba71f0 2020-10-07 stsp bool edscript = false;
74 527f2c8a 2020-09-20 stsp int context_lines = 3;
75 65a56b16 2020-10-12 neels enum diffreg_algo algo = DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE;
76 3b0f3d61 2020-01-22 neels
77 e51ebd83 2020-11-21 stsp while ((ch = getopt(argc, argv, "apPQTwU:e")) != -1) {
78 3b0f3d61 2020-01-22 neels switch (ch) {
79 e51ebd83 2020-11-21 stsp case 'a':
80 e51ebd83 2020-11-21 stsp force_text = true;
81 e51ebd83 2020-11-21 stsp break;
82 13e2caa3 2020-10-17 stsp case 'p':
83 13e2caa3 2020-10-17 stsp show_function_prototypes = true;
84 13e2caa3 2020-10-17 stsp break;
85 65a56b16 2020-10-12 neels case 'P':
86 65a56b16 2020-10-12 neels algo = DIFFREG_ALGO_PATIENCE;
87 760fe30e 2020-05-05 neels break;
88 65a56b16 2020-10-12 neels case 'Q':
89 65a56b16 2020-10-12 neels algo = DIFFREG_ALGO_MYERS_THEN_PATIENCE;
90 65a56b16 2020-10-12 neels break;
91 65a56b16 2020-10-12 neels case 'T':
92 65a56b16 2020-10-12 neels algo = DIFFREG_ALGO_NONE;
93 65a56b16 2020-10-12 neels break;
94 732e8ee0 2020-09-20 stsp case 'w':
95 732e8ee0 2020-09-20 stsp ignore_whitespace = true;
96 732e8ee0 2020-09-20 stsp break;
97 0c9a7e9d 2020-10-07 stsp case 'U':
98 527f2c8a 2020-09-20 stsp context_lines = atoi(optarg);
99 527f2c8a 2020-09-20 stsp break;
100 b7ba71f0 2020-10-07 stsp case 'e':
101 b7ba71f0 2020-10-07 stsp edscript = true;
102 b7ba71f0 2020-10-07 stsp break;
103 3b0f3d61 2020-01-22 neels default:
104 3b0f3d61 2020-01-22 neels usage();
105 3b0f3d61 2020-01-22 neels }
106 3b0f3d61 2020-01-22 neels }
107 3b0f3d61 2020-01-22 neels
108 3b0f3d61 2020-01-22 neels argc -= optind;
109 3b0f3d61 2020-01-22 neels argv += optind;
110 3b0f3d61 2020-01-22 neels
111 3b0f3d61 2020-01-22 neels if (argc != 2)
112 3b0f3d61 2020-01-22 neels usage();
113 3b0f3d61 2020-01-22 neels
114 e51ebd83 2020-11-21 stsp rc = diffreg(argv[0], argv[1], algo, force_text, ignore_whitespace,
115 13e2caa3 2020-10-17 stsp show_function_prototypes, context_lines, edscript);
116 3e6cba3a 2020-08-13 stsp if (rc != DIFF_RC_OK) {
117 3e6cba3a 2020-08-13 stsp fprintf(stderr, "diff: %s\n", strerror(rc));
118 3e6cba3a 2020-08-13 stsp return 1;
119 3e6cba3a 2020-08-13 stsp }
120 3e6cba3a 2020-08-13 stsp return 0;
121 3b0f3d61 2020-01-22 neels }
122 3b0f3d61 2020-01-22 neels
/*
 * The algorithm configurations below refer to each other by address, so
 * they are declared here before any of them is defined.
 */
extern const struct diff_algo_config myers_then_patience;
extern const struct diff_algo_config myers_then_myers_divide;
extern const struct diff_algo_config patience;
extern const struct diff_algo_config myers_divide;
127 3b0f3d61 2020-01-22 neels
128 760fe30e 2020-05-05 neels const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
129 3b0f3d61 2020-01-22 neels .impl = diff_algo_myers,
130 9e668157 2020-01-27 neels .permitted_state_size = 1024 * 1024 * sizeof(int),
131 3b0f3d61 2020-01-22 neels .fallback_algo = &patience,
132 3b0f3d61 2020-01-22 neels };
133 3b0f3d61 2020-01-22 neels
134 0d27172a 2020-05-06 neels const struct diff_algo_config myers_then_myers_divide =
135 0d27172a 2020-05-06 neels (struct diff_algo_config){
136 760fe30e 2020-05-05 neels .impl = diff_algo_myers,
137 760fe30e 2020-05-05 neels .permitted_state_size = 1024 * 1024 * sizeof(int),
138 760fe30e 2020-05-05 neels .fallback_algo = &myers_divide,
139 760fe30e 2020-05-05 neels };
140 760fe30e 2020-05-05 neels
141 3b0f3d61 2020-01-22 neels const struct diff_algo_config patience = (struct diff_algo_config){
142 3b0f3d61 2020-01-22 neels .impl = diff_algo_patience,
143 0d27172a 2020-05-06 neels /* After subdivision, do Patience again: */
144 0d27172a 2020-05-06 neels .inner_algo = &patience,
145 0d27172a 2020-05-06 neels /* If subdivision failed, do Myers Divide et Impera: */
146 0d27172a 2020-05-06 neels .fallback_algo = &myers_then_myers_divide,
147 3b0f3d61 2020-01-22 neels };
148 3b0f3d61 2020-01-22 neels
149 3b0f3d61 2020-01-22 neels const struct diff_algo_config myers_divide = (struct diff_algo_config){
150 3b0f3d61 2020-01-22 neels .impl = diff_algo_myers_divide,
151 0d27172a 2020-05-06 neels /* When division succeeded, start from the top: */
152 0d27172a 2020-05-06 neels .inner_algo = &myers_then_myers_divide,
153 0d27172a 2020-05-06 neels /* (fallback_algo = NULL implies diff_algo_none). */
154 3b0f3d61 2020-01-22 neels };
155 3b0f3d61 2020-01-22 neels
156 65a56b16 2020-10-12 neels const struct diff_algo_config no_algo = (struct diff_algo_config){
157 65a56b16 2020-10-12 neels .impl = diff_algo_none,
158 65a56b16 2020-10-12 neels };
159 65a56b16 2020-10-12 neels
160 65a56b16 2020-10-12 neels /* If the state for a forward-Myers is small enough, use Myers, otherwise first
161 65a56b16 2020-10-12 neels * do a Myers-divide. */
162 65a56b16 2020-10-12 neels const struct diff_config diff_config_myers_then_myers_divide = {
163 3b0f3d61 2020-01-22 neels .atomize_func = diff_atomize_text_by_line,
164 760fe30e 2020-05-05 neels .algo = &myers_then_myers_divide,
165 3b0f3d61 2020-01-22 neels };
166 3b0f3d61 2020-01-22 neels
167 65a56b16 2020-10-12 neels /* If the state for a forward-Myers is small enough, use Myers, otherwise first
168 65a56b16 2020-10-12 neels * do a Patience. */
169 65a56b16 2020-10-12 neels const struct diff_config diff_config_myers_then_patience = {
170 760fe30e 2020-05-05 neels .atomize_func = diff_atomize_text_by_line,
171 760fe30e 2020-05-05 neels .algo = &myers_then_patience,
172 760fe30e 2020-05-05 neels };
173 760fe30e 2020-05-05 neels
174 65a56b16 2020-10-12 neels /* Directly force Patience as a first divider of the source file. */
175 65a56b16 2020-10-12 neels const struct diff_config diff_config_patience = {
176 65a56b16 2020-10-12 neels .atomize_func = diff_atomize_text_by_line,
177 65a56b16 2020-10-12 neels .algo = &patience,
178 65a56b16 2020-10-12 neels };
179 65a56b16 2020-10-12 neels
180 65a56b16 2020-10-12 neels /* Directly force Patience as a first divider of the source file. */
181 65a56b16 2020-10-12 neels const struct diff_config diff_config_no_algo = {
182 65a56b16 2020-10-12 neels .atomize_func = diff_atomize_text_by_line,
183 65a56b16 2020-10-12 neels };
184 65a56b16 2020-10-12 neels
185 3b0f3d61 2020-01-22 neels int
186 e51ebd83 2020-11-21 stsp diffreg(char *file1, char *file2, enum diffreg_algo algo, bool force_text,
187 e51ebd83 2020-11-21 stsp bool ignore_whitespace, bool show_function_prototypes, int context_lines,
188 e51ebd83 2020-11-21 stsp bool edscript)
189 3b0f3d61 2020-01-22 neels {
190 3b0f3d61 2020-01-22 neels char *str1, *str2;
191 7a54ad3a 2020-09-20 stsp FILE *f1, *f2;
192 3b0f3d61 2020-01-22 neels struct stat st1, st2;
193 3b0f3d61 2020-01-22 neels struct diff_input_info info = {
194 3b0f3d61 2020-01-22 neels .left_path = file1,
195 3b0f3d61 2020-01-22 neels .right_path = file2,
196 3b0f3d61 2020-01-22 neels };
197 c16dde50 2020-10-22 stsp struct diff_data left = {}, right = {};
198 c16dde50 2020-10-22 stsp struct diff_result *result = NULL;
199 3e6cba3a 2020-08-13 stsp int rc;
200 0d27172a 2020-05-06 neels const struct diff_config *cfg;
201 00d5652b 2020-09-22 stsp int diff_flags = 0;
202 0d27172a 2020-05-06 neels
203 65a56b16 2020-10-12 neels switch (algo) {
204 65a56b16 2020-10-12 neels default:
205 65a56b16 2020-10-12 neels case DIFFREG_ALGO_MYERS_THEN_MYERS_DIVIDE:
206 65a56b16 2020-10-12 neels cfg = &diff_config_myers_then_myers_divide;
207 65a56b16 2020-10-12 neels break;
208 65a56b16 2020-10-12 neels case DIFFREG_ALGO_MYERS_THEN_PATIENCE:
209 65a56b16 2020-10-12 neels cfg = &diff_config_myers_then_patience;
210 65a56b16 2020-10-12 neels break;
211 65a56b16 2020-10-12 neels case DIFFREG_ALGO_PATIENCE:
212 65a56b16 2020-10-12 neels cfg = &diff_config_patience;
213 65a56b16 2020-10-12 neels break;
214 65a56b16 2020-10-12 neels case DIFFREG_ALGO_NONE:
215 65a56b16 2020-10-12 neels cfg = &diff_config_no_algo;
216 65a56b16 2020-10-12 neels break;
217 65a56b16 2020-10-12 neels }
218 3b0f3d61 2020-01-22 neels
219 7a54ad3a 2020-09-20 stsp f1 = openfile(file1, &str1, &st1);
220 7a54ad3a 2020-09-20 stsp f2 = openfile(file2, &str2, &st2);
221 3b0f3d61 2020-01-22 neels
222 e51ebd83 2020-11-21 stsp if (force_text)
223 e51ebd83 2020-11-21 stsp diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
224 00d5652b 2020-09-22 stsp if (ignore_whitespace)
225 00d5652b 2020-09-22 stsp diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
226 13e2caa3 2020-10-17 stsp if (show_function_prototypes)
227 13e2caa3 2020-10-17 stsp diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
228 00d5652b 2020-09-22 stsp
229 c16dde50 2020-10-22 stsp rc = diff_atomize_file(&left, cfg, f1, str1, st1.st_size, diff_flags);
230 c16dde50 2020-10-22 stsp if (rc)
231 c16dde50 2020-10-22 stsp goto done;
232 c16dde50 2020-10-22 stsp rc = diff_atomize_file(&right, cfg, f2, str2, st2.st_size, diff_flags);
233 c16dde50 2020-10-22 stsp if (rc)
234 c16dde50 2020-10-22 stsp goto done;
235 c16dde50 2020-10-22 stsp
236 c16dde50 2020-10-22 stsp result = diff_main(cfg, &left, &right);
237 f8cbb8fe 2020-05-05 neels #if 0
238 f8cbb8fe 2020-05-05 neels rc = diff_output_plain(stdout, &info, result);
239 f8cbb8fe 2020-05-05 neels #else
240 b7ba71f0 2020-10-07 stsp if (edscript)
241 b7ba71f0 2020-10-07 stsp rc = diff_output_edscript(NULL, stdout, &info, result);
242 b7ba71f0 2020-10-07 stsp else {
243 b7ba71f0 2020-10-07 stsp rc = diff_output_unidiff(NULL, stdout, &info, result,
244 b7ba71f0 2020-10-07 stsp context_lines);
245 b7ba71f0 2020-10-07 stsp }
246 f8cbb8fe 2020-05-05 neels #endif
247 c16dde50 2020-10-22 stsp done:
248 f8cbb8fe 2020-05-05 neels diff_result_free(result);
249 c16dde50 2020-10-22 stsp diff_data_free(&left);
250 c16dde50 2020-10-22 stsp diff_data_free(&right);
251 c6eecea3 2020-07-26 stsp if (str1)
252 c6eecea3 2020-07-26 stsp munmap(str1, st1.st_size);
253 c6eecea3 2020-07-26 stsp if (str2)
254 c6eecea3 2020-07-26 stsp munmap(str2, st2.st_size);
255 7a54ad3a 2020-09-20 stsp fclose(f1);
256 7a54ad3a 2020-09-20 stsp fclose(f2);
257 3b0f3d61 2020-01-22 neels
258 f8cbb8fe 2020-05-05 neels return rc;
259 3b0f3d61 2020-01-22 neels }
260 3b0f3d61 2020-01-22 neels
/*
 * Open 'path' for reading and fill in *st with its fstat() data.
 *
 * Unless DIFF_NO_MMAP is defined, the file is also mapped read-only and
 * privately, and *p receives the mapping address.  If mapping is disabled
 * or mmap() fails, *p is set to NULL so the caller falls back on file I/O
 * through the returned stream.
 *
 * Returns the open stream.  A failing fopen() or fstat() ends the program
 * through err() with exit status 2.
 */
FILE *
openfile(const char *path, char **p, struct stat *st)
{
	FILE *fp;

	fp = fopen(path, "r");
	if (fp == NULL)
		err(2, "%s", path);

	if (fstat(fileno(fp), st) == -1)
		err(2, "%s", path);

	/* Start from "no mapping"; only a successful mmap() changes this. */
	*p = NULL;
#ifndef DIFF_NO_MMAP
	{
		void *map;

		map = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE,
		    fileno(fp), 0);
		if (map != MAP_FAILED)
			*p = map;
	}
#endif

	return fp;
}