Blob


/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/* Identifies the diffing algorithm to use. */
enum got_diff_algorithm {
	GOT_DIFF_ALGORITHM_MYERS = 0,
	GOT_DIFF_ALGORITHM_PATIENCE = 1
};
/*
 * All line types which can occur in a diff (including '{got,tog} log'
 * lines).
 * XXX The values from GOT_DIFF_LINE_HUNK through GOT_DIFF_LINE_NONE
 * (inclusive) must map to their DIFF_LINE_* macro counterparts defined in
 * lib/diff_output.h (i.e., 60-64); they are spelled out explicitly here so
 * that the required numbering is visible.
 */
enum got_diff_line_type {
	GOT_DIFF_LINE_LOGMSG = 0,
	GOT_DIFF_LINE_AUTHOR = 1,
	GOT_DIFF_LINE_DATE = 2,
	GOT_DIFF_LINE_CHANGES = 3,
	GOT_DIFF_LINE_META = 4,
	GOT_DIFF_LINE_BLOB_MIN = 5,
	GOT_DIFF_LINE_BLOB_PLUS = 6,

	/* Must stay in sync with DIFF_LINE_* in lib/diff_output.h. */
	GOT_DIFF_LINE_HUNK = 60,
	GOT_DIFF_LINE_MINUS = 61,
	GOT_DIFF_LINE_PLUS = 62,
	GOT_DIFF_LINE_CONTEXT = 63,
	GOT_DIFF_LINE_NONE = 64
};
/*
 * Metadata about a single line of produced diff text: the byte offset of
 * the line and the type of the line.
 */
struct got_diff_line {
	off_t	offset;	/* byte offset of this line */
	uint8_t	type;	/* presumably an enum got_diff_line_type value; confirm */
};
/*
 * Diff two blobs and write the result as unified diff text to the provided
 * output file.
 *
 * Two open temporary files must be supplied for internal use; they can be
 * obtained from got_opentemp() and must be closed by the caller.
 * If one of the blobs being diffed does not exist, all blob object
 * arguments corresponding to it should be set to NULL.
 * Two const char * diff header labels may be passed in order to identify
 * each blob in the diff output. For a label which is NULL, the blob's SHA1
 * checksum is used instead.
 * The number of context lines to show in the diff must be specified, too,
 * and whitespace differences may optionally be ignored.
 * If not NULL, the two leading output arguments will be populated with an
 * array of line offsets for the unidiff text, and with the number of lines
 * in the unidiff text.
 */
const struct got_error *
got_diff_blob(struct got_diff_line **, size_t *,
    struct got_blob_object *, struct got_blob_object *,
    FILE *, FILE *, const char *, const char *,
    enum got_diff_algorithm, int, int, int, FILE *);
/*
 * Diff a blob against a file and write the result as unified diff text to
 * the provided output file.
 *
 * The blob object, its content, and its size must be provided. The size of
 * the file must be provided as well, together with a const char * diff
 * header label which identifies the file.
 * A const char * diff header label for the blob is optional.
 * The number of context lines to show in the diff must be specified, too,
 * and whitespace differences may optionally be ignored.
 */
const struct got_error *
got_diff_blob_file(struct got_blob_object *, FILE *, off_t, const char *,
    FILE *, int, struct stat *, const char *,
    enum got_diff_algorithm, int, int, int, FILE *);
/*
 * Callback type invoked by got_diff_tree() in order to handle the
 * differences between two blobs while trees are being diffed.
 *
 * The callback receives two blobs, their respective IDs, and the two
 * corresponding paths within the diffed trees. The first blob holds content
 * from the old side of the diff; the second blob holds content from the
 * new side.
 * Two open temporary files must be provided for internal use; they can be
 * obtained from got_opentemp() and must be closed by the caller.
 * Either blob object argument may be NULL, indicating that no content is
 * present on that side of the diff.
 * File modes taken from the tree objects which contain the blobs may be
 * passed, too. These are zero if not available.
 */
typedef const struct got_error *(*got_diff_blob_cb)(void *,
    struct got_blob_object *, struct got_blob_object *,
    FILE *, FILE *,
    struct got_object_id *, struct got_object_id *,
    const char *, const char *,
    mode_t, mode_t,
    struct got_repository *);
98 /*
99 * A pre-defined implementation of got_diff_blob_cb() which appends unidiff
100 * output to a file. The caller must allocate and fill in the argument
101 * structure.
102 */
103 struct got_diff_blob_output_unidiff_arg {
104 FILE *outfile; /* Unidiff text will be written here. */
105 int diff_context; /* Sets the number of context lines. */
106 int ignore_whitespace; /* Ignore whitespace differences. */
107 int force_text_diff; /* Assume text even if binary data detected. */
108 enum got_diff_algorithm diff_algo; /* Diffing algorithm to use. */
110 /*
111 * The number of lines contained in produced unidiff text output,
112 * and an array of got_diff_lines with byte offset and line type to
113 * each line. May be initialized to zero and NULL to ignore line
114 * metadata. If not NULL, then the array of line offsets and types will
115 * be populated. Optionally, the array can be pre-populated with line
116 * offsets and types, with nlines > 0 indicating the length of the
117 * pre-populated array. This is useful if the output file already
118 * contains some lines of text. The array will be grown as needed to
119 * accomodate additional offsets and types, and the last offset found
120 * in a pre-populated array will be added to all subsequent offsets.
121 */
122 size_t nlines;
123 struct got_diff_line *lines; /* Dispose of with free(3) when done. */
124 };
/*
 * Declared with the same parameter list as got_diff_blob_cb.
 * NOTE(review): the leading void * argument is presumably a pointer to a
 * struct got_diff_blob_output_unidiff_arg -- confirm in the implementation.
 */
const struct got_error *
got_diff_blob_output_unidiff(void *,
    struct got_blob_object *, struct got_blob_object *,
    FILE *, FILE *,
    struct got_object_id *, struct got_object_id *,
    const char *, const char *,
    mode_t, mode_t,
    struct got_repository *);
130 /*
131 * Compute the differences between two trees and invoke the provided
132 * got_diff_blob_cb() callback when content differs.
133 * Diffing of blob content can be suppressed by passing zero for the
134 * 'diff_content' parameter. The callback will then only receive blob
135 * object IDs and diff labels, but NULL pointers instead of blob objects.
136 * If 'diff_content' is set, two open temporary FILEs and two open
137 * temporary file descriptors must be provided for internal use; these
138 * files can be obtained from got_opentemp() and got_opentempfd(),
139 * and must be closed by the caller. Otherwise the files can be NULL.
140 * The set of arguments relating to either tree may be NULL to indicate
141 * that no content is present on its respective side of the diff.
142 */
143 const struct got_error *got_diff_tree(struct got_tree_object *,
144 struct got_tree_object *, FILE *, FILE *, int, int,
145 const char *, const char *,
146 struct got_repository *, got_diff_blob_cb cb, void *cb_arg, int);
/*
 * Two pre-defined implementations of got_diff_blob_cb() are available for
 * gathering changed paths: the first one collects a list of file paths
 * which differ between two trees; the second one additionally computes a
 * diffstat of added/removed lines for each collected path, and requires an
 * initialized got_diffstat_cb_arg argument to be passed.
 * The caller must allocate and initialize a got_pathlist_head * argument.
 * The data pointer of each entry added to the path list will point to a
 * struct got_diff_changed_path object.
 * The caller is expected to free both the path and the data pointer of
 * every entry on the path list.
 */
struct got_diff_changed_path {
	uint32_t add;	/* number of lines added */
	uint32_t rm;	/* number of lines removed */

	/*
	 * Modification status of this path; one of GOT_STATUS_ADD,
	 * GOT_STATUS_DELETE, GOT_STATUS_MODIFY, or GOT_STATUS_MODE_CHANGE.
	 */
	int status;
};
/*
 * Declared with the same parameter list as got_diff_blob_cb.
 * NOTE(review): the leading void * argument is presumably the caller's
 * struct got_pathlist_head * -- confirm in the implementation.
 */
const struct got_error *
got_diff_tree_collect_changed_paths(void *,
    struct got_blob_object *, struct got_blob_object *,
    FILE *, FILE *,
    struct got_object_id *, struct got_object_id *,
    const char *, const char *,
    mode_t, mode_t,
    struct got_repository *);
173 struct got_diffstat_cb_arg {
174 size_t max_path_len;
175 uint32_t ins;
176 uint32_t del;
177 int add_cols;
178 int rm_cols;
179 int nfiles;
180 struct got_pathlist_head *paths;
181 int ignore_ws;
182 int force_text;
183 enum got_diff_algorithm diff_algo;
184 };
/*
 * Declared with the same parameter list as got_diff_blob_cb.
 * NOTE(review): the leading void * argument is presumably a pointer to an
 * initialized struct got_diffstat_cb_arg -- confirm in the implementation.
 */
const struct got_error *
got_diff_tree_compute_diffstat(void *,
    struct got_blob_object *, struct got_blob_object *,
    FILE *, FILE *,
    struct got_object_id *, struct got_object_id *,
    const char *, const char *,
    mode_t, mode_t,
    struct got_repository *);
/*
 * Diff two objects under the assumption that both of them are blobs, and
 * write unified diff text to the provided output FILE.
 *
 * Two const char * diff header labels may be passed in order to identify
 * each blob in the diff output. For a label which is NULL, the blob's SHA1
 * checksum is used instead.
 * Two open temporary files and two temporary file descriptors must be
 * provided for internal use. They can be obtained from got_opentemp() and
 * got_opentempfd(), and must be closed by the caller.
 * The set of arguments relating to either blob may be NULL/-1, indicating
 * that no content is present on that side of the diff.
 * The number of context lines to show in the diff must be specified, too.
 * If not NULL, the two leading output arguments will be populated with an
 * array of line offsets for the unidiff text, and with the number of lines
 * in the unidiff text.
 */
const struct got_error *
got_diff_objects_as_blobs(struct got_diff_line **, size_t *,
    FILE *, FILE *, int, int,
    struct got_object_id *, struct got_object_id *,
    const char *, const char *,
    enum got_diff_algorithm, int, int, int,
    struct got_repository *, FILE *);
/* Only pointers to path lists are used by the prototypes that follow. */
struct got_pathlist_head;

/*
 * Diff two objects under the assumption that both of them are trees, and
 * write unified diff text to the provided output FILE.
 *
 * Two const char * diff header labels may be passed in order to identify
 * each blob in the trees. For a label which is NULL, the blob's SHA1
 * checksum is used instead.
 * The number of context lines to show in diffs must be specified.
 * Two open temporary files and two temporary file descriptors must be
 * provided for internal use. They can be obtained from got_opentemp() and
 * got_opentempfd(), and must be closed by the caller.
 * If 'diff_content' is not set, the files may be NULL / -1.
 * The set of arguments relating to either tree may be NULL, indicating
 * that no content is present on that side of the diff.
 * If not NULL, the two leading output arguments will be populated with an
 * array of line offsets for the unidiff text, and with the number of lines
 * in the unidiff text.
 */
const struct got_error *
got_diff_objects_as_trees(struct got_diff_line **, size_t *,
    FILE *, FILE *, int, int,
    struct got_object_id *, struct got_object_id *,
    struct got_pathlist_head *,
    const char *, const char *,
    enum got_diff_algorithm, int, int, int,
    struct got_repository *, FILE *);
/*
 * Diff two objects under the assumption that both of them are commits, and
 * write unified diff text to the provided output FILE.
 *
 * The number of context lines to show in diffs must be specified.
 * Two open temporary files and two temporary file descriptors must be
 * provided for internal use. They can be obtained from got_opentemp() and
 * got_opentempfd(), and must be closed by the caller.
 * The set of arguments relating to either commit may be NULL, indicating
 * that no content is present on that side of the diff.
 * If not NULL, the two leading output arguments will be populated with an
 * array of line offsets for the unidiff text, and with the number of lines
 * in the unidiff text.
 */
const struct got_error *
got_diff_objects_as_commits(struct got_diff_line **, size_t *,
    FILE *, FILE *, int, int,
    struct got_object_id *, struct got_object_id *,
    struct got_pathlist_head *,
    enum got_diff_algorithm, int, int, int,
    struct got_repository *, FILE *);
/*
 * NOTE(review): presumably the largest number of diff context lines which
 * may be requested -- confirm against the users of this macro.
 */
#define GOT_DIFF_MAX_CONTEXT	64