Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <limits.h>
29 #include <zlib.h>
31 #if defined(__FreeBSD__)
32 #include <unistd.h>
33 #endif
35 #include "got_error.h"
36 #include "got_cancel.h"
37 #include "got_object.h"
38 #include "got_path.h"
39 #include "got_reference.h"
40 #include "got_repository_admin.h"
41 #include "got_opentemp.h"
43 #include "got_lib_deltify.h"
44 #include "got_lib_delta.h"
45 #include "got_lib_object.h"
46 #include "got_lib_object_idset.h"
47 #include "got_lib_object_cache.h"
48 #include "got_lib_deflate.h"
49 #include "got_lib_pack.h"
50 #include "got_lib_privsep.h"
51 #include "got_lib_repository.h"
/* Smaller of two values; each argument may be evaluated twice. */
#if !defined(MIN)
#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
#endif

/* Larger of two values; each argument may be evaluated twice. */
#if !defined(MAX)
#define MAX(_a,_b) ((_a) > (_b) ? (_a) : (_b))
#endif
61 struct got_pack_meta {
62 struct got_object_id id;
63 char *path;
64 int obj_type;
65 off_t size;
66 time_t mtime;
68 /* The best delta we picked */
69 struct got_pack_meta *head;
70 struct got_pack_meta *prev;
71 off_t delta_offset; /* offset in delta cache file */
72 off_t delta_len; /* length in delta cache file */
73 int nchain;
75 /* Only used for delta window */
76 struct got_delta_table *dtab;
78 /* Only used for writing offset deltas */
79 off_t off;
80 };
/* Growable array of pack meta entries; see add_meta(). */
struct got_pack_metavec {
	struct got_pack_meta **meta;	/* the entries */
	int	nmeta;			/* number of slots in use */
	int	metasz;			/* number of slots allocated */
};
88 static const struct got_error *
89 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
90 const char *path, int obj_type, time_t mtime)
91 {
92 const struct got_error *err = NULL;
93 struct got_pack_meta *m;
95 *new = NULL;
97 m = calloc(1, sizeof(*m));
98 if (m == NULL)
99 return got_error_from_errno("calloc");
101 memcpy(&m->id, id, sizeof(m->id));
103 m->path = strdup(path);
104 if (m->path == NULL) {
105 err = got_error_from_errno("strdup");
106 free(m);
107 return err;
110 m->obj_type = obj_type;
111 m->mtime = mtime;
112 *new = m;
113 return NULL;
116 static void
117 clear_meta(struct got_pack_meta *meta)
119 if (meta == NULL)
120 return;
121 free(meta->path);
122 meta->path = NULL;
/*
 * Free an array of `nmeta` meta entries along with the array itself.
 *
 * Each entry was allocated individually (see alloc_meta()), so both the
 * resources it owns and the entry itself must be released; freeing only
 * the path would leak one struct per packed object.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int i;

	for (i = 0; i < nmeta; i++) {
		clear_meta(meta[i]);
		free(meta[i]);
	}
	free(meta);
}
135 static int
136 delta_order_cmp(const void *pa, const void *pb)
138 struct got_pack_meta *a, *b;
139 int cmp;
141 a = *(struct got_pack_meta **)pa;
142 b = *(struct got_pack_meta **)pb;
144 if (a->obj_type != b->obj_type)
145 return a->obj_type - b->obj_type;
146 cmp = strcmp(a->path, b->path);
147 if (cmp != 0)
148 return cmp;
149 if (a->mtime != b->mtime)
150 return a->mtime - b->mtime;
151 return got_object_id_cmp(&a->id, &b->id);
154 static int
155 delta_size(struct got_delta_instruction *deltas, int ndeltas)
157 int i, size = 32;
158 for (i = 0; i < ndeltas; i++) {
159 if (deltas[i].copy)
160 size += GOT_DELTA_SIZE_SHIFT;
161 else
162 size += deltas[i].len + 1;
164 return size;
167 static const struct got_error *
168 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
169 struct got_delta_instruction *deltas, int ndeltas,
170 off_t base_size, FILE *f)
172 unsigned char buf[16], *bp;
173 int i, j;
174 off_t n;
175 size_t w;
176 struct got_delta_instruction *d;
178 /* base object size */
179 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
180 n = base_size >> GOT_DELTA_SIZE_SHIFT;
181 for (i = 1; n > 0; i++) {
182 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
183 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
184 n >>= GOT_DELTA_SIZE_SHIFT;
186 w = fwrite(buf, 1, i, f);
187 if (w != i)
188 return got_ferror(f, GOT_ERR_IO);
190 /* target object size */
191 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
192 n = o->size >> GOT_DELTA_SIZE_SHIFT;
193 for (i = 1; n > 0; i++) {
194 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
195 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
196 n >>= GOT_DELTA_SIZE_SHIFT;
198 w = fwrite(buf, 1, i, f);
199 if (w != i)
200 return got_ferror(f, GOT_ERR_IO);
202 for (j = 0; j < ndeltas; j++) {
203 d = &deltas[j];
204 if (d->copy) {
205 n = d->offset;
206 bp = &buf[1];
207 buf[0] = GOT_DELTA_BASE_COPY;
208 for (i = 0; i < 4; i++) {
209 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
210 buf[0] |= 1 << i;
211 *bp++ = n & 0xff;
212 n >>= 8;
213 if (n == 0)
214 break;
217 n = d->len;
218 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
219 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
220 for (i = 0; i < 3 && n > 0; i++) {
221 buf[0] |= 1 << (i + 4);
222 *bp++ = n & 0xff;
223 n >>= 8;
226 w = fwrite(buf, 1, bp - buf, f);
227 if (w != bp - buf)
228 return got_ferror(f, GOT_ERR_IO);
229 } else {
230 char content[128];
231 size_t r;
232 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1)
233 return got_error_from_errno("fseeko");
234 n = 0;
235 while (n != d->len) {
236 buf[0] = (d->len - n < 127) ? d->len - n : 127;
237 w = fwrite(buf, 1, 1, f);
238 if (w != 1)
239 return got_ferror(f, GOT_ERR_IO);
240 r = fread(content, 1, buf[0], o->f);
241 if (r != buf[0])
242 return got_ferror(o->f, GOT_ERR_IO);
243 w = fwrite(content, 1, buf[0], f);
244 if (w != buf[0])
245 return got_ferror(f, GOT_ERR_IO);
246 n += buf[0];
251 return NULL;
255 static const struct got_error *
256 pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
257 FILE *delta_cache, struct got_repository *repo,
258 got_pack_progress_cb progress_cb, void *progress_arg,
259 got_cancel_cb cancel_cb, void *cancel_arg)
261 const struct got_error *err = NULL;
262 struct got_pack_meta *m = NULL, *base = NULL;
263 struct got_raw_object *raw = NULL, *base_raw = NULL;
264 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
265 int i, j, size, best_size, ndeltas, best_ndeltas;
266 const int max_base_candidates = 10;
267 int outfd = -1;
269 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
270 for (i = 0; i < nmeta; i++) {
271 if (cancel_cb) {
272 err = (*cancel_cb)(cancel_arg);
273 if (err)
274 break;
276 if (progress_cb) {
277 err = progress_cb(progress_arg, 0L, nours, nmeta, i, 0);
278 if (err)
279 goto done;
281 m = meta[i];
283 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
284 m->obj_type == GOT_OBJ_TYPE_TAG)
285 continue;
287 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
288 if (err)
289 goto done;
290 m->size = raw->size;
292 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
293 raw->size + raw->hdrlen);
294 if (err)
295 goto done;
297 if (i > max_base_candidates) {
298 struct got_pack_meta *n = NULL;
299 n = meta[i - (max_base_candidates + 1)];
300 got_deltify_free(n->dtab);
301 n->dtab = NULL;
304 best_size = raw->size;
305 best_ndeltas = 0;
306 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
307 if (cancel_cb) {
308 err = (*cancel_cb)(cancel_arg);
309 if (err)
310 goto done;
312 base = meta[j];
313 /* long chains make unpacking slow, avoid such bases */
314 if (base->nchain >= 32 ||
315 base->obj_type != m->obj_type)
316 continue;
318 err = got_object_raw_open(&base_raw, &outfd, repo,
319 &base->id);
320 if (err)
321 goto done;
322 err = got_deltify(&deltas, &ndeltas,
323 raw->f, raw->hdrlen, raw->size + raw->hdrlen,
324 base->dtab, base_raw->f, base_raw->hdrlen,
325 base_raw->size + base_raw->hdrlen);
326 got_object_raw_close(base_raw);
327 base_raw = NULL;
328 if (err)
329 goto done;
331 size = delta_size(deltas, ndeltas);
332 if (size + 32 < best_size){
333 /*
334 * if we already picked a best delta,
335 * replace it.
336 */
337 best_size = size;
338 free(best_deltas);
339 best_deltas = deltas;
340 best_ndeltas = ndeltas;
341 deltas = NULL;
342 m->nchain = base->nchain + 1;
343 m->prev = base;
344 m->head = base->head;
345 if (m->head == NULL)
346 m->head = base;
347 } else {
348 free(deltas);
349 deltas = NULL;
350 ndeltas = 0;
354 if (best_ndeltas > 0) {
355 m->delta_offset = ftello(delta_cache);
356 err = encode_delta(m, raw, best_deltas,
357 best_ndeltas, m->prev->size, delta_cache);
358 free(best_deltas);
359 best_deltas = NULL;
360 best_ndeltas = 0;
361 if (err)
362 goto done;
363 m->delta_len = ftello(delta_cache) - m->delta_offset;
366 got_object_raw_close(raw);
367 raw = NULL;
369 done:
370 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
371 got_deltify_free(meta[i]->dtab);
372 meta[i]->dtab = NULL;
374 if (raw)
375 got_object_raw_close(raw);
376 if (base_raw)
377 got_object_raw_close(base_raw);
378 if (outfd != -1 && close(outfd) == -1 && err == NULL)
379 err = got_error_from_errno("close");
380 free(deltas);
381 free(best_deltas);
382 return err;
385 static const struct got_error *
386 search_packidx(int *found, struct got_object_id *id,
387 struct got_repository *repo)
389 const struct got_error *err = NULL;
390 struct got_packidx *packidx = NULL;
391 int idx;
393 *found = 0;
395 err = got_repo_search_packidx(&packidx, &idx, repo, id);
396 if (err == NULL)
397 *found = 1; /* object is already packed */
398 else if (err->code == GOT_ERR_NO_OBJ)
399 err = NULL;
400 return err;
403 static const int obj_types[] = {
404 GOT_OBJ_TYPE_ANY,
405 GOT_OBJ_TYPE_COMMIT,
406 GOT_OBJ_TYPE_TREE,
407 GOT_OBJ_TYPE_BLOB,
408 GOT_OBJ_TYPE_TAG,
409 GOT_OBJ_TYPE_OFFSET_DELTA,
410 GOT_OBJ_TYPE_REF_DELTA
411 };
413 static const struct got_error *
414 add_meta(struct got_pack_metavec *v, struct got_object_idset *idset,
415 struct got_object_id *id, const char *path, int obj_type,
416 time_t mtime, int loose_obj_only, struct got_repository *repo)
418 const struct got_error *err;
419 struct got_pack_meta *m;
421 if (loose_obj_only) {
422 int is_packed;
423 err = search_packidx(&is_packed, id, repo);
424 if (err)
425 return err;
426 if (is_packed)
427 return NULL;
430 err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]);
431 if (err)
432 return err;
434 if (v == NULL)
435 return NULL;
437 err = alloc_meta(&m, id, path, obj_type, mtime);
438 if (err)
439 goto done;
441 if (v->nmeta == v->metasz){
442 size_t newsize = 2 * v->metasz;
443 struct got_pack_meta **new;
444 new = reallocarray(v->meta, newsize, sizeof(*new));
445 if (new == NULL) {
446 err = got_error_from_errno("reallocarray");
447 goto done;
449 v->meta = new;
450 v->metasz = newsize;
452 done:
453 if (err) {
454 clear_meta(m);
455 free(m);
456 } else
457 v->meta[v->nmeta++] = m;
459 return err;
462 static const struct got_error *
463 load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v,
464 struct got_object_idset *idset, struct got_object_id *tree_id,
465 const char *dpath, time_t mtime, struct got_repository *repo,
466 int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg)
468 const struct got_error *err;
469 struct got_tree_object *tree;
470 char *p = NULL;
471 int i;
473 err = got_object_open_as_tree(&tree, repo, tree_id);
474 if (err)
475 return err;
477 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
478 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
479 struct got_object_id *id = got_tree_entry_get_id(e);
480 mode_t mode = got_tree_entry_get_mode(e);
482 if (cancel_cb) {
483 err = (*cancel_cb)(cancel_arg);
484 if (err)
485 break;
488 if (got_object_tree_entry_is_submodule(e) ||
489 got_object_idset_contains(idset, id))
490 continue;
492 if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "",
493 got_tree_entry_get_name(e)) == -1) {
494 err = got_error_from_errno("asprintf");
495 break;
498 if (S_ISDIR(mode)) {
499 struct got_object_qid *qid;
500 err = got_object_qid_alloc(&qid, id);
501 if (err)
502 break;
503 STAILQ_INSERT_TAIL(ids, qid, entry);
504 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
505 err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB,
506 mtime, loose_obj_only, repo);
507 if (err)
508 break;
510 free(p);
511 p = NULL;
514 got_object_tree_close(tree);
515 free(p);
516 return err;
519 static const struct got_error *
520 load_tree(struct got_pack_metavec *v, struct got_object_idset *idset,
521 struct got_object_id *tree_id, const char *dpath, time_t mtime,
522 int loose_obj_only, struct got_repository *repo,
523 got_cancel_cb cancel_cb, void *cancel_arg)
525 const struct got_error *err = NULL;
526 struct got_object_id_queue tree_ids;
527 struct got_object_qid *qid;
529 if (got_object_idset_contains(idset, tree_id))
530 return NULL;
532 err = got_object_qid_alloc(&qid, tree_id);
533 if (err)
534 return err;
536 STAILQ_INIT(&tree_ids);
537 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
539 while (!STAILQ_EMPTY(&tree_ids)) {
540 if (cancel_cb) {
541 err = (*cancel_cb)(cancel_arg);
542 if (err)
543 break;
546 qid = STAILQ_FIRST(&tree_ids);
547 STAILQ_REMOVE_HEAD(&tree_ids, entry);
549 if (got_object_idset_contains(idset, qid->id)) {
550 got_object_qid_free(qid);
551 continue;
554 err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE,
555 mtime, loose_obj_only, repo);
556 if (err) {
557 got_object_qid_free(qid);
558 break;
561 err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath,
562 mtime, repo, loose_obj_only, cancel_cb, cancel_arg);
563 got_object_qid_free(qid);
564 if (err)
565 break;
568 got_object_id_queue_free(&tree_ids);
569 return err;
572 static const struct got_error *
573 load_commit(struct got_pack_metavec *v, struct got_object_idset *idset,
574 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
575 got_cancel_cb cancel_cb, void *cancel_arg)
577 const struct got_error *err;
578 struct got_commit_object *commit;
580 if (got_object_idset_contains(idset, id))
581 return NULL;
583 if (loose_obj_only) {
584 int is_packed;
585 err = search_packidx(&is_packed, id, repo);
586 if (err)
587 return err;
588 if (is_packed)
589 return NULL;
592 err = got_object_open_as_commit(&commit, repo, id);
593 if (err)
594 return err;
596 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT,
597 got_object_commit_get_committer_time(commit),
598 loose_obj_only, repo);
599 if (err)
600 goto done;
602 err = load_tree(v, idset, got_object_commit_get_tree_id(commit),
603 "", got_object_commit_get_committer_time(commit),
604 loose_obj_only, repo, cancel_cb, cancel_arg);
605 done:
606 got_object_commit_close(commit);
607 return err;
610 static const struct got_error *
611 load_tag(struct got_pack_metavec *v, struct got_object_idset *idset,
612 struct got_object_id *id, struct got_repository *repo, int loose_obj_only,
613 got_cancel_cb cancel_cb, void *cancel_arg)
615 const struct got_error *err;
616 struct got_tag_object *tag = NULL;
618 if (got_object_idset_contains(idset, id))
619 return NULL;
621 if (loose_obj_only) {
622 int is_packed;
623 err = search_packidx(&is_packed, id, repo);
624 if (err)
625 return err;
626 if (is_packed)
627 return NULL;
630 err = got_object_open_as_tag(&tag, repo, id);
631 if (err)
632 return err;
634 err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG,
635 got_object_tag_get_tagger_time(tag),
636 loose_obj_only, repo);
637 if (err)
638 goto done;
640 switch (got_object_tag_get_object_type(tag)) {
641 case GOT_OBJ_TYPE_COMMIT:
642 err = load_commit(v, idset,
643 got_object_tag_get_object_id(tag), repo,
644 loose_obj_only, cancel_cb, cancel_arg);
645 break;
646 case GOT_OBJ_TYPE_TREE:
647 err = load_tree(v, idset, got_object_tag_get_object_id(tag),
648 "", got_object_tag_get_tagger_time(tag),
649 loose_obj_only, repo, cancel_cb, cancel_arg);
650 break;
651 default:
652 break;
655 done:
656 got_object_tag_close(tag);
657 return err;
/* Colors assigned to commits while findtwixt() walks the history. */
enum findtwixt_color {
	COLOR_KEEP = 0,
	COLOR_DROP,
	COLOR_BLANK,
};

/*
 * Addressable copies of the color values; queue entries point into this
 * table via their data pointer (see queue_commit_id()).
 */
static const int findtwixt_colors[] = {
	[COLOR_KEEP] = COLOR_KEEP,
	[COLOR_DROP] = COLOR_DROP,
	[COLOR_BLANK] = COLOR_BLANK
};
671 static const struct got_error *
672 queue_commit_id(struct got_object_id_queue *ids, struct got_object_id *id,
673 int color, struct got_repository *repo)
675 const struct got_error *err;
676 struct got_object_qid *qid;
678 err = got_object_qid_alloc(&qid, id);
679 if (err)
680 return err;
682 STAILQ_INSERT_TAIL(ids, qid, entry);
683 qid->data = (void *)&findtwixt_colors[color];
684 return NULL;
687 static const struct got_error *
688 drop_commit(struct got_object_idset *keep, struct got_object_idset *drop,
689 struct got_object_id *id, struct got_repository *repo,
690 got_cancel_cb cancel_cb, void *cancel_arg)
692 const struct got_error *err = NULL;
693 struct got_commit_object *commit;
694 const struct got_object_id_queue *parents;
695 struct got_object_id_queue ids;
696 struct got_object_qid *qid;
698 STAILQ_INIT(&ids);
700 err = got_object_qid_alloc(&qid, id);
701 if (err)
702 return err;
703 STAILQ_INSERT_HEAD(&ids, qid, entry);
705 while (!STAILQ_EMPTY(&ids)) {
706 if (cancel_cb) {
707 err = (*cancel_cb)(cancel_arg);
708 if (err)
709 break;
712 qid = STAILQ_FIRST(&ids);
713 STAILQ_REMOVE_HEAD(&ids, entry);
715 if (got_object_idset_contains(drop, qid->id)) {
716 got_object_qid_free(qid);
717 continue;
720 err = got_object_idset_add(drop, qid->id,
721 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
722 if (err) {
723 got_object_qid_free(qid);
724 break;
727 if (!got_object_idset_contains(keep, qid->id)) {
728 got_object_qid_free(qid);
729 continue;
732 err = got_object_open_as_commit(&commit, repo, qid->id);
733 got_object_qid_free(qid);
734 if (err)
735 break;
737 parents = got_object_commit_get_parent_ids(commit);
738 if (parents) {
739 err = got_object_id_queue_copy(parents, &ids);
740 if (err) {
741 got_object_commit_close(commit);
742 break;
745 got_object_commit_close(commit);
748 got_object_id_queue_free(&ids);
749 return err;
/* State shared between findtwixt() and its append_id() callback. */
struct append_id_arg {
	struct got_object_id **array;	/* destination array */
	int idx;			/* next free slot in array */
};
757 static const struct got_error *
758 append_id(struct got_object_id *id, void *data, void *arg)
760 struct append_id_arg *a = arg;
762 a->array[a->idx] = got_object_id_dup(id);
763 if (a->array[a->idx] == NULL)
764 return got_error_from_errno("got_object_id_dup");
766 a->idx++;
767 return NULL;
770 static const struct got_error *
771 findtwixt(struct got_object_id ***res, int *nres,
772 struct got_object_id **head, int nhead,
773 struct got_object_id **tail, int ntail,
774 struct got_repository *repo,
775 got_cancel_cb cancel_cb, void *cancel_arg)
777 const struct got_error *err = NULL;
778 struct got_object_id_queue ids;
779 struct got_object_idset *keep, *drop;
780 struct got_object_qid *qid;
781 int i, ncolor, nkeep, obj_type;
783 STAILQ_INIT(&ids);
784 *res = NULL;
785 *nres = 0;
787 keep = got_object_idset_alloc();
788 if (keep == NULL)
789 return got_error_from_errno("got_object_idset_alloc");
791 drop = got_object_idset_alloc();
792 if (drop == NULL) {
793 err = got_error_from_errno("got_object_idset_alloc");
794 goto done;
797 for (i = 0; i < nhead; i++) {
798 struct got_object_id *id = head[i];
799 if (id == NULL)
800 continue;
801 err = got_object_get_type(&obj_type, repo, id);
802 if (err)
803 return err;
804 if (obj_type != GOT_OBJ_TYPE_COMMIT)
805 continue;
806 err = queue_commit_id(&ids, id, COLOR_KEEP, repo);
807 if (err)
808 goto done;
810 for (i = 0; i < ntail; i++) {
811 struct got_object_id *id = tail[i];
812 if (id == NULL)
813 continue;
814 err = got_object_get_type(&obj_type, repo, id);
815 if (err)
816 return err;
817 if (obj_type != GOT_OBJ_TYPE_COMMIT)
818 continue;
819 err = queue_commit_id(&ids, id, COLOR_DROP, repo);
820 if (err)
821 goto done;
824 while (!STAILQ_EMPTY(&ids)) {
825 int qcolor;
826 qid = STAILQ_FIRST(&ids);
827 qcolor = *((int *)qid->data);
829 if (got_object_idset_contains(drop, qid->id))
830 ncolor = COLOR_DROP;
831 else if (got_object_idset_contains(keep, qid->id))
832 ncolor = COLOR_KEEP;
833 else
834 ncolor = COLOR_BLANK;
836 if (ncolor == COLOR_DROP || (ncolor == COLOR_KEEP &&
837 qcolor == COLOR_KEEP)) {
838 STAILQ_REMOVE_HEAD(&ids, entry);
839 got_object_qid_free(qid);
840 continue;
843 if (ncolor == COLOR_KEEP && qcolor == COLOR_DROP) {
844 err = drop_commit(keep, drop, qid->id, repo,
845 cancel_cb, cancel_arg);
846 if (err)
847 goto done;
848 } else if (ncolor == COLOR_BLANK) {
849 struct got_commit_object *commit;
850 struct got_object_id *id;
851 const struct got_object_id_queue *parents;
852 struct got_object_qid *pid;
854 id = got_object_id_dup(qid->id);
855 if (id == NULL) {
856 err = got_error_from_errno("got_object_id_dup");
857 goto done;
859 if (qcolor == COLOR_KEEP)
860 err = got_object_idset_add(keep, id,
861 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
862 else
863 err = got_object_idset_add(drop, id,
864 (void *)&obj_types[GOT_OBJ_TYPE_COMMIT]);
865 if (err) {
866 free(id);
867 goto done;
870 err = got_object_open_as_commit(&commit, repo, id);
871 if (err) {
872 free(id);
873 goto done;
875 parents = got_object_commit_get_parent_ids(commit);
876 if (parents) {
877 STAILQ_FOREACH(pid, parents, entry) {
878 err = queue_commit_id(&ids, pid->id,
879 qcolor, repo);
880 if (err) {
881 free(id);
882 goto done;
886 got_object_commit_close(commit);
887 commit = NULL;
888 } else {
889 /* should not happen */
890 err = got_error_fmt(GOT_ERR_NOT_IMPL,
891 "%s ncolor=%d qcolor=%d", __func__, ncolor, qcolor);
892 goto done;
895 STAILQ_REMOVE_HEAD(&ids, entry);
896 got_object_qid_free(qid);
899 nkeep = got_object_idset_num_elements(keep);
900 if (nkeep > 0) {
901 struct append_id_arg arg;
902 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
903 if (arg.array == NULL) {
904 err = got_error_from_errno("calloc");
905 goto done;
907 arg.idx = 0;
908 err = got_object_idset_for_each(keep, append_id, &arg);
909 if (err) {
910 free(arg.array);
911 goto done;
913 *res = arg.array;
914 *nres = nkeep;
916 done:
917 got_object_idset_free(keep);
918 got_object_idset_free(drop);
919 got_object_id_queue_free(&ids);
920 return err;
923 static const struct got_error *
924 read_meta(struct got_pack_meta ***meta, int *nmeta,
925 struct got_object_id **theirs, int ntheirs,
926 struct got_object_id **ours, int nours, struct got_repository *repo,
927 int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg,
928 got_cancel_cb cancel_cb, void *cancel_arg)
930 const struct got_error *err = NULL;
931 struct got_object_id **ids = NULL;
932 struct got_object_idset *idset;
933 int i, nobj = 0, obj_type;
934 struct got_pack_metavec v;
936 *meta = NULL;
937 *nmeta = 0;
939 idset = got_object_idset_alloc();
940 if (idset == NULL)
941 return got_error_from_errno("got_object_idset_alloc");
943 v.nmeta = 0;
944 v.metasz = 64;
945 v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *));
946 if (v.meta == NULL) {
947 err = got_error_from_errno("calloc");
948 goto done;
951 err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo,
952 cancel_cb, cancel_arg);
953 if (err || nobj == 0)
954 goto done;
956 for (i = 0; i < ntheirs; i++) {
957 struct got_object_id *id = theirs[i];
958 if (id == NULL)
959 continue;
960 err = got_object_get_type(&obj_type, repo, id);
961 if (err)
962 return err;
963 if (obj_type != GOT_OBJ_TYPE_COMMIT)
964 continue;
965 err = load_commit(NULL, idset, id, repo,
966 loose_obj_only, cancel_cb, cancel_arg);
967 if (err)
968 goto done;
969 if (progress_cb) {
970 err = progress_cb(progress_arg, 0L, nours,
971 v.nmeta, 0, 0);
972 if (err)
973 goto done;
977 for (i = 0; i < ntheirs; i++) {
978 struct got_object_id *id = theirs[i];
979 int *cached_type;
980 if (id == NULL)
981 continue;
982 cached_type = got_object_idset_get(idset, id);
983 if (cached_type == NULL) {
984 err = got_object_get_type(&obj_type, repo, id);
985 if (err)
986 goto done;
987 } else
988 obj_type = *cached_type;
989 if (obj_type != GOT_OBJ_TYPE_TAG)
990 continue;
991 err = load_tag(NULL, idset, id, repo,
992 loose_obj_only, cancel_cb, cancel_arg);
993 if (err)
994 goto done;
995 if (progress_cb) {
996 err = progress_cb(progress_arg, 0L, nours,
997 v.nmeta, 0, 0);
998 if (err)
999 goto done;
1003 for (i = 0; i < nobj; i++) {
1004 err = load_commit(&v, idset, ids[i], repo,
1005 loose_obj_only, cancel_cb, cancel_arg);
1006 if (err)
1007 goto done;
1008 if (progress_cb) {
1009 err = progress_cb(progress_arg, 0L, nours,
1010 v.nmeta, 0, 0);
1011 if (err)
1012 goto done;
1016 for (i = 0; i < nours; i++) {
1017 struct got_object_id *id = ours[i];
1018 int *cached_type;
1019 if (id == NULL)
1020 continue;
1021 cached_type = got_object_idset_get(idset, id);
1022 if (cached_type == NULL) {
1023 err = got_object_get_type(&obj_type, repo, id);
1024 if (err)
1025 goto done;
1026 } else
1027 obj_type = *cached_type;
1028 if (obj_type != GOT_OBJ_TYPE_TAG)
1029 continue;
1030 err = load_tag(&v, idset, id, repo,
1031 loose_obj_only, cancel_cb, cancel_arg);
1032 if (err)
1033 goto done;
1034 if (progress_cb) {
1035 err = progress_cb(progress_arg, 0L, nours,
1036 v.nmeta, 0, 0);
1037 if (err)
1038 goto done;
1042 done:
1043 for (i = 0; i < nobj; i++) {
1044 free(ids[i]);
1046 free(ids);
1047 got_object_idset_free(idset);
1048 if (err == NULL) {
1049 *meta = v.meta;
1050 *nmeta = v.nmeta;
1051 } else
1052 free(v.meta);
1054 return err;
1057 const struct got_error *
1058 hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx)
1060 size_t n;
1062 SHA1Update(ctx, buf, len);
1063 n = fwrite(buf, 1, len, f);
1064 if (n != len)
1065 return got_ferror(f, GOT_ERR_IO);
1066 return NULL;
/* Store `n` into b[0..3] in big-endian byte order. */
static void
putbe32(char *b, uint32_t n)
{
	int k;

	for (k = 0; k < 4; k++)
		b[k] = n >> (24 - 8 * k);
}
1078 static int
1079 write_order_cmp(const void *pa, const void *pb)
1081 struct got_pack_meta *a, *b, *ahd, *bhd;
1083 a = *(struct got_pack_meta **)pa;
1084 b = *(struct got_pack_meta **)pb;
1085 ahd = (a->head == NULL) ? a : a->head;
1086 bhd = (b->head == NULL) ? b : b->head;
1087 if (ahd->mtime != bhd->mtime)
1088 return bhd->mtime - ahd->mtime;
1089 if (ahd != bhd)
1090 return (uintptr_t)bhd - (uintptr_t)ahd;
1091 if (a->nchain != b->nchain)
1092 return a->nchain - b->nchain;
1093 return a->mtime - b->mtime;
1096 static const struct got_error *
1097 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
1099 size_t i;
1101 *hdrlen = 0;
1103 hdr[0] = obj_type << 4;
1104 hdr[0] |= len & 0xf;
1105 len >>= 4;
1106 for (i = 1; len != 0; i++){
1107 if (i >= bufsize)
1108 return got_error(GOT_ERR_NO_SPACE);
1109 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1110 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1111 len >>= GOT_DELTA_SIZE_SHIFT;
1114 *hdrlen = i;
1115 return NULL;
1118 static int
1119 packoff(char *hdr, off_t off)
1121 int i, j;
1122 char rbuf[8];
1124 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1125 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1126 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1127 GOT_DELTA_SIZE_MORE;
1130 j = 0;
1131 while (i > 0)
1132 hdr[j++] = rbuf[--i];
1133 return j;
1136 static const struct got_error *
1137 genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delta_cache,
1138 struct got_pack_meta **meta, int nmeta, int nours,
1139 int use_offset_deltas, struct got_repository *repo,
1140 got_pack_progress_cb progress_cb, void *progress_arg,
1141 got_cancel_cb cancel_cb, void *cancel_arg)
1143 const struct got_error *err = NULL;
1144 int i, nh;
1145 SHA1_CTX ctx;
1146 struct got_pack_meta *m;
1147 struct got_raw_object *raw = NULL;
1148 FILE *delta_file = NULL;
1149 char buf[32];
1150 size_t outlen, n;
1151 struct got_deflate_checksum csum;
1152 off_t packfile_size = 0;
1153 int outfd = -1;
1155 SHA1Init(&ctx);
1156 csum.output_sha1 = &ctx;
1157 csum.output_crc = NULL;
1159 err = hwrite(packfile, "PACK", 4, &ctx);
1160 if (err)
1161 return err;
1162 putbe32(buf, GOT_PACKFILE_VERSION);
1163 err = hwrite(packfile, buf, 4, &ctx);
1164 if (err)
1165 goto done;
1166 putbe32(buf, nmeta);
1167 err = hwrite(packfile, buf, 4, &ctx);
1168 if (err)
1169 goto done;
1170 qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp);
1171 for (i = 0; i < nmeta; i++) {
1172 if (progress_cb) {
1173 err = progress_cb(progress_arg, packfile_size, nours,
1174 nmeta, nmeta, i);
1175 if (err)
1176 goto done;
1178 m = meta[i];
1179 m->off = ftello(packfile);
1180 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
1181 if (err)
1182 goto done;
1183 if (m->delta_len == 0) {
1184 err = packhdr(&nh, buf, sizeof(buf),
1185 m->obj_type, raw->size);
1186 if (err)
1187 goto done;
1188 err = hwrite(packfile, buf, nh, &ctx);
1189 if (err)
1190 goto done;
1191 packfile_size += nh;
1192 if (fseeko(raw->f, raw->hdrlen, SEEK_SET) == -1) {
1193 err = got_error_from_errno("fseeko");
1194 goto done;
1196 err = got_deflate_to_file(&outlen, raw->f, packfile,
1197 &csum);
1198 if (err)
1199 goto done;
1200 packfile_size += outlen;
1201 } else {
1202 off_t remain;
1203 if (delta_file == NULL) {
1204 delta_file = got_opentemp();
1205 if (delta_file == NULL) {
1206 err = got_error_from_errno(
1207 "got_opentemp");
1208 goto done;
1211 if (ftruncate(fileno(delta_file), 0L) == -1) {
1212 err = got_error_from_errno("ftruncate");
1213 goto done;
1215 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1216 err = got_error_from_errno("fseeko");
1217 goto done;
1219 if (fseeko(delta_cache, m->delta_offset, SEEK_SET)
1220 == -1) {
1221 err = got_error_from_errno("fseeko");
1222 goto done;
1224 remain = m->delta_len;
1225 while (remain > 0) {
1226 char delta_buf[8192];
1227 size_t r, w, n;
1228 n = MIN(remain, sizeof(delta_buf));
1229 r = fread(delta_buf, 1, n, delta_cache);
1230 if (r != n) {
1231 err = got_ferror(delta_cache,
1232 GOT_ERR_IO);
1233 goto done;
1235 w = fwrite(delta_buf, 1, n, delta_file);
1236 if (w != n) {
1237 err = got_ferror(delta_file,
1238 GOT_ERR_IO);
1239 goto done;
1241 remain -= n;
1243 if (use_offset_deltas && m->prev->off != 0) {
1244 err = packhdr(&nh, buf, sizeof(buf),
1245 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
1246 if (err)
1247 goto done;
1248 nh += packoff(buf + nh,
1249 m->off - m->prev->off);
1250 err = hwrite(packfile, buf, nh, &ctx);
1251 if (err)
1252 goto done;
1253 packfile_size += nh;
1254 } else {
1255 err = packhdr(&nh, buf, sizeof(buf),
1256 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
1257 err = hwrite(packfile, buf, nh, &ctx);
1258 if (err)
1259 goto done;
1260 packfile_size += nh;
1261 err = hwrite(packfile, m->prev->id.sha1,
1262 sizeof(m->prev->id.sha1), &ctx);
1263 packfile_size += sizeof(m->prev->id.sha1);
1264 if (err)
1265 goto done;
1267 if (fseeko(delta_file, 0L, SEEK_SET) == -1) {
1268 err = got_error_from_errno("fseeko");
1269 goto done;
1271 err = got_deflate_to_file(&outlen, delta_file,
1272 packfile, &csum);
1273 if (err)
1274 goto done;
1275 packfile_size += outlen;
1277 got_object_raw_close(raw);
1278 raw = NULL;
1280 SHA1Final(pack_sha1, &ctx);
1281 n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile);
1282 if (n != SHA1_DIGEST_LENGTH)
1283 err = got_ferror(packfile, GOT_ERR_IO);
1284 packfile_size += SHA1_DIGEST_LENGTH;
1285 packfile_size += sizeof(struct got_packfile_hdr);
1286 err = progress_cb(progress_arg, packfile_size, nours,
1287 nmeta, nmeta, nmeta);
1288 if (err)
1289 goto done;
1290 done:
1291 if (delta_file && fclose(delta_file) == EOF && err == NULL)
1292 err = got_error_from_errno("fclose");
1293 if (raw)
1294 got_object_raw_close(raw);
1295 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1296 err = got_error_from_errno("close");
1297 return err;
1300 const struct got_error *
1301 got_pack_create(uint8_t *packsha1, FILE *packfile,
1302 struct got_object_id **theirs, int ntheirs,
1303 struct got_object_id **ours, int nours,
1304 struct got_repository *repo, int loose_obj_only, int allow_empty,
1305 got_pack_progress_cb progress_cb, void *progress_arg,
1306 got_cancel_cb cancel_cb, void *cancel_arg)
1308 const struct got_error *err;
1309 struct got_pack_meta **meta;
1310 int nmeta;
1311 FILE *delta_cache = NULL;
1313 err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo,
1314 loose_obj_only, progress_cb, progress_arg, cancel_cb, cancel_arg);
1315 if (err)
1316 return err;
1318 if (nmeta == 0 && !allow_empty) {
1319 err = got_error(GOT_ERR_CANNOT_PACK);
1320 goto done;
1323 delta_cache = got_opentemp();
1324 if (delta_cache == NULL) {
1325 err = got_error_from_errno("got_opentemp");
1326 goto done;
1329 if (nmeta > 0) {
1330 err = pick_deltas(meta, nmeta, nours, delta_cache, repo,
1331 progress_cb, progress_arg, cancel_cb, cancel_arg);
1332 if (err)
1333 goto done;
1334 if (fseeko(delta_cache, 0L, SEEK_SET) == -1) {
1335 err = got_error_from_errno("fseeko");
1336 goto done;
1340 err = genpack(packsha1, packfile, delta_cache, meta, nmeta, nours, 1,
1341 repo, progress_cb, progress_arg, cancel_cb, cancel_arg);
1342 if (err)
1343 goto done;
1344 done:
1345 free_nmeta(meta, nmeta);
1346 if (delta_cache && fclose(delta_cache) == EOF && err == NULL)
1347 err = got_error_from_errno("fclose");
1348 return err;