Blob


/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <errno.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sha1.h>
25 #include <zlib.h>
26 #include <ctype.h>
27 #include <limits.h>
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_repository.h"
32 #include "got_sha1.h"
33 #include "delta.h"
34 #include "pack.h"
35 #include "zb.h"
36 #include "object.h"
/* Smaller of two values; each argument may be evaluated more than once. */
#ifndef MIN
#define MIN(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#endif

/* Element count of a true array (not valid for pointers). */
#ifndef nitems
#define nitems(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* Type tags compared against the start of an object header. */
#define GOT_OBJ_TAG_COMMIT	"commit"
#define GOT_OBJ_TAG_TREE	"tree"
#define GOT_OBJ_TAG_BLOB	"blob"

/* Line prefixes recognised while parsing a commit object. */
#define GOT_COMMIT_TAG_TREE		"tree "
#define GOT_COMMIT_TAG_PARENT		"parent "
#define GOT_COMMIT_TAG_AUTHOR		"author "
#define GOT_COMMIT_TAG_COMMITTER	"committer "
55 char *
56 got_object_id_str(struct got_object_id *id, char *buf, size_t size)
57 {
58 return got_sha1_digest_to_str(id->sha1, buf, size);
59 }
61 const struct got_error *
62 got_parse_object_id(struct got_object_id **id, const char *buf)
63 {
64 *id = calloc(1, sizeof(**id));
65 if (*id == NULL)
66 return got_error(GOT_ERR_NO_MEM);
67 if (!got_parse_sha1_digest((*id)->sha1, buf)) {
68 free(*id);
69 *id = NULL;
70 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
71 }
72 return NULL;
73 }
76 int
77 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
78 {
79 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
80 }
82 int
83 got_object_get_type(struct got_object *obj)
84 {
85 switch (obj->type) {
86 case GOT_OBJ_TYPE_COMMIT:
87 case GOT_OBJ_TYPE_TREE:
88 case GOT_OBJ_TYPE_BLOB:
89 case GOT_OBJ_TYPE_TAG:
90 return obj->type;
91 default:
92 abort();
93 break;
94 }
96 /* not reached */
97 return 0;
98 }
100 static const struct got_error *
101 parse_object_header(struct got_object **obj, char *buf, size_t len)
103 const char *obj_tags[] = {
104 GOT_OBJ_TAG_COMMIT,
105 GOT_OBJ_TAG_TREE,
106 GOT_OBJ_TAG_BLOB
107 };
108 const int obj_types[] = {
109 GOT_OBJ_TYPE_COMMIT,
110 GOT_OBJ_TYPE_TREE,
111 GOT_OBJ_TYPE_BLOB,
112 };
113 int type = 0;
114 size_t size = 0, hdrlen = 0;
115 int i;
116 char *p = strchr(buf, '\0');
118 if (p == NULL)
119 return got_error(GOT_ERR_BAD_OBJ_HDR);
121 hdrlen = strlen(buf) + 1 /* '\0' */;
123 for (i = 0; i < nitems(obj_tags); i++) {
124 const char *tag = obj_tags[i];
125 size_t tlen = strlen(tag);
126 const char *errstr;
128 if (strncmp(buf, tag, tlen) != 0)
129 continue;
131 type = obj_types[i];
132 if (len <= tlen)
133 return got_error(GOT_ERR_BAD_OBJ_HDR);
134 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
135 if (errstr != NULL)
136 return got_error(GOT_ERR_BAD_OBJ_HDR);
137 break;
140 if (type == 0)
141 return got_error(GOT_ERR_BAD_OBJ_HDR);
143 *obj = calloc(1, sizeof(**obj));
144 if (*obj == NULL)
145 return got_error(GOT_ERR_NO_MEM);
146 (*obj)->type = type;
147 (*obj)->hdrlen = hdrlen;
148 (*obj)->size = size;
149 return NULL;
152 static const struct got_error *
153 read_object_header(struct got_object **obj, struct got_repository *repo,
154 FILE *f)
156 const struct got_error *err;
157 struct got_zstream_buf zb;
158 char *buf;
159 size_t len;
160 const size_t zbsize = 64;
161 size_t outlen, totlen;
162 int i, ret;
164 buf = calloc(zbsize, sizeof(char));
165 if (buf == NULL)
166 return got_error(GOT_ERR_NO_MEM);
168 err = got_inflate_init(&zb, zbsize);
169 if (err)
170 return err;
172 i = 0;
173 totlen = 0;
174 do {
175 err = got_inflate_read(&zb, f, &outlen);
176 if (err)
177 goto done;
178 if (strchr(zb.outbuf, '\0') == NULL) {
179 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
180 if (buf == NULL) {
181 err = got_error(GOT_ERR_NO_MEM);
182 goto done;
185 memcpy(buf + totlen, zb.outbuf, outlen);
186 totlen += outlen;
187 i++;
188 } while (strchr(zb.outbuf, '\0') == NULL);
190 err = parse_object_header(obj, buf, totlen);
191 done:
192 got_inflate_end(&zb);
193 return err;
196 static const struct got_error *
197 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
199 const struct got_error *err = NULL;
200 char hex[SHA1_DIGEST_STRING_LENGTH];
201 char *path_objects = got_repo_get_path_objects(repo);
203 if (path_objects == NULL)
204 return got_error(GOT_ERR_NO_MEM);
206 got_object_id_str(id, hex, sizeof(hex));
208 if (asprintf(path, "%s/%.2x/%s", path_objects,
209 id->sha1[0], hex + 2) == -1)
210 err = got_error(GOT_ERR_NO_MEM);
212 free(path_objects);
213 return err;
216 static const struct got_error *
217 open_loose_object(FILE **f, struct got_object *obj, struct got_repository *repo)
219 const struct got_error *err = NULL;
220 char *path;
222 err = object_path(&path, &obj->id, repo);
223 if (err)
224 return err;
225 *f = fopen(path, "rb");
226 if (*f == NULL) {
227 err = got_error_from_errno();
228 goto done;
230 done:
231 free(path);
232 return err;
235 const struct got_error *
236 got_object_open(struct got_object **obj, struct got_repository *repo,
237 struct got_object_id *id)
239 const struct got_error *err = NULL;
240 char *path;
241 FILE *f;
243 err = object_path(&path, id, repo);
244 if (err)
245 return err;
247 f = fopen(path, "rb");
248 if (f == NULL) {
249 if (errno != ENOENT) {
250 err = got_error_from_errno();
251 goto done;
253 err = got_packfile_open_object(obj, id, repo);
254 if (err)
255 goto done;
256 if (*obj == NULL)
257 err = got_error(GOT_ERR_NO_OBJ);
258 } else {
259 err = read_object_header(obj, repo, f);
260 if (err)
261 goto done;
262 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
264 done:
265 free(path);
266 if (err && f)
267 fclose(f);
268 return err;
272 void
273 got_object_close(struct got_object *obj)
275 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
276 struct got_delta *delta;
277 while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
278 delta = SIMPLEQ_FIRST(&obj->deltas.entries);
279 SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
280 got_delta_close(delta);
283 if (obj->flags & GOT_OBJ_FLAG_PACKED)
284 free(obj->path_packfile);
285 free(obj);
288 static int
289 commit_object_valid(struct got_commit_object *commit)
291 int i;
292 int n;
294 if (commit == NULL)
295 return 0;
297 n = 0;
298 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
299 if (commit->tree_id->sha1[i] == 0)
300 n++;
302 if (n == SHA1_DIGEST_LENGTH)
303 return 0;
305 return 1;
308 static const struct got_error *
309 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
311 const struct got_error *err = NULL;
312 char *s = buf;
313 size_t tlen;
314 ssize_t remain = (ssize_t)len;
316 *commit = calloc(1, sizeof(**commit));
317 if (*commit == NULL)
318 return got_error(GOT_ERR_NO_MEM);
319 (*commit)->tree_id = calloc(1, sizeof(*(*commit)->tree_id));
320 if ((*commit)->tree_id == NULL) {
321 free(*commit);
322 *commit = NULL;
323 return got_error(GOT_ERR_NO_MEM);
326 SIMPLEQ_INIT(&(*commit)->parent_ids);
328 tlen = strlen(GOT_COMMIT_TAG_TREE);
329 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
330 remain -= tlen;
331 if (remain < SHA1_DIGEST_STRING_LENGTH) {
332 err = got_error(GOT_ERR_BAD_OBJ_DATA);
333 goto done;
335 s += tlen;
336 if (!got_parse_sha1_digest((*commit)->tree_id->sha1, s)) {
337 err = got_error(GOT_ERR_BAD_OBJ_DATA);
338 goto done;
340 remain -= SHA1_DIGEST_STRING_LENGTH;
341 s += SHA1_DIGEST_STRING_LENGTH;
342 } else {
343 err = got_error(GOT_ERR_BAD_OBJ_DATA);
344 goto done;
347 tlen = strlen(GOT_COMMIT_TAG_PARENT);
348 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
349 struct got_parent_id *pid;
351 remain -= tlen;
352 if (remain < SHA1_DIGEST_STRING_LENGTH) {
353 err = got_error(GOT_ERR_BAD_OBJ_DATA);
354 goto done;
357 pid = calloc(1, sizeof(*pid));
358 if (pid == NULL) {
359 err = got_error(GOT_ERR_NO_MEM);
360 goto done;
362 pid->id = calloc(1, sizeof(*pid->id));
363 if (pid->id == NULL) {
364 free(pid);
365 err = got_error(GOT_ERR_NO_MEM);
366 goto done;
368 s += tlen;
369 if (!got_parse_sha1_digest(pid->id->sha1, s)) {
370 err = got_error(GOT_ERR_BAD_OBJ_DATA);
371 free(pid->id);
372 free(pid);
373 goto done;
375 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
376 (*commit)->nparents++;
378 remain -= SHA1_DIGEST_STRING_LENGTH;
379 s += SHA1_DIGEST_STRING_LENGTH;
382 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
383 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
384 char *p;
386 remain -= tlen;
387 if (remain <= 0) {
388 err = got_error(GOT_ERR_BAD_OBJ_DATA);
389 goto done;
391 s += tlen;
392 p = strchr(s, '\n');
393 if (p == NULL) {
394 err = got_error(GOT_ERR_BAD_OBJ_DATA);
395 goto done;
397 *p = '\0';
398 (*commit)->author = strdup(s);
399 if ((*commit)->author == NULL) {
400 err = got_error(GOT_ERR_NO_MEM);
401 goto done;
403 s += strlen((*commit)->author) + 1;
404 remain -= strlen((*commit)->author) + 1;
407 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
408 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
409 char *p;
411 remain -= tlen;
412 if (remain <= 0) {
413 err = got_error(GOT_ERR_BAD_OBJ_DATA);
414 goto done;
416 s += tlen;
417 p = strchr(s, '\n');
418 if (p == NULL) {
419 err = got_error(GOT_ERR_BAD_OBJ_DATA);
420 goto done;
422 *p = '\0';
423 (*commit)->committer = strdup(s);
424 if ((*commit)->committer == NULL) {
425 err = got_error(GOT_ERR_NO_MEM);
426 goto done;
428 s += strlen((*commit)->committer) + 1;
429 remain -= strlen((*commit)->committer) + 1;
432 (*commit)->logmsg = strndup(s, remain);
433 if ((*commit)->logmsg == NULL) {
434 err = got_error(GOT_ERR_NO_MEM);
435 goto done;
437 done:
438 if (err) {
439 got_object_commit_close(*commit);
440 *commit = NULL;
442 return err;
445 static void
446 tree_entry_close(struct got_tree_entry *te)
448 free(te->id);
449 free(te->name);
450 free(te);
453 static const struct got_error *
454 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
455 size_t maxlen)
457 char *p = buf, *space;
458 const struct got_error *err = NULL;
459 char hex[SHA1_DIGEST_STRING_LENGTH];
461 *te = calloc(1, sizeof(**te));
462 if (*te == NULL)
463 return got_error(GOT_ERR_NO_MEM);
465 (*te)->id = calloc(1, sizeof(*(*te)->id));
466 if ((*te)->id == NULL) {
467 free(*te);
468 *te = NULL;
469 return got_error(GOT_ERR_NO_MEM);
472 *elen = strlen(buf) + 1;
473 if (*elen > maxlen) {
474 free(*te);
475 *te = NULL;
476 return got_error(GOT_ERR_BAD_OBJ_DATA);
479 space = strchr(buf, ' ');
480 if (space == NULL) {
481 free(*te);
482 *te = NULL;
483 return got_error(GOT_ERR_BAD_OBJ_DATA);
485 while (*p != ' ') {
486 if (*p < '0' && *p > '7') {
487 err = got_error(GOT_ERR_BAD_OBJ_DATA);
488 goto done;
490 (*te)->mode <<= 3;
491 (*te)->mode |= *p - '0';
492 p++;
495 (*te)->name = strdup(space + 1);
496 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
497 err = got_error(GOT_ERR_BAD_OBJ_DATA);
498 goto done;
500 buf += strlen(buf) + 1;
501 memcpy((*te)->id->sha1, buf, SHA1_DIGEST_LENGTH);
502 *elen += SHA1_DIGEST_LENGTH;
503 done:
504 if (err) {
505 tree_entry_close(*te);
506 *te = NULL;
508 return err;
511 static const struct got_error *
512 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
513 char *buf, size_t len)
515 const struct got_error *err;
516 size_t remain = len;
517 int nentries;
519 *tree = calloc(1, sizeof(**tree));
520 if (*tree == NULL)
521 return got_error(GOT_ERR_NO_MEM);
523 SIMPLEQ_INIT(&(*tree)->entries);
525 while (remain > 0) {
526 struct got_tree_entry *te;
527 size_t elen;
529 err = parse_tree_entry(&te, &elen, buf, remain);
530 if (err)
531 return err;
532 (*tree)->nentries++;
533 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
534 buf += elen;
535 remain -= elen;
538 if (remain != 0) {
539 got_object_tree_close(*tree);
540 return got_error(GOT_ERR_BAD_OBJ_DATA);
543 return NULL;
546 static const struct got_error *
547 read_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
549 const struct got_error *err = NULL;
550 static const size_t blocksize = 512;
551 size_t n, total, remain;
552 uint8_t *buf;
554 *outbuf = NULL;
555 *outlen = 0;
557 buf = calloc(1, blocksize);
558 if (buf == NULL)
559 return got_error(GOT_ERR_NO_MEM);
561 remain = blocksize;
562 total = 0;
563 while (1) {
564 if (remain == 0) {
565 uint8_t *newbuf;
566 newbuf = reallocarray(buf, 1, total + blocksize);
567 if (newbuf == NULL) {
568 err = got_error(GOT_ERR_NO_MEM);
569 goto done;
571 buf = newbuf;
572 remain += blocksize;
574 n = fread(buf, 1, remain, f);
575 if (n == 0) {
576 if (ferror(f)) {
577 err = got_ferror(f, GOT_ERR_IO);
578 goto done;
580 break; /* EOF */
582 remain -= n;
583 total += n;
584 };
586 done:
587 if (err == NULL) {
588 *outbuf = buf;
589 *outlen = total;
590 } else
591 free(buf);
592 return err;
595 static const struct got_error *
596 read_commit_object(struct got_commit_object **commit,
597 struct got_repository *repo, struct got_object *obj, FILE *f)
599 const struct got_error *err = NULL;
600 size_t len;
601 uint8_t *p;
602 int i, ret;
604 if (obj->flags & GOT_OBJ_FLAG_PACKED)
605 err = read_to_mem(&p, &len, f);
606 else
607 err = got_inflate_to_mem(&p, &len, f);
608 if (err)
609 return err;
611 if (len < obj->hdrlen + obj->size) {
612 err = got_error(GOT_ERR_BAD_OBJ_DATA);
613 goto done;
616 /* Skip object header. */
617 len -= obj->hdrlen;
618 err = parse_commit_object(commit, p + obj->hdrlen, len);
619 free(p);
620 done:
621 return err;
624 const struct got_error *
625 got_object_commit_open(struct got_commit_object **commit,
626 struct got_repository *repo, struct got_object *obj)
628 const struct got_error *err = NULL;
629 FILE *f;
631 if (obj->type != GOT_OBJ_TYPE_COMMIT)
632 return got_error(GOT_ERR_OBJ_TYPE);
634 if (obj->flags & GOT_OBJ_FLAG_PACKED)
635 err = got_packfile_extract_object(&f, obj, repo);
636 else
637 err = open_loose_object(&f, obj, repo);
638 if (err)
639 return err;
641 err = read_commit_object(commit, repo, obj, f);
642 fclose(f);
643 return err;
646 void
647 got_object_commit_close(struct got_commit_object *commit)
649 struct got_parent_id *pid;
651 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
652 pid = SIMPLEQ_FIRST(&commit->parent_ids);
653 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
654 free(pid->id);
655 free(pid);
658 free(commit->tree_id);
659 free(commit->author);
660 free(commit->committer);
661 free(commit->logmsg);
662 free(commit);
665 static const struct got_error *
666 read_tree_object(struct got_tree_object **tree,
667 struct got_repository *repo, struct got_object *obj, FILE *f)
669 const struct got_error *err = NULL;
670 size_t len;
671 uint8_t *p;
672 int i, ret;
674 if (obj->flags & GOT_OBJ_FLAG_PACKED)
675 err = read_to_mem(&p, &len, f);
676 else
677 err = got_inflate_to_mem(&p, &len, f);
678 if (err)
679 return err;
681 if (len < obj->hdrlen + obj->size) {
682 err = got_error(GOT_ERR_BAD_OBJ_DATA);
683 goto done;
686 /* Skip object header. */
687 len -= obj->hdrlen;
688 err = parse_tree_object(tree, repo, p + obj->hdrlen, len);
689 free(p);
690 done:
691 return err;
694 const struct got_error *
695 got_object_tree_open(struct got_tree_object **tree,
696 struct got_repository *repo, struct got_object *obj)
698 const struct got_error *err = NULL;
699 FILE *f;
701 if (obj->type != GOT_OBJ_TYPE_TREE)
702 return got_error(GOT_ERR_OBJ_TYPE);
704 if (obj->flags & GOT_OBJ_FLAG_PACKED)
705 err = got_packfile_extract_object(&f, obj, repo);
706 else
707 err = open_loose_object(&f, obj, repo);
708 if (err)
709 return err;
711 err = read_tree_object(tree, repo, obj, f);
712 fclose(f);
713 return err;
716 void
717 got_object_tree_close(struct got_tree_object *tree)
719 struct got_tree_entry *te;
721 while (!SIMPLEQ_EMPTY(&tree->entries)) {
722 te = SIMPLEQ_FIRST(&tree->entries);
723 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
724 tree_entry_close(te);
727 free(tree);
730 const struct got_error *
731 got_object_blob_open(struct got_blob_object **blob,
732 struct got_repository *repo, struct got_object *obj, size_t blocksize)
734 const struct got_error *err = NULL;
736 if (obj->type != GOT_OBJ_TYPE_BLOB)
737 return got_error(GOT_ERR_OBJ_TYPE);
739 if (blocksize < obj->hdrlen)
740 return got_error(GOT_ERR_NO_SPACE);
742 *blob = calloc(1, sizeof(**blob));
743 if (*blob == NULL)
744 return got_error(GOT_ERR_NO_MEM);
746 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
747 (*blob)->read_buf = calloc(1, blocksize);
748 if ((*blob)->read_buf == NULL)
749 return got_error(GOT_ERR_NO_MEM);
750 err = got_packfile_extract_object(&((*blob)->f), obj, repo);
751 if (err)
752 return err;
753 } else {
754 err = open_loose_object(&((*blob)->f), obj, repo);
755 if (err) {
756 free(*blob);
757 return err;
760 err = got_inflate_init(&(*blob)->zb, blocksize);
761 if (err != NULL) {
762 fclose((*blob)->f);
763 free(*blob);
764 return err;
767 (*blob)->read_buf = (*blob)->zb.outbuf;
768 (*blob)->flags |= GOT_BLOB_F_COMPRESSED;
771 (*blob)->hdrlen = obj->hdrlen;
772 (*blob)->blocksize = blocksize;
773 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
775 return err;
778 void
779 got_object_blob_close(struct got_blob_object *blob)
781 if (blob->flags & GOT_BLOB_F_COMPRESSED)
782 got_inflate_end(&blob->zb);
783 else
784 free(blob->read_buf);
785 fclose(blob->f);
786 free(blob);
789 char *
790 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
792 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
795 size_t
796 got_object_blob_get_hdrlen(struct got_blob_object *blob)
798 return blob->hdrlen;
801 const uint8_t *
802 got_object_blob_get_read_buf(struct got_blob_object *blob)
804 return blob->read_buf;
807 const struct got_error *
808 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
810 size_t n;
812 if (blob->flags & GOT_BLOB_F_COMPRESSED)
813 return got_inflate_read(&blob->zb, blob->f, outlenp);
815 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
816 if (n == 0 && ferror(blob->f))
817 return got_ferror(blob->f, GOT_ERR_IO);
818 *outlenp = n;
819 return NULL;