commit 668a20f6fe41d7f9c2f4c32b1ee521bff6d459a8 from: Stefan Sperling date: Wed Mar 18 16:13:41 2020 UTC rewritten got-index-pack; sorry about the monster commit commit - 2decf4c6adde2e9a8078b8af60c575c5bb91902a commit + 668a20f6fe41d7f9c2f4c32b1ee521bff6d459a8 blob - 9fce345c5049ce75a157248987a26b165a993c82 blob + 5559abf9bb796ef3808ab0010c9e8e8c3fc03274 --- got/got.c +++ got/got.c @@ -971,7 +971,7 @@ done: static const struct got_error * fetch_progress(void *arg, const char *message, off_t packfile_size, - int nobjects_total, int nobjects_indexed) + int nobj_total, int nobj_indexed, int nobj_loose, int nobj_resolved) { int *did_something = arg; char scaled[FMT_SCALED_STRSIZE]; @@ -979,13 +979,16 @@ fetch_progress(void *arg, const char *message, off_t p if (message) { printf("\rserver: %s", message); *did_something = 1; - } else if (packfile_size > 0 || nobjects_indexed > 0) { - printf("\rfetching..."); + } else if (packfile_size > 0 || nobj_indexed > 0) { + printf("\r"); if (fmt_scaled(packfile_size, scaled) == 0) - printf(" %*s", FMT_SCALED_STRSIZE, scaled); - if (nobjects_indexed > 0) - printf(" indexed %d/%d objects", nobjects_indexed, - nobjects_total); + printf(" %*s fetched", FMT_SCALED_STRSIZE, scaled); + if (nobj_indexed > 0) + printf("; indexed %d/%d objects", nobj_indexed, + nobj_total); + if (nobj_resolved > 0) + printf("; resolved %d/%d deltified objects ", + nobj_resolved, nobj_total - nobj_loose); *did_something = 1; } fflush(stdout); @@ -1079,18 +1082,18 @@ cmd_clone(int argc, char *argv[]) struct got_object_id *id = pe->data; struct got_reference *ref; - err = got_object_id_str(&id_str, id); + + err = got_ref_alloc(&ref, refname, id); if (err) goto done; - err = got_ref_alloc(&ref, refname, id); - if (err) { - free(id_str); + #if 0 + err = got_object_id_str(&id_str, id); + if (err) goto done; - } - printf("%s: %s\n", got_ref_get_name(ref), id_str); free(id_str); + #endif err = got_ref_write(ref, repo); got_ref_close(ref); if (err) blob - 5c84e0ca2a0a85854b24b9704a1dfeafb4994c52 blob + f81637bef69919b12186fd2f50b4057a22217788 --- include/got_fetch.h +++ include/got_fetch.h @@ -40,7 +40,7 @@ const struct got_error *got_fetch_connect(int *, const /* A callback function which gets invoked with progress information to print. */ typedef const struct got_error *(*got_fetch_progress_cb)(void *, - const char *, off_t, int, int); + const char *, off_t, int, int, int, int); /* * Attempt to fetch a packfile from a server. This pack file will contain blob - 734df058a9220e2d98f2e29f7605822aa1c9c4cc blob + 19368f185da89c0705fa0626390c32750c16b1b7 --- lib/fetch.c +++ lib/fetch.c @@ -451,7 +451,8 @@ got_fetch_pack(struct got_object_id **pack_hash, struc while ((s = strsep(&s0, "\r")) != NULL) { if (*s == '\0') continue; - err = progress_cb(progress_arg, s, 0, 0, 0); + err = progress_cb(progress_arg, s, + packfile_size_cur, 0, 0, 0, 0); if (err) break; } @@ -460,7 +461,7 @@ got_fetch_pack(struct got_object_id **pack_hash, struc goto done; } else if (packfile_size_cur != packfile_size) { err = progress_cb(progress_arg, NULL, - packfile_size_cur, 0, 0); + packfile_size_cur, 0, 0, 0, 0); if (err) break; packfile_size = packfile_size_cur; @@ -496,7 +497,8 @@ got_fetch_pack(struct got_object_id **pack_hash, struc } imsg_init(&idxibuf, imsg_idxfds[0]); - err = got_privsep_send_index_pack_req(&idxibuf, npackfd, *pack_hash); + err = got_privsep_send_index_pack_req(&idxibuf, (*pack_hash)->sha1, + npackfd); if (err != NULL) goto done; npackfd = -1; @@ -506,15 +508,17 @@ got_fetch_pack(struct got_object_id **pack_hash, struc nidxfd = -1; done = 0; while (!done) { - int nobjects_total, nobjects_indexed; - err = got_privsep_recv_index_progress(&done, &nobjects_total, - &nobjects_indexed, &idxibuf); + int nobj_total, nobj_indexed, nobj_loose, nobj_resolved; + + err = got_privsep_recv_index_progress(&done, &nobj_total, + &nobj_indexed, &nobj_loose, &nobj_resolved, + &idxibuf); if (err != NULL) goto done; - if (nobjects_indexed != 0) { + if (nobj_indexed != 0) { err = progress_cb(progress_arg, NULL, - packfile_size, nobjects_total, - nobjects_indexed); + packfile_size, nobj_total, + nobj_indexed, nobj_loose, nobj_resolved); if (err) break; } blob - 363157f9fa862d3d8847f8e39601526a53ece92d blob + 15e9a291534e2394802c47c2b9fec415bf7625ef --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -27,6 +27,14 @@ struct got_pack { const struct got_error *got_pack_stop_privsep_child(struct got_pack *); const struct got_error *got_pack_close(struct got_pack *); +const struct got_error *got_pack_parse_offset_delta(off_t *, size_t *, + struct got_pack *, off_t, int); +const struct got_error *got_pack_resolve_delta_chain(struct got_delta_chain *, + struct got_packidx *, struct got_pack *, off_t, size_t, int, size_t, + unsigned int); +const struct got_error *got_pack_parse_object_type_and_size(uint8_t *, + uint64_t *, size_t *, struct got_pack *, off_t); + #define GOT_PACK_PREFIX "pack-" #define GOT_PACKFILE_SUFFIX ".pack" #define GOT_PACKIDX_SUFFIX ".idx" @@ -171,6 +179,8 @@ const struct got_error *got_packfile_open_object(struc struct got_pack *, struct got_packidx *, int, struct got_object_id *); const struct got_error *got_pack_get_max_delta_object_size(uint64_t *, struct got_object *, struct got_pack *); +const struct got_error *got_pack_dump_delta_chain_to_mem(uint8_t **, size_t *, + struct got_delta_chain *, struct got_pack *); const struct got_error *got_packfile_extract_object(struct got_pack *, struct got_object *, FILE *, FILE *, FILE *); const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *, blob - 20e77442a08f16547d7bddcb93d3bf617d9afdd9 blob + b949a92af66121cfec06dc0bfea4accecd970423 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -276,13 +276,25 @@ struct got_imsg_fetch_download_progress { /* Number of packfile data bytes downloaded so far. */ off_t packfile_bytes; }; + +/* Structure for GOT_IMSG_IDXPACK_REQUEST data. */ +struct got_imsg_index_pack_request { + uint8_t pack_hash[SHA1_DIGEST_LENGTH]; +} __attribute__((__packed__)); /* Structure for GOT_IMSG_IDXPACK_PROGRESS data. */ struct got_imsg_index_pack_progress { /* Total number of objects in pack file. */ - int nobjects_total; + int nobj_total; + /* Number of objects indexed so far. */ - int nobjects_indexed; + int nobj_indexed; + + /* Number of non-deltified objects in pack file. */ + int nobj_loose; + + /* Number of deltified objects resolved so far. */ + int nobj_resolved; }; /* Structure for GOT_IMSG_PACKIDX. */ @@ -363,13 +375,13 @@ const struct got_error *got_privsep_send_blob_outfd(st const struct got_error *got_privsep_send_tmpfd(struct imsgbuf *, int); const struct got_error *got_privsep_send_obj(struct imsgbuf *, struct got_object *); -const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, int, - struct got_object_id *); +const struct got_error *got_privsep_send_index_pack_req(struct imsgbuf *, + uint8_t *, int); const struct got_error *got_privsep_send_index_pack_progress(struct imsgbuf *, - int, int); + int, int, int, int); const struct got_error *got_privsep_send_index_pack_done(struct imsgbuf *); const struct got_error *got_privsep_recv_index_progress(int *, int *, int *, - struct imsgbuf *ibuf); + int *, int *, struct imsgbuf *ibuf); const struct got_error *got_privsep_send_fetch_req(struct imsgbuf *, int, struct got_pathlist_head *); const struct got_error *got_privsep_send_fetch_symrefs(struct imsgbuf *, blob - c2147b8044f1aa9ecd319e1f06a3e7eb56d26b46 blob + 7f264cdcbb1fb07ab69610ba8ef70883ac16cfc2 --- lib/pack.c +++ lib/pack.c @@ -561,8 +561,8 @@ got_pack_close(struct got_pack *pack) return err; } -static const struct got_error * -parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len, +const struct got_error * +got_pack_parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len, struct got_pack *pack, off_t offset) { uint8_t t = 0; @@ -681,8 +681,8 @@ parse_negative_offset(int64_t *offset, size_t *len, st return NULL; } -static const struct got_error * -parse_offset_delta(off_t *base_offset, size_t *len, struct got_pack *pack, +const struct got_error * +got_pack_parse_offset_delta(off_t *base_offset, size_t *len, struct got_pack *pack, off_t offset, int tslen) { const struct got_error *err; @@ -706,10 +706,6 @@ parse_offset_delta(off_t *base_offset, size_t *len, st } static const struct got_error * -resolve_delta_chain(struct got_delta_chain *, struct got_packidx *, - struct got_pack *, off_t, size_t, int, size_t, unsigned int); - -static const struct got_error * read_delta_data(uint8_t **delta_buf, size_t *delta_len, size_t delta_data_offset, struct got_pack *pack) { @@ -759,7 +755,7 @@ resolve_offset_delta(struct got_delta_chain *deltas, off_t delta_data_offset; size_t consumed; - err = parse_offset_delta(&base_offset, &consumed, pack, + err = got_pack_parse_offset_delta(&base_offset, &consumed, pack, delta_offset, tslen); if (err) return err; @@ -783,12 +779,12 @@ resolve_offset_delta(struct got_delta_chain *deltas, if (base_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); - err = parse_object_type_and_size(&base_type, &base_size, &base_tslen, - pack, base_offset); + err = got_pack_parse_object_type_and_size(&base_type, &base_size, + &base_tslen, pack, base_offset); if (err) return err; - return resolve_delta_chain(deltas, packidx, pack, base_offset, + return got_pack_resolve_delta_chain(deltas, packidx, pack, base_offset, base_tslen, base_type, base_size, recursion - 1); } @@ -824,10 +820,6 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru delta_data_offset = lseek(pack->fd, 0, SEEK_CUR); if (delta_data_offset == -1) return got_error_from_errno("lseek"); - err = got_inflate_to_mem_fd(&delta_buf, &delta_len, NULL, - pack->fd); - if (err) - return err; } err = add_delta(deltas, delta_offset, tslen, delta_type, delta_size, @@ -838,7 +830,7 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru /* Delta base must be in the same pack file. */ idx = got_packidx_get_object_idx(packidx, &id); if (idx == -1) - return got_error(GOT_ERR_BAD_PACKFILE); + return got_error(GOT_ERR_NO_OBJ); base_offset = get_object_offset(packidx, idx); if (base_offset == (uint64_t)-1) @@ -847,19 +839,19 @@ resolve_ref_delta(struct got_delta_chain *deltas, stru if (base_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); - err = parse_object_type_and_size(&base_type, &base_size, &base_tslen, - pack, base_offset); + err = got_pack_parse_object_type_and_size(&base_type, &base_size, + &base_tslen, pack, base_offset); if (err) return err; - return resolve_delta_chain(deltas, packidx, pack, base_offset, + return got_pack_resolve_delta_chain(deltas, packidx, pack, base_offset, base_tslen, base_type, base_size, recursion - 1); } -static const struct got_error * -resolve_delta_chain(struct got_delta_chain *deltas, struct got_packidx *packidx, - struct got_pack *pack, off_t delta_offset, size_t tslen, int delta_type, - size_t delta_size, unsigned int recursion) +const struct got_error * +got_pack_resolve_delta_chain(struct got_delta_chain *deltas, + struct got_packidx *packidx, struct got_pack *pack, off_t delta_offset, + size_t tslen, int delta_type, size_t delta_size, unsigned int recursion) { const struct got_error *err = NULL; @@ -913,8 +905,9 @@ open_delta_object(struct got_object **obj, struct got_ (*obj)->flags |= GOT_OBJ_FLAG_PACKED; (*obj)->pack_idx = idx; - err = resolve_delta_chain(&(*obj)->deltas, packidx, pack, offset, - tslen, delta_type, delta_size, GOT_DELTA_CHAIN_RECURSION_MAX); + err = got_pack_resolve_delta_chain(&(*obj)->deltas, packidx, pack, + offset, tslen, delta_type, delta_size, + GOT_DELTA_CHAIN_RECURSION_MAX); if (err) goto done; @@ -946,7 +939,8 @@ got_packfile_open_object(struct got_object **obj, stru if (offset == (uint64_t)-1) return got_error(GOT_ERR_BAD_PACKIDX); - err = parse_object_type_and_size(&type, &size, &tslen, pack, offset); + err = got_pack_parse_object_type_and_size(&type, &size, &tslen, + pack, offset); if (err) return err; @@ -1034,7 +1028,7 @@ got_pack_get_max_delta_object_size(uint64_t *size, str return get_delta_chain_max_size(size, &obj->deltas, pack); } -static const struct got_error * +const struct got_error * dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas, struct got_pack *pack, FILE *outfile, FILE *base_file, FILE *accum_file) { @@ -1198,8 +1192,8 @@ done: return err; } -static const struct got_error * -dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen, +const struct got_error * +got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen, struct got_delta_chain *deltas, struct got_pack *pack) { const struct got_error *err = NULL; @@ -1377,7 +1371,8 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size err = got_inflate_to_mem_fd(buf, len, NULL, pack->fd); } } else - err = dump_delta_chain_to_mem(buf, len, &obj->deltas, pack); + err = got_pack_dump_delta_chain_to_mem(buf, len, &obj->deltas, + pack); return err; } blob - 986fccc8b38434f6702bead30a7eddae76b24dd5 blob + b42b1cd03197ef33807cd68e7fae1906826925a1 --- lib/privsep.c +++ lib/privsep.c @@ -767,12 +767,14 @@ done: } const struct got_error * -got_privsep_send_index_pack_req(struct imsgbuf *ibuf, int fd, struct got_object_id *hash) +got_privsep_send_index_pack_req(struct imsgbuf *ibuf, uint8_t *pack_hash, + int fd) { const struct got_error *err = NULL; + /* Keep in sync with struct got_imsg_index_pack_request */ if (imsg_compose(ibuf, GOT_IMSG_IDXPACK_REQUEST, 0, 0, fd, - hash->sha1, SHA1_DIGEST_LENGTH) == -1) { + pack_hash, SHA1_DIGEST_LENGTH) == -1) { err = got_error_from_errno("imsg_compose INDEX_REQUEST"); close(fd); return err; @@ -781,13 +783,15 @@ got_privsep_send_index_pack_req(struct imsgbuf *ibuf, } const struct got_error * -got_privsep_send_index_pack_progress(struct imsgbuf *ibuf, int nobjects_total, - int nobjects_indexed) +got_privsep_send_index_pack_progress(struct imsgbuf *ibuf, int nobj_total, + int nobj_indexed, int nobj_loose, int nobj_resolved) { struct got_imsg_index_pack_progress iprogress; - iprogress.nobjects_total = nobjects_total; - iprogress.nobjects_indexed = nobjects_indexed; + iprogress.nobj_total = nobj_total; + iprogress.nobj_indexed = nobj_indexed; + iprogress.nobj_loose = nobj_loose; + iprogress.nobj_resolved = nobj_resolved; if (imsg_compose(ibuf, GOT_IMSG_IDXPACK_PROGRESS, 0, 0, -1, &iprogress, sizeof(iprogress)) == -1) @@ -805,8 +809,9 @@ got_privsep_send_index_pack_done(struct imsgbuf *ibuf) } const struct got_error * -got_privsep_recv_index_progress(int *done, int *nobjects_total, - int *nobjects_indexed, struct imsgbuf *ibuf) +got_privsep_recv_index_progress(int *done, int *nobj_total, + int *nobj_indexed, int *nobj_loose, int *nobj_resolved, + struct imsgbuf *ibuf) { const struct got_error *err = NULL; struct imsg imsg; @@ -814,8 +819,9 @@ got_privsep_recv_index_progress(int *done, int *nobjec size_t datalen; *done = 0; - *nobjects_total = 0; - *nobjects_indexed = 0; + *nobj_total = 0; + *nobj_indexed = 0; + *nobj_resolved = 0; err = got_privsep_recv_imsg(&imsg, ibuf, 0); if (err) @@ -836,8 +842,10 @@ got_privsep_recv_index_progress(int *done, int *nobjec break; } iprogress = (struct got_imsg_index_pack_progress *)imsg.data; - *nobjects_total = iprogress->nobjects_total; - *nobjects_indexed = iprogress->nobjects_indexed; + *nobj_total = iprogress->nobj_total; + *nobj_indexed = iprogress->nobj_indexed; + *nobj_loose = iprogress->nobj_loose; + *nobj_resolved = iprogress->nobj_resolved; break; case GOT_IMSG_IDXPACK_DONE: if (datalen != 0) { blob - fcaafeb345feccfcee8a0a7445de7b1a800b1e47 blob + 483b83598ba5e1661ebbc0d8fb41bc95091a622b --- libexec/got-index-pack/Makefile +++ libexec/got-index-pack/Makefile @@ -4,7 +4,7 @@ PROG= got-index-pack SRCS= got-index-pack.c error.c inflate.c object_parse.c object_idset.c \ - path.c privsep.c sha1.c + delta_cache.c delta.c pack.c path.c privsep.c sha1.c CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib LDADD = -lutil -lz blob - 4247c3edb9fba18b593047110c5cd3c2330efdfc blob + a520e6ab167c3609a4e0ad87ff8d38d806f5f9ad --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2019 Ori Bernstein + * Copyright (c) 2020 Stefan Sperling * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -47,146 +48,41 @@ #include "got_lib_object_parse.h" #include "got_lib_object_idset.h" #include "got_lib_privsep.h" +#include "got_lib_pack.h" +#include "got_lib_delta_cache.h" -typedef struct Cinfo Cinfo; -typedef struct Tinfo Tinfo; -typedef struct Object Object; -typedef struct Pack Pack; -typedef struct Buf Buf; -typedef struct Dirent Dirent; -typedef struct Idxent Idxent; -typedef struct Ols Ols; +struct got_indexed_object { + struct got_object_id id; -enum { - /* 5k objects should be enough */ - Cachemax = 5*1024, - Pathmax = 512, - Hashsz = 20, - Pktmax = 65536, + /* + * Has this object been fully resolved? + * If so, we know its ID, otherwise we don't and 'id' is invalid. + */ + int valid; - Nproto = 16, - Nport = 16, - Nhost = 256, - Npath = 128, - Nrepo = 64, - Nbranch = 32, -}; + /* Offset of type+size field for this object in pack file. */ + off_t off; -typedef enum Type { - GNone = 0, - GCommit = 1, - GTree = 2, - GBlob = 3, - GTag = 4, - GOdelta = 6, - GRdelta = 7, -} Type; + /* Type+size values parsed from pack file. */ + uint8_t type; + uint64_t size; -enum { - Cloaded = 1 << 0, - Cidx = 1 << 1, - Ccache = 1 << 2, - Cexist = 1 << 3, - Cparsed = 1 << 5, -}; + /* Length of on-disk type+size data. */ + size_t tslen; -struct Dirent { - char *name; - int modref; - int mode; - struct got_object_id h; -}; + /* Length of object data following type+size. */ + size_t len; -struct Object { - /* Git data */ - struct got_object_id hash; - Type type; + uint32_t crc; - /* Cache */ - int id; - int flag; - int refs; - Object *next; - Object *prev; + /* For ref deltas. */ + struct got_object_id ref_id; - /* For indexing */ - off_t off; - off_t len; - uint32_t crc; - - /* Everything below here gets cleared */ - char *all; - char *data; - /* size excludes header */ - off_t size; - - union { - Cinfo *commit; - Tinfo *tree; - }; + /* For offset deltas. */ + off_t base_offset; + size_t base_offsetlen; }; -struct Tinfo { - /* Tree */ - Dirent *ent; - int nent; -}; - -struct Cinfo { - /* Commit */ - struct got_object_id *parent; - int nparent; - struct got_object_id tree; - char *author; - char *committer; - char *msg; - int nmsg; - off_t ctime; - off_t mtime; -}; - -typedef struct Buf Buf; - -struct Buf { - int len; - int sz; - char *data; -}; - -static int readpacked(FILE *, Object *, int); -static Object *readidxobject(FILE *, struct got_object_id, int); - -struct got_object_idset *objcache; -int next_object_id; -Object *lruhead; -Object *lrutail; -int ncache; - -#define GETBE16(b)\ - ((((b)[0] & 0xFFul) << 8) | \ - (((b)[1] & 0xFFul) << 0)) - -#define GETBE32(b)\ - ((((b)[0] & 0xFFul) << 24) | \ - (((b)[1] & 0xFFul) << 16) | \ - (((b)[2] & 0xFFul) << 8) | \ - (((b)[3] & 0xFFul) << 0)) -#define GETBE64(b)\ - ((((b)[0] & 0xFFull) << 56) | \ - (((b)[1] & 0xFFull) << 48) | \ - (((b)[2] & 0xFFull) << 40) | \ - (((b)[3] & 0xFFull) << 32) | \ - (((b)[4] & 0xFFull) << 24) | \ - (((b)[5] & 0xFFull) << 16) | \ - (((b)[6] & 0xFFull) << 8) | \ - (((b)[7] & 0xFFull) << 0)) - -#define PUTBE16(b, n)\ - do{ \ - (b)[0] = (n) >> 8; \ - (b)[1] = (n) >> 0; \ - } while(0) - #define PUTBE32(b, n)\ do{ \ (b)[0] = (n) >> 24; \ @@ -207,1039 +103,629 @@ int ncache; (b)[7] = (n) >> 0; \ } while(0) -static int -charval(int c, int *err) +static const struct got_error * +get_obj_type_label(const char **label, int obj_type) { - if(c >= '0' && c <= '9') - return c - '0'; - if(c >= 'a' && c <= 'f') - return c - 'a' + 10; - if(c >= 'A' && c <= 'F') - return c - 'A' + 10; - *err = 1; - return -1; -} + const struct got_error *err = NULL; -static int -hparse(struct got_object_id *h, char *b) -{ - int i, err; - - err = 0; - for(i = 0; i < sizeof(h->sha1); i++){ - err = 0; - h->sha1[i] = 0; - h->sha1[i] |= ((charval(b[2*i], &err) & 0xf) << 4); - h->sha1[i] |= ((charval(b[2*i+1], &err)& 0xf) << 0); - if(err) - return -1; + switch (obj_type) { + case GOT_OBJ_TYPE_BLOB: + *label = GOT_OBJ_LABEL_BLOB; + break; + case GOT_OBJ_TYPE_TREE: + *label = GOT_OBJ_LABEL_TREE; + break; + case GOT_OBJ_TYPE_COMMIT: + *label = GOT_OBJ_LABEL_COMMIT; + break; + case GOT_OBJ_TYPE_TAG: + *label = GOT_OBJ_LABEL_TAG; + break; + default: + *label = NULL; + err = got_error(GOT_ERR_OBJ_TYPE); + break; } - return 0; -} -static void * -emalloc(size_t n) -{ - void *v; - - v = calloc(n, 1); - if(v == NULL) - err(1, "malloc:"); - return v; + return err; } -static void * -erealloc(void *p, ulong n) + +static const struct got_error * +read_packed_object(struct got_pack *pack, struct got_indexed_object *obj) { - void *v; + const struct got_error *err = NULL; + SHA1_CTX ctx; + uint8_t *data; + size_t datalen; + ssize_t n; + char *header; + size_t headerlen; + const char *obj_label; - v = realloc(p, n); - if(v == NULL) - err(1, "realloc:"); - memset(v, 0, n); - return v; -} + err = got_pack_parse_object_type_and_size(&obj->type, &obj->size, &obj->tslen, + pack, obj->off); + if (err) + return err; -static int -hasheq(struct got_object_id *a, struct got_object_id *b) + switch (obj->type) { + case GOT_OBJ_TYPE_BLOB: + case GOT_OBJ_TYPE_COMMIT: + case GOT_OBJ_TYPE_TREE: + case GOT_OBJ_TYPE_TAG: + /* XXX TODO reading large objects into memory is bad! */ + err = got_inflate_to_mem_fd(&data, &datalen, &obj->len, pack->fd); + if (err) + break; + SHA1Init(&ctx); + err = get_obj_type_label(&obj_label, obj->type); + if (err) + break; + if (asprintf(&header, "%s %lld", obj_label, obj->size) == -1) { + err = got_error_from_errno("asprintf"); + free(data); + break; + } + headerlen = strlen(header) + 1; + SHA1Update(&ctx, header, headerlen); + SHA1Update(&ctx, data, datalen); + SHA1Final(obj->id.sha1, &ctx); + free(header); + free(data); + break; + case GOT_OBJ_TYPE_REF_DELTA: + memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH); + n = read(pack->fd, &obj->ref_id.sha1, SHA1_DIGEST_LENGTH); + if (n == -1) { + err = got_error_from_errno("read"); + break; + } + if (n < sizeof(obj->id)) { + err = got_error(GOT_ERR_BAD_PACKFILE); + break; + } + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, pack->fd); + if (err) + break; + obj->len += SHA1_DIGEST_LENGTH; + break; + case GOT_OBJ_TYPE_OFFSET_DELTA: + memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH); + err = got_pack_parse_offset_delta(&obj->base_offset, + &obj->base_offsetlen, pack, obj->off, obj->tslen); + if (err) + break; + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, pack->fd); + if (err) + break; + obj->len += obj->base_offsetlen; + break; + default: + err = got_error(GOT_ERR_OBJ_TYPE); + break; + } + + return err; +} + +static const struct got_error * +hwrite(int fd, void *buf, int len, SHA1_CTX *ctx) { - return memcmp(a->sha1, b->sha1, sizeof(a->sha1)) == 0; + ssize_t w; + + SHA1Update(ctx, buf, len); + + w = write(fd, buf, len); + if (w == -1) + return got_error_from_errno("write"); + if (w != len) + return got_error(GOT_ERR_IO); + + return NULL; } -static char * -typestr(int t) +static const struct got_error * +object_crc(int packfd, struct got_indexed_object *obj) { - char *types[] = { - "???", - "commit", - "tree", - "blob", - "tag", - "odelta", - "rdelta", - }; - if (t < 0 || t >= sizeof(types)/sizeof(types[0])) - abort(); - return types[t]; + char buf[8096]; + size_t n; + ssize_t r; + + obj->crc = 0; + if (lseek(packfd, obj->off + obj->tslen, SEEK_SET) == -1) + return got_error_from_errno("lseek"); + + obj->crc = crc32(0L, NULL, 0); + for (n = obj->len; n > 0; n -= r){ + r = read(packfd, buf, n > sizeof(buf) ? sizeof(buf) : n); + if (r == -1) + return got_error_from_errno("read"); + if (r == 0) + return NULL; + obj->crc = crc32(obj->crc, buf, r); + } + return 0; } -static char * -hashfmt(char *out, size_t nout, struct got_object_id *h) +#if 0 +static int +indexed_obj_cmp(const void *pa, const void *pb) { - int i, n, c0, c1; - char *p; + struct got_indexed_object *a, *b; - if (nout < 2*sizeof(h->sha1) + 1) - return NULL; - p = out; - for(i = 0; i < sizeof(h->sha1); i++){ - n = (h->sha1[i] >> 4) & 0xf; - c0 = (n >= 10) ? n-10 + 'a' : n + '0'; - n = h->sha1[i] & 0xf; - c1 = (n >= 10) ? n-10 + 'a' : n + '0'; - *p++ = c0; - *p++ = c1; + a = *(struct got_indexed_object **)pa; + b = *(struct got_indexed_object **)pb; + return got_object_id_cmp(&a->id, &b->id); +} +#endif + +static const struct got_error * +resolve_deltified_object(struct got_pack *pack, struct got_packidx *packidx, + struct got_indexed_object *obj) +{ + const struct got_error *err = NULL; + struct got_delta_chain deltas; + struct got_delta *delta; + uint8_t *buf = NULL; + size_t len; + SHA1_CTX ctx; + char *header; + size_t headerlen; + int base_obj_type; + const char *obj_label; + + deltas.nentries = 0; + SIMPLEQ_INIT(&deltas.entries); + + err = got_pack_resolve_delta_chain(&deltas, packidx, pack, + obj->off, obj->tslen, obj->type, obj->size, + GOT_DELTA_CHAIN_RECURSION_MAX); + if (err) + goto done; + + /* XXX TODO reading large objects into memory is bad! */ + err = got_pack_dump_delta_chain_to_mem(&buf, &len, &deltas, pack); + if (err) + goto done; + + SHA1Init(&ctx); + + err = got_delta_chain_get_base_type(&base_obj_type, &deltas); + if (err) + goto done; + err = get_obj_type_label(&obj_label, base_obj_type); + if (err) + goto done; + if (asprintf(&header, "%s %zd", obj_label, len) == -1) { + err = got_error_from_errno("asprintf"); + goto done; } - *p++ = 0; - return out; + headerlen = strlen(header) + 1; + SHA1Update(&ctx, header, headerlen); + SHA1Update(&ctx, buf, len); + SHA1Final(obj->id.sha1, &ctx); +done: + free(buf); + while (!SIMPLEQ_EMPTY(&deltas.entries)) { + delta = SIMPLEQ_FIRST(&deltas.entries); + SIMPLEQ_REMOVE_HEAD(&deltas.entries, entry); + free(delta); + } + return err; } -static void -clear(Object *o) +/* Determine the slot in the pack index a given object ID should use. */ +static int +find_object_idx(struct got_packidx *packidx, uint8_t *sha1) { - if(!o) - return; + u_int8_t id0 = sha1[0]; + uint32_t nindexed = betoh32(packidx->hdr.fanout_table[0xff]); + int left = 0, right = nindexed - 1; + int cmp = 0, i = 0; - assert(o->refs == 0); - assert((o->flag & Ccache) == 0); - assert(o->flag & Cloaded); - switch(o->type){ - case GCommit: - if(!o->commit) - break; - free(o->commit->parent); - free(o->commit->author); - free(o->commit->committer); - free(o->commit); - o->commit = NULL; - break; - case GTree: - if(!o->tree) - break; - free(o->tree->ent); - free(o->tree); - o->tree = NULL; - break; - default: - break; - } + if (id0 > 0) + left = betoh32(packidx->hdr.fanout_table[id0 - 1]); - free(o->all); - o->all = NULL; - o->data = NULL; - o->flag &= ~Cloaded; -} + while (left <= right) { + struct got_packidx_object_id *oid; -static void -unref(Object *o) -{ - if(!o) - return; - o->refs--; - if(!o->refs) - clear(o); -} + i = ((left + right) / 2); + oid = &packidx->hdr.sorted_ids[i]; -static Object* -ref(Object *o) -{ - o->refs++; - return o; -} - -static void -cache(Object *o) -{ - char buf[41]; - Object *p; - - hashfmt(buf, sizeof(buf), &o->hash); - if(o == lruhead) - return; - if(o == lrutail) - lrutail = lrutail->prev; - if(!(o->flag & Cexist)){ - got_object_idset_add(objcache, &o->hash, o); - o->id = next_object_id++; - o->flag |= Cexist; + cmp = memcmp(sha1, oid->sha1, SHA1_DIGEST_LENGTH); + if (cmp == 0) + return -1; /* object already indexed */ + else if (cmp > 0) + left = i + 1; + else if (cmp < 0) + right = i - 1; } - if(o->prev) - o->prev->next = o->next; - if(o->next) - o->next->prev = o->prev; - if(lrutail == o){ - lrutail = o->prev; - lrutail->next = NULL; - }else if(!lrutail) - lrutail = o; - if(lruhead) - lruhead->prev = o; - o->next = lruhead; - o->prev = NULL; - lruhead = o; - if(!(o->flag & Ccache)){ - o->flag |= Ccache; - ref(o); - ncache++; - } - while(ncache > Cachemax){ - p = lrutail; - lrutail = p->prev; - lrutail->next = NULL; - p->flag &= ~Ccache; - p->prev = NULL; - p->next = NULL; - unref(p); - ncache--; - } + return left; } -static int -preadbe32(FILE *b, int *v, off_t off) +#if 0 +static void +print_packidx(struct got_packidx *packidx) { - char buf[4]; + uint32_t nindexed = betoh32(packidx->hdr.fanout_table[0xff]); + int i; - if(fseek(b, off, SEEK_SET) == -1) - return -1; - if(fread(buf, 1, sizeof(buf), b) == -1) - return -1; - *v = GETBE32(buf); - - return 0; -} -static int -preadbe64(FILE *b, off_t *v, off_t off) -{ - char buf[8]; - - if(fseek(b, off, SEEK_SET) == -1) - return -1; - if(fread(buf, 1, sizeof(buf), b) == -1) - return -1; - *v = GETBE64(buf); - return 0; -} - -static int -readvint(char *p, char **pp) -{ - int i, n, c; - - i = 0; - n = 0; - do { - c = *p++; - n |= (c & 0x7f) << i; - i += 7; - } while (c & 0x80); - *pp = p; - - return n; -} - -static int -applydelta(Object *dst, Object *base, char *d, int nd) -{ - char *r, *b, *ed, *er; - int n, nr, c; - off_t o, l; - - ed = d + nd; - b = base->data; - n = readvint(d, &d); - if(n != base->size){ - fprintf(stderr, "mismatched source size\n"); - return -1; + fprintf(stderr, "object IDs:\n"); + for (i = 0; i < nindexed; i++) { + char hex[SHA1_DIGEST_STRING_LENGTH]; + got_sha1_digest_to_str(packidx->hdr.sorted_ids[i].sha1, + hex, sizeof(hex)); + fprintf(stderr, "%s\n", hex); } + fprintf(stderr, "\n"); - nr = readvint(d, &d); - r = emalloc(nr + 64); - n = snprintf(r, 64, "%s %d", typestr(base->type), nr) + 1; - dst->all = r; - dst->type = base->type; - dst->data = r + n; - dst->size = nr; - er = dst->data + nr; - r = dst->data; - - while(1){ - if(d == ed) - break; - c = *d++; - if(!c){ - fprintf(stderr, "bad delta encoding\n"); - return -1; - } - /* copy from base */ - if(c & 0x80){ - o = 0; - l = 0; - /* Offset in base */ - if(c & 0x01 && d != ed) o |= (*d++ << 0) & 0x000000ff; - if(c & 0x02 && d != ed) o |= (*d++ << 8) & 0x0000ff00; - if(c & 0x04 && d != ed) o |= (*d++ << 16) & 0x00ff0000; - if(c & 0x08 && d != ed) o |= (*d++ << 24) & 0xff000000; - - /* Length to copy */ - if(c & 0x10 && d != ed) l |= (*d++ << 0) & 0x0000ff; - if(c & 0x20 && d != ed) l |= (*d++ << 8) & 0x00ff00; - if(c & 0x40 && d != ed) l |= (*d++ << 16) & 0xff0000; - if(l == 0) l = 0x10000; - - assert(o + l <= base->size); - memmove(r, b + o, l); - r += l; - /* inline data */ - }else{ - memmove(r, d, c); - d += c; - r += c; - } - + fprintf(stderr, "object offsets:\n"); + for (i = 0; i < nindexed; i++) { + uint32_t offset = be32toh(packidx->hdr.offsets[i]); + if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) { + int j = offset & GOT_PACKIDX_OFFSET_VAL_MASK; + fprintf(stderr, "%u -> %llu\n", offset, + be64toh(packidx->hdr.large_offsets[j])); + } else + fprintf(stderr, "%u\n", offset); } - if(r != er){ - fprintf(stderr, "truncated delta (%zd)\n", er - r); - return -1; - } + fprintf(stderr, "\n"); - return nr; + fprintf(stderr, "fanout table:"); + for (i = 0; i <= 0xff; i++) + fprintf(stderr, " %u", be32toh(packidx->hdr.fanout_table[i])); + fprintf(stderr, "\n"); } +#endif -static int -readrdelta(FILE *f, Object *o, int nd, int flag) +static void +update_packidx(int *nlarge, struct got_packidx *packidx, int nobj, + struct got_indexed_object *obj) { - const struct got_error *e; - struct got_object_id h; - Object *b; - uint8_t *d; - size_t n; + int i, n, idx; + uint32_t nindexed = betoh32(packidx->hdr.fanout_table[0xff]); - d = NULL; - if(fread(h.sha1, 1, sizeof(h.sha1), f) != sizeof(h.sha1)) - goto error; - if(hasheq(&o->hash, &h)) - goto error; - if ((e = got_inflate_to_mem(&d, &n, NULL, f)) != NULL) - goto error; - o->len = ftello(f) - o->off; - if(d == NULL || n != nd) - goto error; - if((b = readidxobject(f, h, flag)) == NULL) - goto error; - if(applydelta(o, b, d, n) == -1) - goto error; - free(d); - return 0; -error: - free(d); - return -1; -} - -static int -readodelta(FILE *f, Object *o, off_t nd, off_t p, int flag) -{ - Object b; - uint8_t *d; - off_t r; - size_t n; - int c; - - r = 0; - d = NULL; - while(1){ - if((c = fgetc(f)) == -1) - goto error; - r |= c & 0x7f; - if (!(c & 0x80)) - break; - r++; - r <<= 7; - }while(c & 0x80); - - if(r > p){ - fprintf(stderr, "junk offset -%lld (from %lld)\n", r, p); - goto error; + idx = find_object_idx(packidx, obj->id.sha1); + if (idx == -1) { + char hex[SHA1_DIGEST_STRING_LENGTH]; + got_sha1_digest_to_str(obj->id.sha1, hex, sizeof(hex)); + return; /* object already indexed */ } - if (got_inflate_to_mem(&d, &n, NULL, f) != NULL) - goto error; - o->len = ftello(f) - o->off; - if(d == NULL || n != nd) - goto error; - if(fseek(f, p - r, SEEK_SET) == -1) - goto error; - if(readpacked(f, &b, flag) == -1) - goto error; - if(applydelta(o, &b, d, nd) == -1) - goto error; - free(d); - return 0; -error: - free(d); - return -1; -} - -static int -readpacked(FILE *f, Object *o, int flag) -{ - const struct got_error *e; - int c, s, n; - off_t l, p; - size_t ndata; - uint8_t *data; - Type t; - Buf b; - - p = ftello(f); - c = fgetc(f); - if(c == -1) - return -1; - l = c & 0xf; - s = 4; - t = (c >> 4) & 0x7; - if(!t){ - fprintf(stderr, "unknown type for byte %x\n", c); - return -1; - } - while(c & 0x80){ - if((c = fgetc(f)) == -1) - return -1; - l |= (c & 0x7f) << s; - s += 7; - } - - switch(t){ - default: - fprintf(stderr, "invalid object at %lld\n", ftello(f)); - return -1; - case GCommit: - case GTree: - case GTag: - case GBlob: - b.sz = 64 + l; - - b.data = emalloc(b.sz); - n = snprintf(b.data, 64, "%s %lld", typestr(t), l) + 1; - b.len = n; - e = got_inflate_to_mem(&data, &ndata, NULL, f); - if (e != NULL || n + ndata >= b.sz) { - free(b.data); - return -1; - } - memcpy(b.data + n, data, ndata); - o->len = ftello(f) - o->off; - o->type = t; - o->all = b.data; - o->data = b.data + n; - o->size = ndata; - free(data); - break; - case GOdelta: - if(readodelta(f, o, l, p, flag) == -1) - return -1; - break; - case GRdelta: - if(readrdelta(f, o, l, flag) == -1) - return -1; - break; - } - o->flag |= Cloaded|flag; - return 0; -} - -static int -readloose(FILE *f, Object *o, int flag) -{ - struct { char *tag; int type; } *p, types[] = { - {"blob", GBlob}, - {"tree", GTree}, - {"commit", GCommit}, - {"tag", GTag}, - {NULL}, - }; - char *s, *e; - uint8_t *d; - off_t sz; - size_t n; - int l; - - if (got_inflate_to_mem(&d, &n, NULL, f) != NULL) - return -1; - - s = (char *)d; - o->type = GNone; - for(p = types; p->tag; p++){ - l = strlen(p->tag); - if(strncmp(s, p->tag, l) == 0){ - s += l; - o->type = p->type; - while(!isspace(*s)) - s++; - break; - } - } - if(o->type == GNone){ - free(o->data); - return -1; - } - sz = strtol(s, &e, 0); - if(e == s || *e++ != 0){ - fprintf(stderr, "malformed object header\n"); - goto error; - } - if(sz != n - (e - (char *)d)){ - fprintf(stderr, "mismatched sizes\n"); - goto error; - } - o->size = sz; - o->data = e; - o->all = d; - o->flag |= Cloaded|flag; - return 0; - -error: - free(d); - return -1; -} - -static off_t -searchindex(FILE *f, struct got_object_id h) -{ - int lo, hi, idx, i, nent; - off_t o, oo; - struct got_object_id hh; + memmove(&packidx->hdr.sorted_ids[idx + 1], + &packidx->hdr.sorted_ids[idx], + sizeof(struct got_packidx_object_id) * (nindexed - idx)); + memmove(&packidx->hdr.offsets[idx + 1], &packidx->hdr.offsets[idx], + sizeof(uint32_t) * (nindexed - idx)); - o = 8; - /* - * Read the fanout table. The fanout table - * contains 256 entries, corresponsding to - * the first byte of the hash. Each entry - * is a 4 byte big endian integer, containing - * the total number of entries with a leading - * byte <= the table index, allowing us to - * rapidly do a binary search on them. - */ - if (h.sha1[0] == 0){ - lo = 0; - if(preadbe32(f, &hi, o) == -1) - goto err; - } else { - o += h.sha1[0]*4 - 4; - if(preadbe32(f, &lo, o + 0) == -1) - goto err; - if(preadbe32(f, &hi, o + 4) == -1) - goto err; + memcpy(packidx->hdr.sorted_ids[idx].sha1, obj->id.sha1, + SHA1_DIGEST_LENGTH); + if (obj->off < GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) + packidx->hdr.offsets[idx] = htobe32(obj->off); + else { + packidx->hdr.offsets[idx] = htobe32(*nlarge | + GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX); + packidx->hdr.large_offsets[*nlarge] = htobe64(obj->off); + (*nlarge)++; } - if(hi == lo) - goto notfound; - if(preadbe32(f, &nent, 8 + 255*4) == -1) - goto err; - /* - * Now that we know the range of hashes that the - * entry may exist in, read them in so we can do - * a bsearch. - */ - idx = -1; - fseek(f, Hashsz*lo + 8 + 256*4, SEEK_SET); - for(i = 0; i < hi - lo; i++){ - if(fread(hh.sha1, 1, sizeof(hh.sha1), f) == -1) - goto err; - if(hasheq(&hh, &h)) - idx = lo + i; + for (i = obj->id.sha1[0]; i <= 0xff; i++) { + n = be32toh(packidx->hdr.fanout_table[i]); + packidx->hdr.fanout_table[i] = htobe32(n + 1); } - if(idx == -1) - goto notfound; +} +static const struct got_error * +index_pack(struct got_pack *pack, int idxfd, uint8_t *pack_hash, + struct imsgbuf *ibuf) +{ + const struct got_error *err; + struct got_packfile_hdr hdr; + struct got_packidx packidx; + char buf[8]; + int nobj, nvalid, nloose, nlarge = 0, nresolved = 0, i; + struct got_indexed_object **objects = NULL, *obj; + SHA1_CTX ctx; + uint8_t packidx_hash[SHA1_DIGEST_LENGTH]; + ssize_t r, w; + int pass; + /* Check pack file header. */ + r = read(pack->fd, &hdr, sizeof(hdr)); + if (r == -1) + return got_error_from_errno("read"); + if (r < sizeof(hdr)) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "short packfile header"); + + if (hdr.signature != htobe32(GOT_PACKFILE_SIGNATURE)) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "bad packfile signature"); + if (hdr.version != htobe32(GOT_PACKFILE_VERSION)) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "bad packfile version"); + nobj = betoh32(hdr.nobjects); + if (nobj == 0) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "bad packfile with zero objects"); + /* - * We found the entry. If it's 32 bits, then we - * can just return the oset, otherwise the 32 - * bit entry contains the oset to the 64 bit - * entry. + * Create an in-memory pack index which will grow as objects + * IDs in the pack file are discovered. Only fields used to + * read deltified objects will be needed by the pack.c library + * code, so setting up just a pack index header is sufficient. */ - oo = 8; /* Header */ - oo += 256*4; /* Fanout table */ - oo += Hashsz*nent; /* Hashes */ - oo += 4*nent; /* Checksums */ - oo += 4*idx; /* Offset offset */ - if(preadbe32(f, &i, oo) == -1) - goto err; - o = i & 0xffffffff; - if(o & (1ull << 31)){ - o &= 0x7fffffff; - if(preadbe64(f, &o, o) == -1) - goto err; + memset(&packidx, 0, sizeof(packidx)); + packidx.hdr.magic = malloc(sizeof(uint32_t)); + if (packidx.hdr.magic == NULL) + return got_error_from_errno("calloc"); + *packidx.hdr.magic = htobe32(GOT_PACKIDX_V2_MAGIC); + packidx.hdr.version = malloc(sizeof(uint32_t)); + if (packidx.hdr.version == NULL) { + err = got_error_from_errno("malloc"); + goto done; } - return o; - -err: - fprintf(stderr, "unable to read packfile\n"); - return -1; -notfound: - { - char hstr[41]; - hashfmt(hstr, sizeof(hstr), &h); - fprintf(stdout, "could not find object %s\n", hstr); + *packidx.hdr.version = htobe32(GOT_PACKIDX_VERSION); + packidx.hdr.fanout_table = calloc(GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS, + sizeof(uint32_t)); + if (packidx.hdr.fanout_table == NULL) { + err = got_error_from_errno("calloc"); + goto done; } - return -1; -} - -/* - * Scans for non-empty word, copying it into buf. - * Strips off word, leading, and trailing space - * from input. - * - * Returns -1 on empty string or error, leaving - * input unmodified. - */ -static int -scanword(char **str, int *nstr, char *buf, int nbuf) -{ - char *p; - int n, r; - - r = -1; - p = *str; - n = *nstr; - while(n && isblank(*p)){ - n--; - p++; + packidx.hdr.sorted_ids = calloc(nobj, + sizeof(struct got_packidx_object_id)); + if (packidx.hdr.sorted_ids == NULL) { + err = got_error_from_errno("calloc"); + goto done; } - - for(; n && *p && !isspace(*p); p++, n--){ - r = 0; - *buf++ = *p; - nbuf--; - if(nbuf == 0) - return -1; + packidx.hdr.offsets = calloc(nobj, sizeof(uint32_t)); + if (packidx.hdr.offsets == NULL) { + err = got_error_from_errno("calloc"); + goto done; } - while(n && isblank(*p)){ - n--; - p++; + /* Large offsets table is empty for pack files < 2 GB. */ + if (pack->filesize >= GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) { + packidx.hdr.large_offsets = calloc(nobj, sizeof(uint64_t)); + if (packidx.hdr.large_offsets == NULL) { + err = got_error_from_errno("calloc"); + goto done; + } } - *buf = 0; - *str = p; - *nstr = n; - return r; -} -static void -nextline(char **str, int *nstr) -{ - char *s; + nvalid = 0; + nloose = 0; + objects = calloc(nobj, sizeof(struct got_indexed_object *)); + if (objects == NULL) + return got_error_from_errno("calloc"); - if((s = strchr(*str, '\n')) != NULL){ - *nstr -= s - *str + 1; - *str = s + 1; - } -} + /* + * First pass: locate all objects and identify un-deltified objects. + * + * When this pass has completed we will know offset, type, size, and + * CRC information for all objects in this pack file. We won't know + * any of the actual object IDs of deltified objects yet since we + * will not yet attempt to combine deltas. + */ + pass = 1; + for (i = 0; i < nobj; i++) { + err = got_privsep_send_index_pack_progress(ibuf, nobj, i + 1, + nloose, 0); + if (err) + goto done; -static int -parseauthor(char **str, int *nstr, char **name, off_t *time) -{ - return 0; -} + obj = calloc(1, sizeof(*obj)); + if (obj == NULL) { + err = got_error_from_errno("calloc"); + goto done; + } -static void -parsecommit(Object *o) -{ - char *p, *t, buf[128]; - int np; + /* Store offset to type+size information for this object. */ + obj->off = lseek(pack->fd, 0, SEEK_CUR); + if (obj->off == -1) { + err = got_error_from_errno("lseek"); + goto done; + } - p = o->data; - np = o->size; - o->commit = emalloc(sizeof(Cinfo)); - while(1){ - if(scanword(&p, &np, buf, sizeof(buf)) == -1) - break; - if(strcmp(buf, "tree") == 0){ - if(scanword(&p, &np, buf, sizeof(buf)) == -1) - errx(1, "invalid commit: tree missing"); - if(hparse(&o->commit->tree, buf) == -1) - errx(1, "invalid commit: garbled tree"); - }else if(strcmp(buf, "parent") == 0){ - if(scanword(&p, &np, buf, sizeof(buf)) == -1) - errx(1, "invalid commit: missing parent"); - o->commit->parent = realloc(o->commit->parent, ++o->commit->nparent * sizeof(struct got_object_id)); - if(!o->commit->parent) - err(1, "unable to malloc: "); - if(hparse(&o->commit->parent[o->commit->nparent - 1], buf) == -1) - errx(1, "invalid commit: garbled parent"); - }else if(strcmp(buf, "author") == 0){ - parseauthor(&p, &np, &o->commit->author, &o->commit->mtime); - }else if(strcmp(buf, "committer") == 0){ - parseauthor(&p, &np, &o->commit->committer, &o->commit->ctime); - }else if(strcmp(buf, "gpgsig") == 0){ - /* just drop it */ - if((t = strstr(p, "-----END PGP SIGNATURE-----")) == NULL) - errx(1, "malformed gpg signature"); - np -= t - p; - p = t; + err = read_packed_object(pack, obj); + if (err) + goto done; + + objects[i] = obj; + + if (0) { + err = object_crc(pack->fd, obj); + if (err) + goto done; } - nextline(&p, &np); - } - while (np && isspace(*p)) { - p++; - np--; - } - o->commit->msg = p; - o->commit->nmsg = np; -} -static void -parsetree(Object *o) -{ - char *p, buf[256]; - int np, nn, m; - Dirent *t; + if (obj->type == GOT_OBJ_TYPE_BLOB || + obj->type == GOT_OBJ_TYPE_TREE || + obj->type == GOT_OBJ_TYPE_COMMIT || + obj->type == GOT_OBJ_TYPE_TAG) { + objects[i]->valid = 1; + nloose++; + update_packidx(&nlarge, &packidx, nobj, obj); + } - p = o->data; - np = o->size; - o->tree = emalloc(sizeof(Tinfo)); - while(np > 0){ - if(scanword(&p, &np, buf, sizeof(buf)) == -1) - break; - o->tree->ent = erealloc(o->tree->ent, ++o->tree->nent * sizeof(Dirent)); - t = &o->tree->ent[o->tree->nent - 1]; - memset(t, 0, sizeof(Dirent)); - m = strtol(buf, NULL, 8); - /* FIXME: symlinks and other BS */ - if(m == 0160000){ - t->mode |= S_IFDIR; - t->modref = 1; + if (lseek(pack->fd, obj->off + obj->tslen + obj->len, + SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + goto done; } - t->mode = m & 0777; - if(m & 0040000) - t->mode |= S_IFDIR; - t->name = p; - nn = strlen(p) + 1; - p += nn; - np -= nn; - if(np < sizeof(t->h.sha1)) - errx(1, "malformed tree, remaining %d (%s)", np, p); - memcpy(t->h.sha1, p, sizeof(t->h.sha1)); - p += sizeof(t->h.sha1); - np -= sizeof(t->h.sha1); } -} + nvalid = nloose; -void -parseobject(Object *o) -{ - if(o->flag & Cparsed) - return; - switch(o->type){ - case GTree: parsetree(o); break; - case GCommit: parsecommit(o); break; - //case GTag: parsetag(o); break; - default: break; - } - o->flag |= Cparsed; -} + /* + * Second pass: We can now resolve deltas to compute the IDs of + * objects which appear in deltified form. Because deltas can be + * chained this pass may require a couple of iterations until all + * IDs of deltified objects have been discovered. + */ + pass++; + while (nvalid != nobj) { + int n = 0; + for (i = 0; i < nobj; i++) { + if (objects[i]->type != GOT_OBJ_TYPE_REF_DELTA && + objects[i]->type != GOT_OBJ_TYPE_OFFSET_DELTA) + continue; -static Object* -readidxobject(FILE *idx, struct got_object_id h, int flag) -{ - char path[Pathmax]; - char hbuf[41]; - FILE *f; - Object *obj; - int l, n; - off_t o; - struct dirent *ent; - DIR *d; + if (objects[i]->valid) + continue; + obj = objects[i]; + if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } - if ((obj = got_object_idset_lookup_data(objcache, &h))) { - if(obj->flag & Cloaded) - return obj; - if(obj->flag & Cidx){ - assert(idx != NULL); - o = ftello(idx); - if(fseek(idx, obj->off, SEEK_SET) == -1) - errx(1, "could not seek to object offset"); - if(readpacked(idx, obj, flag) == -1) - errx(1, "could not reload object"); - if(fseek(idx, o, SEEK_SET) == -1) - errx(1, "could not restore offset"); - cache(obj); - return obj; - } - } + err = resolve_deltified_object(pack, &packidx, obj); + if (err) { + if (err->code != GOT_ERR_NO_OBJ) + goto done; + /* + * We cannot resolve this object yet because + * a delta base is unknown. Try again later. + */ + continue; + } - d = NULL; - /* We're not putting it in the cache yet... */ - obj = emalloc(sizeof(Object)); - obj->id = next_object_id + 1; - obj->hash = h; + objects[i]->valid = 1; + n++; + update_packidx(&nlarge, &packidx, nobj, obj); + err = got_privsep_send_index_pack_progress(ibuf, nobj, nobj, + nloose, nresolved + n); + if (err) + goto done; - hashfmt(hbuf, sizeof(hbuf), &h); - snprintf(path, sizeof(path), ".git/objects/%c%c/%s", hbuf[0], hbuf[1], hbuf + 2); - if((f = fopen(path, "r")) != NULL){ - if(readloose(f, obj, flag) == -1) - goto error; - fclose(f); - parseobject(obj); - hashfmt(hbuf, sizeof(hbuf), &obj->hash); - fprintf(stderr, "object %s cached\n", hbuf); - cache(obj); - return obj; + } + if (pass++ > 3 && n == 0) { + static char msg[64]; + snprintf(msg, sizeof(msg), "could not resolve " + "any of deltas; packfile could be corrupt"); + err = got_error_msg(GOT_ERR_BAD_PACKFILE, msg); + goto done; + + } + if (nloose + nresolved == nobj) { + static char msg[64]; + snprintf(msg, sizeof(msg), + "fix point reached too early: %d/%d/%d", nvalid, nresolved, nobj); + err = got_error_msg(GOT_ERR_BAD_PACKFILE, msg); + goto done; + } + nresolved += n; + nvalid += nresolved; } - o = -1; - if ((d = opendir(".git/objects/pack")) == NULL) - err(1, "open pack dir"); - while ((ent = readdir(d)) != NULL) { - l = strlen(ent->d_name); - if(l > 4 && strcmp(ent->d_name + l - 4, ".idx") != 0) - continue; - snprintf(path, sizeof(path), ".git/objects/pack/%s", ent->d_name); - if((f = fopen(path, "r")) == NULL) - continue; - o = searchindex(f, h); - fclose(f); - if(o == -1) - continue; - break; + if (nloose + nresolved != nobj) { + static char msg[64]; + snprintf(msg, sizeof(msg), + "discovered only %d of %d objects", nloose + nresolved, nobj); + err = got_error_msg(GOT_ERR_BAD_PACKFILE, msg); + goto done; } - closedir(d); - if (o == -1) - goto error; + /* We may have seen duplicates. Update our total object count. */ + nobj = betoh32(packidx.hdr.fanout_table[0xff]); - if((n = snprintf(path, sizeof(path), "%s", path)) >= sizeof(path) - 4) - goto error; - memcpy(path + n - 4, ".pack", 6); - if((f = fopen(path, "r")) == NULL) - goto error; - if(fseek(f, o, SEEK_SET) == -1) - goto error; - if(readpacked(f, obj, flag) == -1) - goto error; - fclose(f); - parseobject(obj); - cache(obj); - return obj; -error: - free(obj); - return NULL; -} - -Object* -readobject(struct got_object_id h) -{ - Object *o; - - o = readidxobject(NULL, h, 0); - if(o) - ref(o); - return o; -} - -int -objcmp(const void *pa, const void *pb) -{ - Object *a, *b; - - a = *(Object**)pa; - b = *(Object**)pb; - return memcmp(a->hash.sha1, b->hash.sha1, sizeof(a->hash.sha1)); -} - -static int -hwrite(FILE *b, void *buf, int len, SHA1_CTX *ctx) -{ - SHA1Update(ctx, buf, len); - return fwrite(buf, 1, len, b); -} - -static uint32_t -objectcrc(FILE *f, Object *o) -{ - char buf[8096]; - int n, r; - - o->crc = 0; - fseek(f, o->off, SEEK_SET); - for(n = o->len; n > 0; n -= r){ - r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f); - if(r == -1) - return -1; - if(r == 0) - return 0; - o->crc = crc32(o->crc, buf, r); - } - return 0; -} - -int -indexpack(int packfd, int idxfd, struct got_object_id *packhash, - struct imsgbuf *ibuf) -{ - char hdr[4*3], buf[8]; - int nobj, nvalid, nbig, n, i, step; - Object *o, **objects; - char *valid; - SHA1_CTX ctx, objctx; - FILE *f; - struct got_object_id h; - int c; - - if ((f = fdopen(packfd, "r")) == NULL) - return -1; - if (fseek(f, 0, SEEK_SET) == -1) - return -1; - if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) { - fprintf(stderr, "short read on header\n"); - return -1; - } - if (memcmp(hdr, "PACK\0\0\0\2", 8) != 0) { - fprintf(stderr, "invalid header\n"); - return -1; - } - - nvalid = 0; - nobj = GETBE32(hdr + 8); - objects = calloc(nobj, sizeof(Object*)); - valid = calloc(nobj, sizeof(char)); - step = nobj/100; - if(!step) - step++; - while (nvalid != nobj) { - got_privsep_send_index_pack_progress(ibuf, nobj, nvalid); - n = 0; - for (i = 0; i < nobj; i++) { - if (valid[i]) { - n++; - continue; - } - if (!objects[i]) { - o = emalloc(sizeof(Object)); - o->off = ftello(f); - objects[i] = o; - } - o = objects[i]; - fseek(f, o->off, SEEK_SET); - if (readpacked(f, o, Cidx) == 0){ - SHA1Init(&objctx); - SHA1Update(&objctx, (uint8_t*)o->all, o->size + strlen(o->all) + 1); - SHA1Final(o->hash.sha1, &objctx); - cache(o); - valid[i] = 1; - n++; - } - if(objectcrc(f, o) == -1) - return -1; - } - if (n == nvalid) { - errx(1, "fix point reached too early: %d/%d", nvalid, nobj); - goto error; - } - nvalid = n; - } - fclose(f); - SHA1Init(&ctx); - qsort(objects, nobj, sizeof(Object*), objcmp); - if((f = fdopen(idxfd, "w")) == NULL) - return -1; - if(hwrite(f, "\xfftOc\x00\x00\x00\x02", 8, &ctx) != 8) - goto error; - /* fanout table */ - c = 0; - for(i = 0; i < 256; i++){ - while(c < nobj && (objects[c]->hash.sha1[0] & 0xff) <= i) - c++; - PUTBE32(buf, c); - hwrite(f, buf, 4, &ctx); - } + err = hwrite(idxfd, "\xfftOc\x00\x00\x00\x02", 8, &ctx); + if (err) + goto done; + err = hwrite(idxfd, packidx.hdr.fanout_table, + GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS * sizeof(uint32_t), &ctx); + if (err) + goto done; + err = hwrite(idxfd, packidx.hdr.sorted_ids, + nobj * SHA1_DIGEST_LENGTH, &ctx); + if (err) + goto done; for(i = 0; i < nobj; i++){ - o = objects[i]; - hwrite(f, o->hash.sha1, sizeof(o->hash.sha1), &ctx); - } - - /* pointless, nothing uses this */ - for(i = 0; i < nobj; i++){ PUTBE32(buf, objects[i]->crc); - hwrite(f, buf, 4, &ctx); + err = hwrite(idxfd, buf, 4, &ctx); + if (err) + goto done; } + err = hwrite(idxfd, packidx.hdr.offsets, nobj * sizeof(uint32_t), &ctx); + if (err) + goto done; + if (nlarge > 0) { + err = hwrite(idxfd, packidx.hdr.large_offsets, + nlarge * sizeof(uint64_t), &ctx); + if (err) + goto done; + } + err = hwrite(idxfd, pack_hash, SHA1_DIGEST_LENGTH, &ctx); + if (err) + goto done; - nbig = 0; - for(i = 0; i < nobj; i++){ - if(objects[i]->off <= (1ull<<31)) - PUTBE32(buf, objects[i]->off); - else - PUTBE32(buf, (1ull << 31) | nbig++); - hwrite(f, buf, 4, &ctx); + SHA1Final(packidx_hash, &ctx); + w = write(idxfd, packidx_hash, sizeof(packidx_hash)); + if (w == -1) { + err = got_error_from_errno("write"); + goto done; } - for(i = 0; i < nobj; i++){ - if(objects[i]->off > (1ull<<31)){ - PUTBE64(buf, objects[i]->off); - hwrite(f, buf, 8, &ctx); - } + if (w != sizeof(packidx_hash)) { + err = got_error(GOT_ERR_IO); + goto done; } - hwrite(f, packhash->sha1, sizeof(packhash->sha1), &ctx); - SHA1Final(h.sha1, &ctx); - fwrite(h.sha1, 1, sizeof(h.sha1), f); - - free(objects); - free(valid); - fclose(f); - return 0; - -error: - free(objects); - free(valid); - fclose(f); - return -1; +done: + free(packidx.hdr.magic); + free(packidx.hdr.version); + free(packidx.hdr.fanout_table); + free(packidx.hdr.sorted_ids); + free(packidx.hdr.offsets); + free(packidx.hdr.large_offsets); + return err; } int main(int argc, char **argv) { - const struct got_error *err = NULL; - struct got_object_id packhash; + const struct got_error *err = NULL, *close_err; struct imsgbuf ibuf; struct imsg imsg; - int packfd, idxfd; + int idxfd = -1; + struct got_pack pack; + uint8_t pack_hash[SHA1_DIGEST_LENGTH]; + off_t packfile_size; +#if 0 + static int attached; + while (!attached) + sleep(1); +#endif - objcache = got_object_idset_alloc(); - imsg_init(&ibuf, GOT_IMSG_FD_CHILD); - if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { - if (err->code == GOT_ERR_PRIVSEP_PIPE) - err = NULL; + memset(&pack, 0, sizeof(pack)); + pack.fd = -1; + pack.delta_cache = got_delta_cache_alloc(100, + GOT_DELTA_RESULT_SIZE_CACHED_MAX); + if (pack.delta_cache == NULL) { + err = got_error_from_errno("got_delta_cache_alloc"); goto done; } + + imsg_init(&ibuf, GOT_IMSG_FD_CHILD); + + err = got_privsep_recv_imsg(&imsg, &ibuf, 0); + if (err) + goto done; if (imsg.hdr.type == GOT_IMSG_STOP) goto done; if (imsg.hdr.type != GOT_IMSG_IDXPACK_REQUEST) { err = got_error(GOT_ERR_PRIVSEP_MSG); goto done; } - if (imsg.hdr.len - IMSG_HEADER_SIZE != SHA1_DIGEST_LENGTH) { + if (imsg.hdr.len - IMSG_HEADER_SIZE != sizeof(pack_hash)) { err = got_error(GOT_ERR_PRIVSEP_LEN); goto done; } - packfd = imsg.fd; - memcpy(packhash.sha1, imsg.data, SHA1_DIGEST_LENGTH); + memcpy(pack_hash, imsg.data, sizeof(pack_hash)); + pack.fd = imsg.fd; - if((err = got_privsep_recv_imsg(&imsg, &ibuf, 0)) != 0) { - if (err->code == GOT_ERR_PRIVSEP_PIPE) - err = NULL; + err = got_privsep_recv_imsg(&imsg, &ibuf, 0); + if (err) goto done; - } if (imsg.hdr.type == GOT_IMSG_STOP) goto done; if (imsg.hdr.type != GOT_IMSG_TMPFD) { @@ -1252,15 +738,37 @@ main(int argc, char **argv) } idxfd = imsg.fd; - indexpack(packfd, idxfd, &packhash, &ibuf); + if (lseek(pack.fd, 0, SEEK_END) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + packfile_size = lseek(pack.fd, 0, SEEK_CUR); + if (packfile_size == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + pack.filesize = packfile_size; /* XXX off_t vs size_t */ + + if (lseek(pack.fd, 0, SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + + err = index_pack(&pack, idxfd, pack_hash, &ibuf); done: - if(err != NULL) - got_privsep_send_error(&ibuf, err); - else + close_err = got_pack_close(&pack); + if (close_err && err == NULL) + err = close_err; + if (idxfd != -1 && close(idxfd) == -1 && err == NULL) + err = got_error_from_errno("close"); + + if (err == NULL) err = got_privsep_send_index_pack_done(&ibuf); - if(err != NULL) { + if (err) { + got_privsep_send_error(&ibuf, err); fprintf(stderr, "%s: %s\n", getprogname(), err->msg); got_privsep_send_error(&ibuf, err); + exit(1); } exit(0);