commit d582f26ced34c492facade402975ceabe4c97adc from: Stefan Sperling date: Wed Mar 18 16:13:48 2020 UTC write large objects to disk when resolving deltas; raise in-mem delta threshold commit - 7132995b43d677a3f6a031f64c4c6b5a2d0df413 commit + d582f26ced34c492facade402975ceabe4c97adc blob - f5838d2c1bad6f108584c87b669ba6ccd51e1ee6 blob + 7b43b9f815acc6fd4e8ce8c6763ee31895a5feed --- lib/fetch.c +++ lib/fetch.c @@ -61,6 +61,10 @@ #include "got_lib_privsep.h" #include "got_lib_object_cache.h" #include "got_lib_repository.h" + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif #define GOT_PROTOMAX 64 #define GOT_HOSTMAX 256 @@ -351,7 +355,7 @@ got_fetch_pack(struct got_object_id **pack_hash, struc { int imsg_fetchfds[2], imsg_idxfds[2]; int packfd = -1, npackfd = -1, idxfd = -1, nidxfd = -1, nfetchfd = -1; - int tmpfd = -1; + int tmpfds[3], i; int fetchstatus, idxstatus, done = 0; const struct got_error *err; struct imsgbuf fetchibuf, idxibuf; @@ -365,6 +369,8 @@ got_fetch_pack(struct got_object_id **pack_hash, struc char *path; *pack_hash = NULL; + for (i = 0; i < nitems(tmpfds); i++) + tmpfds[i] = -1; TAILQ_INIT(&have_refs); @@ -397,10 +403,12 @@ got_fetch_pack(struct got_object_id **pack_hash, struc goto done; } - tmpfd = got_opentempfd(); - if (tmpfd == -1) { - err = got_error_from_errno("got_opentempfd"); - goto done; + for (i = 0; i < nitems(tmpfds); i++) { + tmpfds[i] = got_opentempfd(); + if (tmpfds[i] == -1) { + err = got_error_from_errno("got_opentempfd"); + goto done; + } } if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1) { @@ -518,10 +526,12 @@ got_fetch_pack(struct got_object_id **pack_hash, struc if (err != NULL) goto done; nidxfd = -1; - err = got_privsep_send_tmpfd(&idxibuf, tmpfd); - if (err != NULL) - goto done; - tmpfd = -1; + for (i = 0; i < nitems(tmpfds); i++) { + err = got_privsep_send_tmpfd(&idxibuf, tmpfds[i]); + if (err != NULL) + goto done; + tmpfds[i] = -1; + } done = 0; while (!done) { int nobj_total, nobj_indexed, nobj_loose, nobj_resolved; @@ -582,8 +592,10 @@ done: err = got_error_from_errno("close"); if (idxfd != -1 && close(idxfd) == -1 && err == NULL) err = got_error_from_errno("close"); - if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL) - err = got_error_from_errno("close"); + for (i = 0; i < nitems(tmpfds); i++) { + if (tmpfds[i] != -1 && close(tmpfds[i]) == -1 && err == NULL) + err = got_error_from_errno("close"); + } free(tmppackpath); free(tmpidxpath); free(idxpath); blob - b04e243f7e28e091a99469856203d981d23691d5 blob + 4a95b0e905ba9cd4de20bfb2bb5cfea429f63b7d --- lib/got_lib_delta.h +++ lib/got_lib_delta.h @@ -44,7 +44,7 @@ const struct got_error *got_delta_apply(FILE *, const * The amount of result data we may keep in RAM while applying deltas. * Data larger than this is written to disk during delta application (slow). */ -#define GOT_DELTA_RESULT_SIZE_CACHED_MAX (4 * 1024 * 1024) /* bytes */ +#define GOT_DELTA_RESULT_SIZE_CACHED_MAX (8 * 1024 * 1024) /* bytes */ /* * Definitions for delta data streams. blob - f95085e6eb708f154a28d9d554aa102e69b4f939 blob + 33296145a15cd5bcb096df05bfba3f21cf7fba49 --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -177,6 +177,8 @@ const struct got_error *got_packidx_match_id_str_prefi const struct got_error *got_packfile_open_object(struct got_object **, struct got_pack *, struct got_packidx *, int, struct got_object_id *); +const struct got_error *got_pack_get_delta_chain_max_size(uint64_t *, + struct got_delta_chain *, struct got_pack *); const struct got_error *got_pack_get_max_delta_object_size(uint64_t *, struct got_object *, struct got_pack *); const struct got_error *got_pack_dump_delta_chain_to_file(size_t *, blob - 9dc1a833c99e6bd6033482c75b22b9472806ff2d blob + 6aab7a59bcf8356a23481fce002c7a4b6278024a --- lib/pack.c +++ lib/pack.c @@ -966,8 +966,8 @@ got_packfile_open_object(struct got_object **obj, stru return err; } -static const struct got_error * -get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas, +const struct got_error * +got_pack_get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas, struct got_pack *pack) { struct got_delta *delta; @@ -1026,7 +1026,7 @@ got_pack_get_max_delta_object_size(uint64_t *size, str if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0) return got_error(GOT_ERR_OBJ_TYPE); - return get_delta_chain_max_size(size, &obj->deltas, pack); + return got_pack_get_delta_chain_max_size(size, &obj->deltas, pack); } const struct got_error * @@ -1047,7 +1047,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, return got_error(GOT_ERR_BAD_DELTA_CHAIN); /* We process small enough files entirely in memory for speed. */ - err = get_delta_chain_max_size(&max_size, deltas, pack); + err = got_pack_get_delta_chain_max_size(&max_size, deltas, pack); if (err) return err; if (max_size < GOT_DELTA_RESULT_SIZE_CACHED_MAX) { @@ -1215,7 +1215,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz if (SIMPLEQ_EMPTY(&deltas->entries)) return got_error(GOT_ERR_BAD_DELTA_CHAIN); - err = get_delta_chain_max_size(&max_size, deltas, pack); + err = got_pack_get_delta_chain_max_size(&max_size, deltas, pack); if (err) return err; accum_buf = malloc(max_size); blob - cbeb792b83ccf50e03c5d50564b72a2b7d707a11 blob + e2ac91ea26b2772a6a81c2d5fb5b7c950157677d --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -52,6 +52,10 @@ #include "got_lib_pack.h" #include "got_lib_delta_cache.h" +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + struct got_indexed_object { struct got_object_id id; @@ -145,13 +149,13 @@ read_crc(uint32_t *crc, int fd, size_t len) } static const struct got_error * -read_file_sha1(SHA1_CTX *ctx, FILE *f) +read_file_sha1(SHA1_CTX *ctx, FILE *f, size_t len) { uint8_t buf[8192]; - size_t r; + size_t n, r; - for (;;) { - r = fread(buf, 1, sizeof(buf), f); + for (n = len; n > 0; n -= r) { + r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f); if (r == 0) { if (feof(f)) return NULL; @@ -170,7 +174,7 @@ read_packed_object(struct got_pack *pack, struct got_i const struct got_error *err = NULL; SHA1_CTX ctx; uint8_t *data = NULL; - size_t datalen; + size_t datalen = 0; ssize_t n; char *header; size_t headerlen; @@ -238,7 +242,7 @@ read_packed_object(struct got_pack *pack, struct got_i headerlen = strlen(header) + 1; SHA1Update(&ctx, header, headerlen); if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { - err = read_file_sha1(&ctx, tmpfile); + err = read_file_sha1(&ctx, tmpfile, datalen); if (err) break; } else @@ -349,16 +353,18 @@ hwrite(int fd, void *buf, int len, SHA1_CTX *ctx) static const struct got_error * resolve_deltified_object(struct got_pack *pack, struct got_packidx *packidx, - struct got_indexed_object *obj) + struct got_indexed_object *obj, FILE *tmpfile, FILE *delta_base_file, + FILE *delta_accum_file) { const struct got_error *err = NULL; struct got_delta_chain deltas; struct got_delta *delta; uint8_t *buf = NULL; - size_t len; + size_t len = 0; SHA1_CTX ctx; char *header = NULL; size_t headerlen; + uint64_t max_size; int base_obj_type; const char *obj_label; @@ -371,12 +377,23 @@ resolve_deltified_object(struct got_pack *pack, struct if (err) goto done; - /* XXX TODO reading large objects into memory is bad! */ - err = got_pack_dump_delta_chain_to_mem(&buf, &len, &deltas, pack); + err = got_pack_get_delta_chain_max_size(&max_size, &deltas, pack); if (err) goto done; - - SHA1Init(&ctx); + if (max_size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + rewind(tmpfile); + rewind(delta_base_file); + rewind(delta_accum_file); + err = got_pack_dump_delta_chain_to_file(&len, &deltas, + pack, tmpfile, delta_base_file, delta_accum_file); + if (err) + goto done; + } else { + err = got_pack_dump_delta_chain_to_mem(&buf, &len, + &deltas, pack); + } + if (err) + goto done; err = got_delta_chain_get_base_type(&base_obj_type, &deltas); if (err) @@ -389,8 +406,14 @@ resolve_deltified_object(struct got_pack *pack, struct goto done; } headerlen = strlen(header) + 1; + SHA1Init(&ctx); SHA1Update(&ctx, header, headerlen); - SHA1Update(&ctx, buf, len); + if (max_size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + err = read_file_sha1(&ctx, tmpfile, len); + if (err) + goto done; + } else + SHA1Update(&ctx, buf, len); SHA1Final(obj->id.sha1, &ctx); done: free(buf); @@ -550,7 +573,8 @@ update_packidx(struct got_packidx *packidx, int nobj, static const struct got_error * index_pack(struct got_pack *pack, int idxfd, FILE *tmpfile, - uint8_t *pack_hash, struct imsgbuf *ibuf) + FILE *delta_base_file, FILE *delta_accum_file, uint8_t *pack_hash, + struct imsgbuf *ibuf) { const struct got_error *err; struct got_packfile_hdr hdr; @@ -748,7 +772,8 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp goto done; } - err = resolve_deltified_object(pack, &packidx, obj); + err = resolve_deltified_object(pack, &packidx, obj, + tmpfile, delta_base_file, delta_accum_file); if (err) { if (err->code != GOT_ERR_NO_OBJ) goto done; @@ -862,8 +887,8 @@ main(int argc, char **argv) const struct got_error *err = NULL, *close_err; struct imsgbuf ibuf; struct imsg imsg; - int idxfd = -1, tmpfd = -1; - FILE *tmpfile = NULL; + int idxfd = -1, tmpfd = -1, i; + FILE *tmpfiles[3]; struct got_pack pack; uint8_t pack_hash[SHA1_DIGEST_LENGTH]; off_t packfile_size; @@ -873,6 +898,9 @@ main(int argc, char **argv) sleep(1); #endif + for (i = 0; i < nitems(tmpfiles); i++) + tmpfiles[i] = NULL; + memset(&pack, 0, sizeof(pack)); pack.fd = -1; pack.delta_cache = got_delta_cache_alloc(500, @@ -922,26 +950,28 @@ main(int argc, char **argv) } idxfd = imsg.fd; - err = got_privsep_recv_imsg(&imsg, &ibuf, 0); - if (err) - goto done; - if (imsg.hdr.type == GOT_IMSG_STOP) - goto done; - if (imsg.hdr.type != GOT_IMSG_TMPFD) { - err = got_error(GOT_ERR_PRIVSEP_MSG); - goto done; + for (i = 0; i < nitems(tmpfiles); i++) { + err = got_privsep_recv_imsg(&imsg, &ibuf, 0); + if (err) + goto done; + if (imsg.hdr.type == GOT_IMSG_STOP) + goto done; + if (imsg.hdr.type != GOT_IMSG_TMPFD) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + goto done; + } + if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + goto done; + } + tmpfd = imsg.fd; + tmpfiles[i] = fdopen(tmpfd, "w+"); + if (tmpfiles[i] == NULL) { + err = got_error_from_errno("fdopen"); + goto done; + } + tmpfd = -1; } - if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) { - err = got_error(GOT_ERR_PRIVSEP_LEN); - goto done; - } - tmpfd = imsg.fd; - tmpfile = fdopen(tmpfd, "w+"); - if (tmpfile == NULL) { - err = got_error_from_errno("fdopen"); - goto done; - } - tmpfd = -1; if (lseek(pack.fd, 0, SEEK_END) == -1) { err = got_error_from_errno("lseek"); @@ -965,7 +995,8 @@ main(int argc, char **argv) if (pack.map == MAP_FAILED) pack.map = NULL; /* fall back to read(2) */ #endif - err = index_pack(&pack, idxfd, tmpfile, pack_hash, &ibuf); + err = index_pack(&pack, idxfd, tmpfiles[0], tmpfiles[1], tmpfiles[2], + pack_hash, &ibuf); done: close_err = got_pack_close(&pack); if (close_err && err == NULL) @@ -974,6 +1005,11 @@ done: err = got_error_from_errno("close"); if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL) err = got_error_from_errno("close"); + for (i = 0; i < nitems(tmpfiles); i++) { + if (tmpfiles[i] != NULL && fclose(tmpfiles[i]) == EOF && + err == NULL) + err = got_error_from_errno("close"); + } if (err == NULL) err = got_privsep_send_index_pack_done(&ibuf);