commit 4fb2bb7d5b77a57f2a55890f472f90ebd61755e0 from: Stefan Sperling date: Mon Feb 27 10:07:50 2023 UTC make gotd repo_read store want/have commit IDs in ID sets rather than arrays Currently only used to detect and avoid storing duplicate IDs sent in want and have lines by the client. If in the future we ever wanted to check which IDs the client has already sent us we could now do O(1) hash table lookups rather than iterating arrays. ok op@ commit - 6685e2e8b2b4865d3b68e933360528e6382eb28e commit + 4fb2bb7d5b77a57f2a55890f472f90ebd61755e0 blob - fa9d0fe7c98493ac9d2d6fa32f34baa5310f2e37 blob + 378a3faff5b94e566a97bc638b8a0b4abdb6ad22 --- gotd/repo_read.c +++ gotd/repo_read.c @@ -71,8 +71,8 @@ static struct repo_read_client { int delta_cache_fd; int report_progress; int pack_pipe; - struct gotd_object_id_array want_ids; - struct gotd_object_id_array have_ids; + struct got_object_idset *want_ids; + struct got_object_idset *have_ids; } repo_read_client; static volatile sig_atomic_t sigint_received; @@ -373,8 +373,9 @@ done: } static const struct got_error * -record_object_id(struct gotd_object_id_array *array, struct got_object_id *id) +append_object_id(struct got_object_id *id, void *data, void *arg) { + struct gotd_object_id_array *array = arg; const size_t alloc_chunksz = 256; if (array->ids == NULL) { @@ -394,25 +395,9 @@ record_object_id(struct gotd_object_id_array *array, s array->nalloc += alloc_chunksz; } - array->ids[array->nids] = got_object_id_dup(id); - if (array->ids[array->nids] == NULL) - return got_error_from_errno("got_object_id_dup"); + array->ids[array->nids] = id; array->nids++; return NULL; -} - -static void -free_object_ids(struct gotd_object_id_array *array) -{ - size_t i; - - for (i = 0; i < array->nids; i++) - free(array->ids[i]); - free(array->ids); - - array->ids = NULL; - array->nalloc = 0; - array->nids = 0; } static const struct got_error * @@ -449,9 +434,11 @@ recv_want(struct imsg *imsg) obj_type != GOT_OBJ_TYPE_TAG) return got_error(GOT_ERR_OBJ_TYPE); - err = record_object_id(&client->want_ids, &id); - if (err) - return err; + if (!got_object_idset_contains(client->want_ids, &id)) { + err = got_object_idset_add(client->want_ids, &id, NULL); + if (err) + return err; + } gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid); imsg_clear(&ibuf); @@ -501,9 +488,11 @@ recv_have(struct imsg *imsg) goto done; } - err = record_object_id(&client->have_ids, &id); - if (err) - return err; + if (!got_object_idset_contains(client->have_ids, &id)) { + err = got_object_idset_add(client->have_ids, &id, NULL); + if (err) + goto done; + } gotd_imsg_send_ack(&id, &ibuf, PROC_REPO_READ, repo_read.pid); done: @@ -624,9 +613,14 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i struct imsgbuf ibuf; struct repo_read_pack_progress_arg pa; struct got_ratelimit rl; + struct gotd_object_id_array want_ids; + struct gotd_object_id_array have_ids; log_debug("packfile request received"); + memset(&want_ids, 0, sizeof(want_ids)); + memset(&have_ids, 0, sizeof(have_ids)); + got_ratelimit_init(&rl, 2, 0); if (client->delta_cache_fd == -1 || client->pack_pipe == -1) @@ -645,9 +639,17 @@ send_packfile(struct imsg *imsg, struct gotd_imsgev *i pa.ibuf = &ibuf; pa.report_progress = client->report_progress; + err = got_object_idset_for_each(client->want_ids, + append_object_id, &want_ids); + if (err) + goto done; + err = got_object_idset_for_each(client->have_ids, + append_object_id, &have_ids); + if (err) + goto done; + err = got_pack_create(packsha1, client->pack_pipe, delta_cache, - client->have_ids.ids, client->have_ids.nids, - client->want_ids.ids, client->want_ids.nids, + have_ids.ids, have_ids.nids, want_ids.ids, want_ids.nids, repo_read.repo, 0, 1, 0, pack_progress, &pa, &rl, check_cancelled, NULL); if (err) @@ -666,6 +668,8 @@ done: if (delta_cache != NULL && fclose(delta_cache) == EOF && err == NULL) err = got_error_from_errno("fclose"); imsg_clear(&ibuf); + free(want_ids.ids); + free(have_ids.ids); return err; } @@ -867,6 +871,16 @@ repo_read_main(const char *title, const char *repo_pat client->fd = -1; client->delta_cache_fd = -1; client->pack_pipe = -1; + client->have_ids = got_object_idset_alloc(); + if (client->have_ids == NULL) { + err = got_error_from_errno("got_object_idset_alloc"); + goto done; + } + client->want_ids = got_object_idset_alloc(); + if (client->want_ids == NULL) { + err = got_error_from_errno("got_object_idset_alloc"); + goto done; + } repo_read.title = title; repo_read.pid = getpid(); @@ -917,8 +931,10 @@ repo_read_shutdown(void) log_debug("%s: shutting down", repo_read.title); - free_object_ids(&client->have_ids); - free_object_ids(&client->want_ids); + if (client->have_ids) + got_object_idset_free(client->have_ids); + if (client->want_ids) + got_object_idset_free(client->want_ids); if (client->fd != -1) close(client->fd); if (client->delta_cache_fd != -1)