commit - e1c219c8e6fb81b93f7385e2b0872cc6ddfc35f7
commit + 619de35f3933b60159828e7115fca7fdb9bbb5e8
blob - 5346f7f2ce1559b966b7c6e2f1bde6da40d729a4
blob + 750be4962021434fb27d2dcc95867ff2e333a776
--- lib/got_lib_object_idset.h
+++ lib/got_lib_object_idset.h
const struct got_error *(*cb)(struct got_object_id *, void *, void *),
void *);
int got_object_idset_num_elements(struct got_object_idset *);
-
-struct got_object_idset_element;
-struct got_object_idset_element *got_object_idset_get_element(
- struct got_object_idset *, struct got_object_id *);
-void *got_object_idset_get_element_data(struct got_object_idset_element *);
-const struct got_error *got_object_idset_for_each_element(struct got_object_idset *,
- const struct got_error *(*cb)(struct got_object_idset_element *, void *), void *);
-void got_object_idset_remove_element(struct got_object_idset *,
- struct got_object_idset_element *);
-
blob - fcd77a0c22a86ae56b534d992bc21b9c76aa5354
blob + 3494cba330f8f89dc343857f019e1140d21ee59f
--- lib/object_idset.c
+++ lib/object_idset.c
/*
- * Copyright (c) 2018, 2019 Stefan Sperling <stsp@openbsd.org>
+ * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <zlib.h>
#include <limits.h>
#include <time.h>
+#include <errno.h>
+#include <siphash.h>
#include "got_compat.h"
#include "got_lib_inflate.h"
#include "got_lib_object.h"
#include "got_lib_object_idset.h"
+#include "got_lib_object_parse.h"
-struct got_object_idset_element {
- RB_ENTRY(got_object_idset_element) entry;
- struct got_object_id id;
- void *data; /* API user data */
-};
+#define GOT_OBJECT_IDSET_MIN_BUCKETS 64
-RB_HEAD(got_object_idset_tree, got_object_idset_element);
-
-static int
-cmp_elements(const struct got_object_idset_element *e1,
- const struct got_object_idset_element *e2)
-{
- return got_object_id_cmp(&e1->id, &e2->id);
-}
-
-RB_PROTOTYPE(got_object_idset_tree, got_object_idset_element, entry,
- cmp_elements);
-
struct got_object_idset {
- struct got_object_idset_tree entries;
- int totelem;
-#define GOT_OBJECT_IDSET_MAX_ELEM INT_MAX
+ struct got_object_id_queue *ids;
+ size_t nbuckets;
+ unsigned int totelem;
+ unsigned int flags;
+#define GOT_OBJECT_IDSET_F_TRAVERSAL 0x01
+#define GOT_OBJECT_IDSET_F_NOMEM 0x02
+ SIPHASH_KEY key;
};
struct got_object_idset *
got_object_idset_alloc(void)
{
struct got_object_idset *set;
+ int i;
set = malloc(sizeof(*set));
if (set == NULL)
return NULL;
- RB_INIT(&set->entries);
- set->totelem = 0;
+ set->ids = calloc(sizeof(set->ids[0]), GOT_OBJECT_IDSET_MIN_BUCKETS);
+ if (set->ids == NULL) {
+ free(set);
+ return NULL;
+ }
+ for (i = 0; i < GOT_OBJECT_IDSET_MIN_BUCKETS; i++)
+ STAILQ_INIT(&set->ids[i]);
+ set->totelem = 0;
+ set->nbuckets = GOT_OBJECT_IDSET_MIN_BUCKETS;
+ set->flags = 0;
+ arc4random_buf(&set->key, sizeof(set->key));
return set;
}
void
got_object_idset_free(struct got_object_idset *set)
{
- struct got_object_idset_element *entry;
+ size_t i;
+ struct got_object_qid *qid;
- while ((entry = RB_MIN(got_object_idset_tree, &set->entries))) {
- RB_REMOVE(got_object_idset_tree, &set->entries, entry);
- /* User data should be freed by caller. */
- free(entry);
+ for (i = 0; i < set->nbuckets; i++) {
+ while (!STAILQ_EMPTY(&set->ids[i])) {
+ qid = STAILQ_FIRST(&set->ids[i]);
+ STAILQ_REMOVE(&set->ids[i], qid, got_object_qid, entry);
+ got_object_qid_free(qid);
+ }
}
-
+ /* User data should be freed by caller. */
+ free(set->ids);
free(set);
}
-const struct got_error *
-got_object_idset_add(struct got_object_idset *set, struct got_object_id *id,
- void *data)
+static uint64_t
+idset_hash(struct got_object_idset *set, struct got_object_id *id)
{
- struct got_object_idset_element *new;
+ return SipHash24(&set->key, id->sha1, sizeof(id->sha1));
+}
- if (set->totelem >= GOT_OBJECT_IDSET_MAX_ELEM)
- return got_error(GOT_ERR_NO_SPACE);
+static const struct got_error *
+idset_resize(struct got_object_idset *set, size_t nbuckets)
+{
+ struct got_object_id_queue *ids;
+ size_t i;
- new = malloc(sizeof(*new));
- if (new == NULL)
- return got_error_from_errno("malloc");
+ ids = calloc(nbuckets, sizeof(ids[0]));
+ if (ids == NULL) {
+ if (errno != ENOMEM)
+ return got_error_from_errno("calloc");
+ /* Proceed with our current amount of hash buckets. */
+ set->flags |= GOT_OBJECT_IDSET_F_NOMEM;
+ return NULL;
+ }
- memcpy(&new->id, id, sizeof(new->id));
- new->data = data;
+ for (i = 0; i < nbuckets; i++)
+ STAILQ_INIT(&ids[i]);
- if (RB_INSERT(got_object_idset_tree, &set->entries, new) != NULL) {
- free(new);
- return got_error(GOT_ERR_OBJ_EXISTS);
+ arc4random_buf(&set->key, sizeof(set->key));
+
+ for (i = 0; i < set->nbuckets; i++) {
+ while (!STAILQ_EMPTY(&set->ids[i])) {
+ struct got_object_qid *qid;
+ uint64_t idx;
+ qid = STAILQ_FIRST(&set->ids[i]);
+ STAILQ_REMOVE(&set->ids[i], qid, got_object_qid, entry);
+ idx = idset_hash(set, qid->id) % nbuckets;
+ STAILQ_INSERT_HEAD(&ids[idx], qid, entry);
+ }
}
- set->totelem++;
+ free(set->ids);
+ set->ids = ids;
+ set->nbuckets = nbuckets;
return NULL;
}
-static struct got_object_idset_element *
-find_element(struct got_object_idset *set, struct got_object_id *id)
+static const struct got_error *
+idset_grow(struct got_object_idset *set)
{
- struct got_object_idset_element *entry;
+ size_t nbuckets;
- entry = RB_ROOT(&set->entries);
- while (entry) {
- int cmp = got_object_id_cmp(id, &entry->id);
- if (cmp < 0)
- entry = RB_LEFT(entry, entry);
- else if (cmp > 0)
- entry = RB_RIGHT(entry, entry);
- else
- break;
+ if (set->flags & GOT_OBJECT_IDSET_F_NOMEM)
+ return NULL;
+
+ if (set->nbuckets >= UINT_MAX / 2)
+ nbuckets = UINT_MAX;
+ else
+ nbuckets = set->nbuckets * 2;
+
+ return idset_resize(set, nbuckets);
+}
+
+const struct got_error *
+got_object_idset_add(struct got_object_idset *set, struct got_object_id *id,
+ void *data)
+{
+ const struct got_error *err;
+ struct got_object_qid *qid;
+ uint64_t idx;
+ struct got_object_id_queue *head;
+
+ /* This function may resize the set. */
+ if (set->flags & GOT_OBJECT_IDSET_F_TRAVERSAL)
+ return got_error_msg(GOT_ERR_NOT_IMPL,
+ "cannot add elements to idset during traversal");
+
+ if (set->totelem == UINT_MAX)
+ return got_error(GOT_ERR_NO_SPACE);
+
+ err = got_object_qid_alloc_partial(&qid);
+ if (err)
+ return err;
+ memcpy(qid->id, id, sizeof(*qid->id));
+ qid->data = data;
+
+ idx = idset_hash(set, id) % set->nbuckets;
+ head = &set->ids[idx];
+ STAILQ_INSERT_HEAD(head, qid, entry);
+ set->totelem++;
+
+ if (set->nbuckets < set->totelem)
+ err = idset_grow(set);
+
+ return err;
+}
+
+static struct got_object_qid *
+find_element(struct got_object_idset *set, struct got_object_id *id)
+{
+ uint64_t idx = idset_hash(set, id) % set->nbuckets;
+ struct got_object_id_queue *head = &set->ids[idx];
+ struct got_object_qid *qid;
+
+ STAILQ_FOREACH(qid, head, entry) {
+ if (got_object_id_cmp(qid->id, id) == 0)
+ return qid;
}
- return entry;
+ return NULL;
}
void *
got_object_idset_get(struct got_object_idset *set, struct got_object_id *id)
{
- struct got_object_idset_element *entry = find_element(set, id);
- return entry ? entry->data : NULL;
+ struct got_object_qid *qid = find_element(set, id);
+ return qid ? qid->data : NULL;
}
const struct got_error *
got_object_idset_remove(void **data, struct got_object_idset *set,
struct got_object_id *id)
{
- struct got_object_idset_element *entry;
+ uint64_t idx;
+ struct got_object_id_queue *head;
+ struct got_object_qid *qid;
if (data)
*data = NULL;
return got_error(GOT_ERR_NO_OBJ);
if (id == NULL) {
- entry = RB_ROOT(&set->entries);
- if (entry == NULL)
- return got_error(GOT_ERR_NO_OBJ);
+ /* Remove a "random" element. */
+ for (idx = 0; idx < set->nbuckets; idx++) {
+ head = &set->ids[idx];
+ qid = STAILQ_FIRST(head);
+ if (qid)
+ break;
+ }
} else {
- entry = find_element(set, id);
- if (entry == NULL)
+ idx = idset_hash(set, id) % set->nbuckets;
+ head = &set->ids[idx];
+ STAILQ_FOREACH(qid, head, entry) {
+ if (got_object_id_cmp(qid->id, id) == 0)
+ break;
+ }
+ if (qid == NULL)
return got_error_no_obj(id);
}
- RB_REMOVE(got_object_idset_tree, &set->entries, entry);
if (data)
- *data = entry->data;
- free(entry);
+ *data = qid->data;
+ STAILQ_REMOVE(head, qid, got_object_qid, entry);
+ got_object_qid_free(qid);
set->totelem--;
+
return NULL;
}
got_object_idset_contains(struct got_object_idset *set,
struct got_object_id *id)
{
- struct got_object_idset_element *entry = find_element(set, id);
- return entry ? 1 : 0;
+ struct got_object_qid *qid = find_element(set, id);
+ return qid ? 1 : 0;
}
const struct got_error *
const struct got_error *(*cb)(struct got_object_id *, void *, void *),
void *arg)
{
- const struct got_error *err;
- struct got_object_idset_element *entry, *tmp;
+ const struct got_error *err = NULL;
+ struct got_object_id_queue *head;
+ struct got_object_qid *qid, *tmp;
+ size_t i;
- RB_FOREACH_SAFE(entry, got_object_idset_tree, &set->entries, tmp) {
- err = (*cb)(&entry->id, entry->data, arg);
- if (err)
- return err;
+ set->flags |= GOT_OBJECT_IDSET_F_TRAVERSAL;
+ for (i = 0; i < set->nbuckets; i++) {
+ head = &set->ids[i];
+ STAILQ_FOREACH_SAFE(qid, head, entry, tmp) {
+ err = (*cb)(qid->id, qid->data, arg);
+ if (err)
+ goto done;
+ }
}
- return NULL;
+done:
+ set->flags &= ~GOT_OBJECT_IDSET_F_TRAVERSAL;
+ return err;
}
int
{
return set->totelem;
}
-
-struct got_object_idset_element *
-got_object_idset_get_element(struct got_object_idset *set, struct got_object_id *id)
-{
- return find_element(set, id);
-}
-
-void *
-got_object_idset_get_element_data(struct got_object_idset_element *entry)
-{
- return entry->data;
-}
-
-const struct got_error *
-got_object_idset_for_each_element(struct got_object_idset *set,
- const struct got_error *(*cb)(struct got_object_idset_element *, void *),
- void *arg)
-{
- const struct got_error *err;
- struct got_object_idset_element *entry, *tmp;
-
- RB_FOREACH_SAFE(entry, got_object_idset_tree, &set->entries, tmp) {
- err = (*cb)(entry, arg);
- if (err)
- return err;
- }
- return NULL;
-}
-
-void
-got_object_idset_remove_element(struct got_object_idset *set,
- struct got_object_idset_element *entry)
-{
- RB_REMOVE(got_object_idset_tree, &set->entries, entry);
- free(entry);
- set->totelem--;
-}
-
-RB_GENERATE(got_object_idset_tree, got_object_idset_element, entry,
- cmp_elements);
blob - 2ef1f00d3982e54a3ee1f146da6184a65207f397
blob + 9fea56df7e029b0d0b6d6901001a4de065f35397
--- lib/pack_create.c
+++ lib/pack_create.c
}
static const struct got_error *
-remove_unused_object(struct got_object_idset_element *entry, void *arg)
+remove_unused_object(struct got_object_id *id, void *data, void *arg)
{
struct got_object_idset *idset = arg;
- if (got_object_idset_get_element_data(entry) == NULL)
- got_object_idset_remove_element(idset, entry);
+ if (data == NULL)
+ got_object_idset_remove(NULL, idset, id);
return NULL;
}
static const struct got_error *
-remove_reused_object(struct got_object_idset_element *entry, void *arg)
+remove_reused_object(struct got_object_id *id, void *data, void *arg)
{
struct got_object_idset *idset = arg;
- struct got_pack_meta *m;
+ struct got_pack_meta *m = data;
- m = got_object_idset_get_element_data(entry);
if (m->have_reused_delta)
- got_object_idset_remove_element(idset, entry);
+ got_object_idset_remove(NULL, idset, id);
return NULL;
}
if (err)
return err;
- err = got_object_idset_for_each_element(idset,
- remove_unused_object, idset);
+ err = got_object_idset_for_each(idset, remove_unused_object, idset);
if (err)
goto done;
if (err)
goto done;
if (reuse.nmeta > 0) {
- err = got_object_idset_for_each_element(idset,
+ err = got_object_idset_for_each(idset,
remove_reused_object, idset);
if (err)
goto done;