Blame


1 ab2f42e7 2019-11-10 stsp /*
2 5aa81393 2020-01-06 stsp * Copyright (c) 2019, 2020 Stefan Sperling <stsp@openbsd.org>
3 ab2f42e7 2019-11-10 stsp *
4 ab2f42e7 2019-11-10 stsp * Permission to use, copy, modify, and distribute this software for any
5 ab2f42e7 2019-11-10 stsp * purpose with or without fee is hereby granted, provided that the above
6 ab2f42e7 2019-11-10 stsp * copyright notice and this permission notice appear in all copies.
7 ab2f42e7 2019-11-10 stsp *
8 ab2f42e7 2019-11-10 stsp * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 ab2f42e7 2019-11-10 stsp * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 ab2f42e7 2019-11-10 stsp * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 ab2f42e7 2019-11-10 stsp * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 ab2f42e7 2019-11-10 stsp * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 ab2f42e7 2019-11-10 stsp * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 ab2f42e7 2019-11-10 stsp * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 ab2f42e7 2019-11-10 stsp */
16 ab2f42e7 2019-11-10 stsp
17 ab2f42e7 2019-11-10 stsp #include <sys/queue.h>
18 ab2f42e7 2019-11-10 stsp
19 ab2f42e7 2019-11-10 stsp #include <stdlib.h>
20 ab2f42e7 2019-11-10 stsp #include <string.h>
21 d89a2828 2022-07-05 stsp #include <stdint.h>
22 ab2f42e7 2019-11-10 stsp #include <sha1.h>
23 5822e79e 2023-02-23 op #include <sha2.h>
24 ab2f42e7 2019-11-10 stsp #include <stdio.h>
25 ab2f42e7 2019-11-10 stsp #include <zlib.h>
26 ab2f42e7 2019-11-10 stsp #include <limits.h>
27 ab2f42e7 2019-11-10 stsp #include <time.h>
28 dac5c75e 2022-06-04 stsp #include <errno.h>
29 dac5c75e 2022-06-04 stsp #include <siphash.h>
30 ab2f42e7 2019-11-10 stsp
31 ab2f42e7 2019-11-10 stsp #include "got_object.h"
32 ab2f42e7 2019-11-10 stsp #include "got_error.h"
33 ab2f42e7 2019-11-10 stsp
34 ab2f42e7 2019-11-10 stsp #include "got_lib_delta.h"
35 ab2f42e7 2019-11-10 stsp #include "got_lib_inflate.h"
36 ab2f42e7 2019-11-10 stsp #include "got_lib_object.h"
37 ab2f42e7 2019-11-10 stsp #include "got_lib_delta_cache.h"
38 ab2f42e7 2019-11-10 stsp
39 ab2f42e7 2019-11-10 stsp #ifndef nitems
40 ab2f42e7 2019-11-10 stsp #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
41 ab2f42e7 2019-11-10 stsp #endif
42 ab2f42e7 2019-11-10 stsp
43 dac5c75e 2022-06-04 stsp #define GOT_DELTA_CACHE_MIN_BUCKETS 64
44 99f7567e 2022-11-08 stsp #define GOT_DELTA_CACHE_MAX_BUCKETS 2048
45 dac5c75e 2022-06-04 stsp #define GOT_DELTA_CACHE_MAX_CHAIN 2
46 99f7567e 2022-11-08 stsp #define GOT_DELTA_CACHE_MAX_DELTA_SIZE 1024
47 dac5c75e 2022-06-04 stsp
48 dac5c75e 2022-06-04 stsp struct got_cached_delta {
49 dac5c75e 2022-06-04 stsp off_t offset;
50 dac5c75e 2022-06-04 stsp uint8_t *data;
51 dac5c75e 2022-06-04 stsp size_t len;
52 ab2f42e7 2019-11-10 stsp };
53 ab2f42e7 2019-11-10 stsp
54 dac5c75e 2022-06-04 stsp struct got_delta_cache_head {
55 dac5c75e 2022-06-04 stsp struct got_cached_delta entries[GOT_DELTA_CACHE_MAX_CHAIN];
56 dac5c75e 2022-06-04 stsp unsigned int nchain;
57 dac5c75e 2022-06-04 stsp };
58 ab2f42e7 2019-11-10 stsp
59 ab2f42e7 2019-11-10 stsp struct got_delta_cache {
60 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *buckets;
61 dac5c75e 2022-06-04 stsp unsigned int nbuckets;
62 dac5c75e 2022-06-04 stsp unsigned int totelem;
63 c3b318d0 2019-11-10 stsp int cache_search;
64 c3b318d0 2019-11-10 stsp int cache_hit;
65 c3b318d0 2019-11-10 stsp int cache_miss;
66 c3b318d0 2019-11-10 stsp int cache_evict;
67 c3b318d0 2019-11-10 stsp int cache_toolarge;
68 b77715b7 2023-04-18 stsp int cache_maxtoolarge;
69 dac5c75e 2022-06-04 stsp unsigned int flags;
70 dac5c75e 2022-06-04 stsp #define GOT_DELTA_CACHE_F_NOMEM 0x01
71 dac5c75e 2022-06-04 stsp SIPHASH_KEY key;
72 ab2f42e7 2019-11-10 stsp };
73 ab2f42e7 2019-11-10 stsp
74 dac5c75e 2022-06-04 stsp const struct got_error *
75 dac5c75e 2022-06-04 stsp got_delta_cache_alloc(struct got_delta_cache **new)
76 ab2f42e7 2019-11-10 stsp {
77 dac5c75e 2022-06-04 stsp const struct got_error *err;
78 ab2f42e7 2019-11-10 stsp struct got_delta_cache *cache;
79 ab2f42e7 2019-11-10 stsp
80 dac5c75e 2022-06-04 stsp *new = NULL;
81 dac5c75e 2022-06-04 stsp
82 ab2f42e7 2019-11-10 stsp cache = calloc(1, sizeof(*cache));
83 ab2f42e7 2019-11-10 stsp if (cache == NULL)
84 dac5c75e 2022-06-04 stsp return got_error_from_errno("calloc");
85 20282b02 2022-06-16 tracey
86 dac5c75e 2022-06-04 stsp cache->buckets = calloc(GOT_DELTA_CACHE_MIN_BUCKETS,
87 dac5c75e 2022-06-04 stsp sizeof(cache->buckets[0]));
88 dac5c75e 2022-06-04 stsp if (cache->buckets == NULL) {
89 dac5c75e 2022-06-04 stsp err = got_error_from_errno("calloc");
90 dac5c75e 2022-06-04 stsp free(cache);
91 dac5c75e 2022-06-04 stsp return err;
92 dac5c75e 2022-06-04 stsp }
93 dac5c75e 2022-06-04 stsp cache->nbuckets = GOT_DELTA_CACHE_MIN_BUCKETS;
94 ab2f42e7 2019-11-10 stsp
95 dac5c75e 2022-06-04 stsp arc4random_buf(&cache->key, sizeof(cache->key));
96 dac5c75e 2022-06-04 stsp *new = cache;
97 dac5c75e 2022-06-04 stsp return NULL;
98 ab2f42e7 2019-11-10 stsp }
99 ab2f42e7 2019-11-10 stsp
100 ab2f42e7 2019-11-10 stsp void
101 ab2f42e7 2019-11-10 stsp got_delta_cache_free(struct got_delta_cache *cache)
102 ab2f42e7 2019-11-10 stsp {
103 dac5c75e 2022-06-04 stsp struct got_cached_delta *delta;
104 dac5c75e 2022-06-04 stsp unsigned int i;
105 ab2f42e7 2019-11-10 stsp
106 b5e1b8cd 2022-12-01 stsp #ifdef GOT_DELTA_CACHE_DEBUG
107 dac5c75e 2022-06-04 stsp fprintf(stderr, "%s: delta cache: %u elements, %d searches, %d hits, "
108 b77715b7 2023-04-18 stsp "%d missed, %d evicted, %d too large (max %d)\n", getprogname(),
109 dac5c75e 2022-06-04 stsp cache->totelem, cache->cache_search, cache->cache_hit,
110 b77715b7 2023-04-18 stsp cache->cache_miss, cache->cache_evict, cache->cache_toolarge,
111 b77715b7 2023-04-18 stsp cache->cache_maxtoolarge);
112 c3b318d0 2019-11-10 stsp #endif
113 dac5c75e 2022-06-04 stsp for (i = 0; i < cache->nbuckets; i++) {
114 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *head;
115 dac5c75e 2022-06-04 stsp int j;
116 dac5c75e 2022-06-04 stsp head = &cache->buckets[i];
117 dac5c75e 2022-06-04 stsp for (j = 0; j < head->nchain; j++) {
118 dac5c75e 2022-06-04 stsp delta = &head->entries[j];
119 dac5c75e 2022-06-04 stsp free(delta->data);
120 dac5c75e 2022-06-04 stsp }
121 ab2f42e7 2019-11-10 stsp }
122 dac5c75e 2022-06-04 stsp free(cache->buckets);
123 ab2f42e7 2019-11-10 stsp free(cache);
124 ab2f42e7 2019-11-10 stsp }
125 ab2f42e7 2019-11-10 stsp
126 dac5c75e 2022-06-04 stsp static uint64_t
127 dac5c75e 2022-06-04 stsp delta_cache_hash(struct got_delta_cache *cache, off_t delta_offset)
128 ab2f42e7 2019-11-10 stsp {
129 dac5c75e 2022-06-04 stsp return SipHash24(&cache->key, &delta_offset, sizeof(delta_offset));
130 dac5c75e 2022-06-04 stsp }
131 ab2f42e7 2019-11-10 stsp
132 22f6beb0 2022-11-08 stsp #ifndef GOT_NO_DELTA_CACHE
133 dac5c75e 2022-06-04 stsp static const struct got_error *
134 dac5c75e 2022-06-04 stsp delta_cache_resize(struct got_delta_cache *cache, unsigned int nbuckets)
135 dac5c75e 2022-06-04 stsp {
136 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *buckets;
137 dac5c75e 2022-06-04 stsp size_t i;
138 ab2f42e7 2019-11-10 stsp
139 dac5c75e 2022-06-04 stsp buckets = calloc(nbuckets, sizeof(buckets[0]));
140 dac5c75e 2022-06-04 stsp if (buckets == NULL) {
141 dac5c75e 2022-06-04 stsp if (errno != ENOMEM)
142 dac5c75e 2022-06-04 stsp return got_error_from_errno("calloc");
143 dac5c75e 2022-06-04 stsp /* Proceed with our current amount of hash buckets. */
144 dac5c75e 2022-06-04 stsp cache->flags |= GOT_DELTA_CACHE_F_NOMEM;
145 dac5c75e 2022-06-04 stsp return NULL;
146 dac5c75e 2022-06-04 stsp }
147 dac5c75e 2022-06-04 stsp
148 dac5c75e 2022-06-04 stsp arc4random_buf(&cache->key, sizeof(cache->key));
149 dac5c75e 2022-06-04 stsp
150 dac5c75e 2022-06-04 stsp for (i = 0; i < cache->nbuckets; i++) {
151 dac5c75e 2022-06-04 stsp unsigned int j;
152 dac5c75e 2022-06-04 stsp for (j = 0; j < cache->buckets[i].nchain; j++) {
153 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *head;
154 dac5c75e 2022-06-04 stsp struct got_cached_delta *delta;
155 dac5c75e 2022-06-04 stsp uint64_t idx;
156 dac5c75e 2022-06-04 stsp delta = &cache->buckets[i].entries[j];
157 dac5c75e 2022-06-04 stsp idx = delta_cache_hash(cache, delta->offset) % nbuckets;
158 dac5c75e 2022-06-04 stsp head = &buckets[idx];
159 dac5c75e 2022-06-04 stsp if (head->nchain < nitems(head->entries)) {
160 dac5c75e 2022-06-04 stsp struct got_cached_delta *new_delta;
161 dac5c75e 2022-06-04 stsp new_delta = &head->entries[head->nchain];
162 dac5c75e 2022-06-04 stsp memcpy(new_delta, delta, sizeof(*new_delta));
163 dac5c75e 2022-06-04 stsp head->nchain++;
164 b43d5a6c 2022-11-08 stsp } else {
165 dac5c75e 2022-06-04 stsp free(delta->data);
166 b43d5a6c 2022-11-08 stsp cache->totelem--;
167 b43d5a6c 2022-11-08 stsp }
168 dac5c75e 2022-06-04 stsp }
169 dac5c75e 2022-06-04 stsp }
170 dac5c75e 2022-06-04 stsp
171 dac5c75e 2022-06-04 stsp free(cache->buckets);
172 dac5c75e 2022-06-04 stsp cache->buckets = buckets;
173 dac5c75e 2022-06-04 stsp cache->nbuckets = nbuckets;
174 dac5c75e 2022-06-04 stsp return NULL;
175 ab2f42e7 2019-11-10 stsp }
176 ab2f42e7 2019-11-10 stsp
177 dac5c75e 2022-06-04 stsp static const struct got_error *
178 dac5c75e 2022-06-04 stsp delta_cache_grow(struct got_delta_cache *cache)
179 dac5c75e 2022-06-04 stsp {
180 dac5c75e 2022-06-04 stsp unsigned int nbuckets;
181 dac5c75e 2022-06-04 stsp
182 dac5c75e 2022-06-04 stsp if ((cache->flags & GOT_DELTA_CACHE_F_NOMEM) ||
183 dac5c75e 2022-06-04 stsp cache->nbuckets == GOT_DELTA_CACHE_MAX_BUCKETS)
184 dac5c75e 2022-06-04 stsp return NULL;
185 dac5c75e 2022-06-04 stsp
186 dac5c75e 2022-06-04 stsp if (cache->nbuckets >= GOT_DELTA_CACHE_MAX_BUCKETS / 2)
187 dac5c75e 2022-06-04 stsp nbuckets = GOT_DELTA_CACHE_MAX_BUCKETS;
188 dac5c75e 2022-06-04 stsp else
189 dac5c75e 2022-06-04 stsp nbuckets = cache->nbuckets * 2;
190 dac5c75e 2022-06-04 stsp
191 dac5c75e 2022-06-04 stsp return delta_cache_resize(cache, nbuckets);
192 dac5c75e 2022-06-04 stsp }
193 20282b02 2022-06-16 tracey #endif
194 dac5c75e 2022-06-04 stsp
195 ab2f42e7 2019-11-10 stsp const struct got_error *
196 ab2f42e7 2019-11-10 stsp got_delta_cache_add(struct got_delta_cache *cache,
197 ab2f42e7 2019-11-10 stsp off_t delta_data_offset, uint8_t *delta_data, size_t delta_len)
198 ab2f42e7 2019-11-10 stsp {
199 22f6beb0 2022-11-08 stsp #ifdef GOT_NO_DELTA_CACHE
200 fa7a529e 2020-01-06 stsp return got_error(GOT_ERR_NO_SPACE);
201 fa7a529e 2020-01-06 stsp #else
202 dac5c75e 2022-06-04 stsp const struct got_error *err = NULL;
203 dac5c75e 2022-06-04 stsp struct got_cached_delta *delta;
204 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *head;
205 dac5c75e 2022-06-04 stsp uint64_t idx;
206 ab2f42e7 2019-11-10 stsp
207 4264b841 2022-06-04 stsp if (delta_len > GOT_DELTA_CACHE_MAX_DELTA_SIZE) {
208 c3b318d0 2019-11-10 stsp cache->cache_toolarge++;
209 b77715b7 2023-04-18 stsp if (delta_len > cache->cache_maxtoolarge)
210 b77715b7 2023-04-18 stsp cache->cache_maxtoolarge = delta_len;
211 ab2f42e7 2019-11-10 stsp return got_error(GOT_ERR_NO_SPACE);
212 c3b318d0 2019-11-10 stsp }
213 ab2f42e7 2019-11-10 stsp
214 dac5c75e 2022-06-04 stsp if (cache->nbuckets * 3 < cache->totelem * 4) {
215 dac5c75e 2022-06-04 stsp err = delta_cache_grow(cache);
216 dac5c75e 2022-06-04 stsp if (err)
217 dac5c75e 2022-06-04 stsp return err;
218 dac5c75e 2022-06-04 stsp }
219 ab2f42e7 2019-11-10 stsp
220 dac5c75e 2022-06-04 stsp idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
221 dac5c75e 2022-06-04 stsp head = &cache->buckets[idx];
222 dac5c75e 2022-06-04 stsp if (head->nchain >= nitems(head->entries)) {
223 dac5c75e 2022-06-04 stsp delta = &head->entries[head->nchain - 1];
224 dac5c75e 2022-06-04 stsp free(delta->data);
225 dac5c75e 2022-06-04 stsp memset(delta, 0, sizeof(*delta));
226 dac5c75e 2022-06-04 stsp head->nchain--;
227 b43d5a6c 2022-11-08 stsp cache->totelem--;
228 b43d5a6c 2022-11-08 stsp cache->cache_evict++;
229 dac5c75e 2022-06-04 stsp }
230 ab2f42e7 2019-11-10 stsp
231 dac5c75e 2022-06-04 stsp delta = &head->entries[head->nchain];
232 dac5c75e 2022-06-04 stsp delta->offset = delta_data_offset;
233 dac5c75e 2022-06-04 stsp delta->data = delta_data;
234 dac5c75e 2022-06-04 stsp delta->len = delta_len;
235 dac5c75e 2022-06-04 stsp head->nchain++;
236 dac5c75e 2022-06-04 stsp cache->totelem++;
237 ab2f42e7 2019-11-10 stsp
238 ab2f42e7 2019-11-10 stsp return NULL;
239 fa7a529e 2020-01-06 stsp #endif
240 ab2f42e7 2019-11-10 stsp }
241 ab2f42e7 2019-11-10 stsp
242 ab2f42e7 2019-11-10 stsp void
243 ab2f42e7 2019-11-10 stsp got_delta_cache_get(uint8_t **delta_data, size_t *delta_len,
244 ab2f42e7 2019-11-10 stsp struct got_delta_cache *cache, off_t delta_data_offset)
245 ab2f42e7 2019-11-10 stsp {
246 dac5c75e 2022-06-04 stsp uint64_t idx;
247 dac5c75e 2022-06-04 stsp struct got_delta_cache_head *head;
248 dac5c75e 2022-06-04 stsp struct got_cached_delta *delta;
249 dac5c75e 2022-06-04 stsp int i;
250 ab2f42e7 2019-11-10 stsp
251 dac5c75e 2022-06-04 stsp idx = delta_cache_hash(cache, delta_data_offset) % cache->nbuckets;
252 dac5c75e 2022-06-04 stsp head = &cache->buckets[idx];
253 dac5c75e 2022-06-04 stsp
254 c3b318d0 2019-11-10 stsp cache->cache_search++;
255 ab2f42e7 2019-11-10 stsp *delta_data = NULL;
256 ab2f42e7 2019-11-10 stsp *delta_len = 0;
257 dac5c75e 2022-06-04 stsp for (i = 0; i < head->nchain; i++) {
258 dac5c75e 2022-06-04 stsp delta = &head->entries[i];
259 dac5c75e 2022-06-04 stsp if (delta->offset != delta_data_offset)
260 ab2f42e7 2019-11-10 stsp continue;
261 c3b318d0 2019-11-10 stsp cache->cache_hit++;
262 dac5c75e 2022-06-04 stsp if (i > 0) {
263 dac5c75e 2022-06-04 stsp struct got_cached_delta tmp;
264 dac5c75e 2022-06-04 stsp memcpy(&tmp, &head->entries[0], sizeof(tmp));
265 dac5c75e 2022-06-04 stsp memcpy(&head->entries[0], &head->entries[i],
266 dac5c75e 2022-06-04 stsp sizeof(head->entries[0]));
267 dac5c75e 2022-06-04 stsp memcpy(&head->entries[i], &tmp,
268 dac5c75e 2022-06-04 stsp sizeof(head->entries[i]));
269 dac5c75e 2022-06-04 stsp delta = &head->entries[0];
270 ab2f42e7 2019-11-10 stsp }
271 dac5c75e 2022-06-04 stsp *delta_data = delta->data;
272 dac5c75e 2022-06-04 stsp *delta_len = delta->len;
273 ab2f42e7 2019-11-10 stsp return;
274 ab2f42e7 2019-11-10 stsp }
275 c3b318d0 2019-11-10 stsp
276 c3b318d0 2019-11-10 stsp cache->cache_miss++;
277 ab2f42e7 2019-11-10 stsp }