Commit Diff


commit - 54e38878133e773338308e227bc839a2515f14ba
commit + 2633cf30b7cd97910764fc4c016df7959858171d
blob - 1e15dde9d8968aefdeb7e7e8b17ed022ff243ff8
blob + cb91e27f4d5c2c10d9e0bf5fe16e63a697213aad
--- lib/deltify.c
+++ lib/deltify.c
@@ -88,6 +88,9 @@ static const uint32_t geartab[256] = {
     0xf1f6e72c, 0x5551128a, 0x83af87e2, 0x6f0342ba,
 };
 
+static const struct got_error *addblk(struct got_delta_table *, FILE *,
+    uint8_t *, off_t, off_t, off_t, uint32_t);
+
 static uint32_t
 hashblk(const unsigned char *p, off_t n, uint32_t seed)
 {
@@ -95,148 +98,153 @@ hashblk(const unsigned char *p, off_t n, uint32_t seed
 }
 
 static const struct got_error *
-addblk(struct got_delta_table *dt, FILE *f, off_t file_offset0, off_t len,
-    off_t offset, uint32_t h)
+lookup(int *n, struct got_delta_table *dt, FILE *f, off_t file_offset0,
+    off_t len, off_t offset, uint32_t h)
 {
-	const struct got_error *err = NULL;
-	int i;
+	struct got_delta_block *block;
 	uint8_t buf[GOT_DELTIFY_MAXCHUNK];
 	uint8_t buf2[GOT_DELTIFY_MAXCHUNK];
 	size_t r = 0;
+	int i;
 
-	if (len == 0)
-		return NULL;
+	for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
+		block = &dt->blocks[dt->offs[i] - 1];
+		if (block->len != len || block->hash != h)
+			continue;
 
-	i = h % dt->nalloc;
-	while (dt->blocks[i].len != 0) {
-		/*
-		 * Avoid adding duplicate blocks.
-		 * NB: A matching hash is insufficient for detecting equality.
-		 * The hash can only detect inequality.
-		 */
-		if (len == dt->blocks[i].len && h == dt->blocks[i].hash) {
-			if (r == 0) {
-				if (fseeko(f, file_offset0 + offset, SEEK_SET) == -1)
-					return got_error_from_errno("fseeko");
-				r = fread(buf, 1, len, f);
-				if (r != len) {
-					if (!ferror(f))
-						return NULL;
-					return got_ferror(f, GOT_ERR_IO);
-				}
-			}
-			if (fseeko(f, file_offset0 + dt->blocks[i].offset,
-			    SEEK_SET) == -1)
+		if (r == 0) {
+			if (fseeko(f, file_offset0 + offset, SEEK_SET) == -1)
 				return got_error_from_errno("fseeko");
-			if (fread(buf2, 1, len, f) != len)
+			r = fread(buf, 1, len, f);
+			if (r != len) {
+				if (!ferror(f))
+					break;		/* why? */
 				return got_ferror(f, GOT_ERR_IO);
-			if (memcmp(buf, buf2, len) == 0)
-				return NULL;
+			}
 		}
 
-		i = (i + 1) % dt->nalloc;
+		if (fseeko(f, file_offset0 + block->offset, SEEK_SET) == -1)
+			return got_error_from_errno("fseeko");
+		if (fread(buf2, 1, len, f) != len)
+			return got_ferror(f, GOT_ERR_IO);
+		if (memcmp(buf, buf2, len) == 0)
+			break;
 	}
-	assert(dt->blocks[i].len == 0);
-	dt->blocks[i].len = len;
-	dt->blocks[i].offset = offset;
-	dt->blocks[i].hash = h;
-	dt->nblocks++;
-	if (dt->nalloc < dt->nblocks + 256) {
-		struct got_delta_block *db;
-		size_t old_size = dt->nalloc;
-		db = dt->blocks;
-		dt->blocks = calloc(dt->nalloc + 256,
-		    sizeof(struct got_delta_block));
-		if (dt->blocks == NULL) {
-			err = got_error_from_errno("calloc");
-			dt->blocks = db;
-			return err;
-		}
-		dt->nalloc += 256;
-		/*
-		 * Recompute all block positions. Hash-based indices of blocks
-		 * in the array depend on the allocated length of the array.
-		 */
-		dt->nblocks = 0;
-		for (i = 0; i < old_size; i++) {
-			if (db[i].len == 0)
-				continue;
-			err = addblk(dt, f, file_offset0, db[i].len,
-			    db[i].offset, db[i].hash);
-			if (err)
+
+	*n = i;
+	return NULL;
+}
+
+static const struct got_error *
+lookup_mem(int *n, struct got_delta_table *dt, uint8_t *basedata,
+    off_t basefile_offset0, uint8_t *p, off_t len, uint32_t h)
+{
+	struct got_delta_block *block;
+	uint8_t *d;
+	int i;
+
+	for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
+		block = &dt->blocks[dt->offs[i] - 1];
+		if (len == block->len && h == block->hash) {
+			d = basedata + basefile_offset0 + block->offset;
+			if (memcmp(d, p, len) == 0)
 				break;
 		}
-		free(db);
 	}
 
-	return err;
+	*n = i;
+	return NULL;
 }
 
 static const struct got_error *
-addblk_mem(struct got_delta_table *dt, uint8_t *data, off_t file_offset0,
-    off_t len, off_t offset, uint32_t h)
+resizeht(struct got_delta_table *dt, FILE *f, off_t offset0, uint8_t *data,
+    off_t len)
 {
-	const struct got_error *err = NULL;
-	int i;
-	uint8_t *block1;
-	uint8_t *block2;
+	const struct got_error *err;
+	struct got_delta_block *b;
+	size_t newsize, oldsize;
+	int i, n;
 
-	if (len == 0)
-		return NULL;
+	if (dt->nblocks == dt->nalloc) {
+		newsize = dt->nalloc + 256;
+		b = reallocarray(dt->blocks, newsize, sizeof(*dt->blocks));
+		if (b == NULL)
+			return got_error_from_errno("reallocarray");
+		dt->blocks = b;
+		dt->nalloc = newsize;
+	}
 
-	i = h % dt->nalloc;
-	while (dt->blocks[i].len != 0) {
-		/*
-		 * Avoid adding duplicate blocks.
-		 * NB: A matching hash is insufficient for detecting equality.
-		 * The hash can only detect inequality.
-		 */
-		if (len == dt->blocks[i].len && h == dt->blocks[i].hash) {
-			block1 = data + file_offset0 + dt->blocks[i].offset;
-			block2 = data + file_offset0 + offset;
-			if (memcmp(block1, block2, len) == 0)
-				return NULL;
+	if (dt->blocks == NULL || dt->len * 100 / dt->size > 70) {
+		oldsize = dt->size;
+		dt->size *= 2;
+		free(dt->offs);
+		dt->offs = calloc(dt->size, sizeof(*dt->offs));
+		if (dt->offs == NULL) {
+			dt->size = oldsize;
+			return got_error_from_errno("calloc");
 		}
 
-		i = (i + 1) % dt->nalloc;
-	}
-	assert(dt->blocks[i].len == 0);
-	dt->blocks[i].len = len;
-	dt->blocks[i].offset = offset;
-	dt->blocks[i].hash = h;
-	dt->nblocks++;
-	if (dt->nalloc < dt->nblocks + 256) {
-		struct got_delta_block *db;
-		size_t old_size = dt->nalloc;
-		db = dt->blocks;
-		dt->blocks = calloc(dt->nalloc + 256,
-		    sizeof(struct got_delta_block));
-		if (dt->blocks == NULL) {
-			err = got_error_from_errno("calloc");
-			dt->blocks = db;
-			return err;
+		for (i = 0; i < dt->nblocks; ++i) {
+			b = &dt->blocks[i];
+			if (f)
+				err = lookup(&n, dt, f, offset0, b->len,
+				    b->offset, b->hash);
+			else
+				err = lookup_mem(&n, dt, data, offset0,
+				    data + b->offset, b->len, b->hash);
+			if (err) {
+				free(dt->offs);
+				dt->offs = NULL;
+				dt->size = oldsize;
+				return err;
+			}
+			assert(dt->offs[n] == 0);
+			dt->offs[n] = i + 1;
 		}
-		dt->nalloc += 256;
-		/*
-		 * Recompute all block positions. Hash-based indices of blocks
-		 * in the array depend on the allocated length of the array.
-		 */
-		dt->nblocks = 0;
-		for (i = 0; i < old_size; i++) {
-			if (db[i].len == 0)
-				continue;
-			err = addblk_mem(dt, data, file_offset0, db[i].len,
-			    db[i].offset, db[i].hash);
-			if (err)
-				break;
-		}
-		free(db);
 	}
 
-	return err;
+	return NULL;
 }
 
 static const struct got_error *
+addblk(struct got_delta_table *dt, FILE *f, uint8_t *data, off_t file_offset0,
+    off_t len, off_t offset, uint32_t h)
+{
+	const struct got_error *err;
+	struct got_delta_block *block;
+	int i;
+
+	if (len == 0)
+		return NULL;
+
+	err = resizeht(dt, f, file_offset0, data, len);
+	if (err)
+		return err;
+
+	if (f)
+		err = lookup(&i, dt, f, file_offset0, len, offset, h);
+	else
+		err = lookup_mem(&i, dt, data, file_offset0, data + offset,
+		    len, h);
+	if (err)
+		return err;
+
+	if (dt->offs[i] != 0)
+		return NULL;	/* found */
+
+	dt->offs[i] = dt->nblocks + 1;
+	block = &dt->blocks[dt->nblocks];
+	block->len = len;
+	block->offset = offset;
+	block->hash = h;
+
+	dt->nblocks++;
+	dt->len++;
+
+	return NULL;
+}
+
+static const struct got_error *
 lookupblk(struct got_delta_block **block, struct got_delta_table *dt,
     unsigned char *p, off_t len, uint32_t seed, FILE *basefile,
     off_t basefile_offset0)
@@ -244,24 +252,25 @@ lookupblk(struct got_delta_block **block, struct got_d
 	int i;
 	uint32_t h;
 	uint8_t buf[GOT_DELTIFY_MAXCHUNK];
+	struct got_delta_block *b;
 	size_t r;
 
 	*block = NULL;
 
 	h = hashblk(p, len, seed);
-	for (i = h % dt->nalloc; dt->blocks[i].len != 0;
-	     i = (i + 1) % dt->nalloc) {
-		if (dt->blocks[i].hash != h ||
-		    dt->blocks[i].len != len)
+	
+	for (i = h % dt->size; dt->offs[i] != 0; i = (i + 1) % dt->size) {
+		b = &dt->blocks[dt->offs[i] - 1];
+		if (b->hash != h || b->len != len)
 			continue;
-		if (fseeko(basefile, basefile_offset0 + dt->blocks[i].offset,
+		if (fseeko(basefile, basefile_offset0 + b->offset,
 		    SEEK_SET) == -1)
 			return got_error_from_errno("fseeko");
 		r = fread(buf, 1, len, basefile);
 		if (r != len)
 			return got_ferror(basefile, GOT_ERR_IO);
 		if (memcmp(p, buf, len) == 0) {
-			*block = &dt->blocks[i];
+			*block = b;
 			break;
 		}
 	}
@@ -273,24 +282,17 @@ lookupblk_mem(struct got_delta_block **block, struct g
     unsigned char *p, off_t len, uint32_t seed, uint8_t *basedata,
     off_t basefile_offset0)
 {
-	int i;
+	const struct got_error *err;
+	int n;
 	uint32_t h;
-	uint8_t *b;
 
 	*block = NULL;
-
 	h = hashblk(p, len, seed);
-	for (i = h % dt->nalloc; dt->blocks[i].len != 0;
-	     i = (i + 1) % dt->nalloc) {
-		if (dt->blocks[i].hash != h ||
-		    dt->blocks[i].len != len)
-			continue;
-		b = basedata + basefile_offset0 + dt->blocks[i].offset;
-		if (memcmp(p, b, len) == 0) {
-			*block = &dt->blocks[i];
-			break;
-		}
-	}
+	err = lookup_mem(&n, dt, basedata, basefile_offset0, p, len, h);
+	if (err)
+		return err;
+	if (dt->offs[n] != 0)
+		*block = &dt->blocks[dt->offs[n] - 1];
 	return NULL;
 }
 
@@ -371,6 +373,13 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
 		goto done;
 	}
 
+	(*dt)->size = 128;
+	(*dt)->offs = calloc((*dt)->size, sizeof(*(*dt)->offs));
+	if ((*dt)->offs == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
+
 	if (fseeko(f, fileoffset, SEEK_SET) == -1)
 		return got_error_from_errno("fseeko");
 
@@ -383,7 +392,7 @@ got_deltify_init(struct got_delta_table **dt, FILE *f,
 		if (blocklen == 0)
 			break;
 		h = hashblk(buf, blocklen, seed);
-		err = addblk(*dt, f, offset0, blocklen,
+		err = addblk(*dt, f, NULL, offset0, blocklen,
 		    fileoffset - offset0, h);
 		if (err)
 			goto done;
@@ -417,6 +426,13 @@ got_deltify_init_mem(struct got_delta_table **dt, uint
 	(*dt)->nalloc = 128;
 	(*dt)->blocks = calloc((*dt)->nalloc, sizeof(struct got_delta_block));
 	if ((*dt)->blocks == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
+
+	(*dt)->size = 128;
+	(*dt)->offs = calloc((*dt)->size, sizeof(*(*dt)->offs));
+	if ((*dt)->offs == NULL) {
 		err = got_error_from_errno("calloc");
 		goto done;
 	}
@@ -429,7 +445,7 @@ got_deltify_init_mem(struct got_delta_table **dt, uint
 		if (blocklen == 0)
 			break;
 		h = hashblk(data + fileoffset, blocklen, seed);
-		err = addblk_mem(*dt, data, offset0, blocklen,
+		err = addblk(*dt, NULL, data, offset0, blocklen,
 		    fileoffset - offset0, h);
 		if (err)
 			goto done;
@@ -451,6 +467,7 @@ got_deltify_free(struct got_delta_table *dt)
 	if (dt == NULL)
 		return;
 	free(dt->blocks);
+	free(dt->offs);
 	free(dt);
 }
 
blob - 23d51d665edd5927c4b9ca18e2ebebf3473b1795
blob + 973f1682b45bdf0b31cfe37ef1473caa9819418d
--- lib/got_lib_deltify.h
+++ lib/got_lib_deltify.h
@@ -25,6 +25,14 @@ struct got_delta_table {
 	struct got_delta_block	*blocks;
 	int			nblocks;
 	int			nalloc;
+
+	/*
+	 * Index for blocks.  offs[n] is zero when the slot is free,
+	 * otherwise it points to blocks[offs[n] - 1].
+	 */
+	uint32_t		*offs;
+	int			 len;
+	int			 size;
 };
 
 struct got_delta_instruction {