Commit Diff


commit - ea4ee74a619549cdfbbf7f824599834f31b72838
commit + 9a7c12cfb1e1a7e36813ba8898c5281d5a0dbc30
blob - 5278c5830ad6b4af4c8d43dbbe466b6a4851dd25
blob + 030c504a6a789b22f0cf84f810b9b06dd2a011a0
--- gotadmin/gotadmin.1
+++ gotadmin/gotadmin.1
@@ -232,8 +232,9 @@ and a break-down of the number of objects per object t
 .Op Fl r Ar repository-path
 .Xc
 .Dl Pq alias: Cm cl
-Purge unreferenced loose objects from the repository and display
-the amount of disk space which has been freed as a result.
+Purge unreferenced loose objects and redundant pack files from the
+repository and display the amount of disk space which has been freed
+as a result.
 .Pp
 Unreferenced objects are present in the repository but cannot be
 reached via any reference in the entire
@@ -246,12 +247,18 @@ directory,
 spread across 256 sub-directories named after the 256 possible
 hexadecimal values of the first byte of an object identifier.
 .Pp
-Packed objects stored in pack files under
-.Pa objects/pack/
-will not be purged.
-However, if redundant copies of packed objects exist in loose form,
-such redundant copies will be purged.
+Packed objects are stored in pack files under
+.Pa objects/pack/ .
+.Pp
+If redundant copies of packed objects exist in loose form, such
+redundant copies will be purged.
+If all the objects of a pack file are present in other pack files,
+the redundant pack file will be purged.
 .Pp
+For compatibility with Git, if a matching file
+.Pa .keep
+exists for a given pack file, this pack file won't be removed.
+.Pp
 Objects will usually become unreferenced as a result of deleting
 branches or tags with
 .Cm got branch -d
@@ -261,6 +268,11 @@ Deleting arbitrary references with
 .Cm got ref -d
 may also leave unreferenced objects behind.
 .Pp
+Pack files will usually become redundant as a result of repacking the
+repository with
+.Nm
+.Cm pack Fl a .
+.Pp
 In order to determine the set of objects which are referenced, search
 all references for commit objects and tag objects, and traverse the
 corresponding tree object hierarchies.
blob - dee750d952183c77a15074c748c77e5a2510a5e5
blob + b374606457024caef32ca71ee66bd3057552e4b1
--- gotadmin/gotadmin.c
+++ gotadmin/gotadmin.c
@@ -1136,16 +1136,19 @@ struct got_cleanup_progress_arg {
 	int last_nloose;
 	int last_ncommits;
 	int last_npurged;
+	int last_nredundant;
 	int verbosity;
 	int printed_something;
 	int dry_run;
 };
 
 static const struct got_error *
-cleanup_progress(void *arg, int nloose, int ncommits, int npurged)
+cleanup_progress(void *arg, int nloose, int ncommits, int npurged,
+    int nredundant)
 {
 	struct got_cleanup_progress_arg *a = arg;
 	int print_loose = 0, print_commits = 0, print_purged = 0;
+	int print_redundant = 0;
 
 	if (a->last_nloose != nloose) {
 		print_loose = 1;
@@ -1162,11 +1165,15 @@ cleanup_progress(void *arg, int nloose, int ncommits, 
 		print_purged = 1;
 		a->last_npurged = npurged;
 	}
+	if (a->last_nredundant != nredundant) {
+		print_redundant = 1;
+		a->last_nredundant = nredundant;
+	}
 
 	if (a->verbosity < 0)
 		return NULL;
 
-	if (print_loose || print_commits || print_purged)
+	if (print_loose || print_commits || print_purged || print_redundant)
 		printf("\r");
 	if (print_loose)
 		printf("%d loose object%s", nloose, nloose == 1 ? "" : "s");
@@ -1182,7 +1189,16 @@ cleanup_progress(void *arg, int nloose, int ncommits, 
 			    npurged == 1 ? "" : "s");
 		}
 	}
-	if (print_loose || print_commits || print_purged) {
+	if (print_redundant) {
+		if (a->dry_run) {
+			printf("%d pack file%s could be purged", nredundant,
+			    nredundant == 1 ? "" : "s");
+		} else {
+			printf("%d pack file%s purged", nredundant,
+			    nredundant == 1 ? "" : "s");
+		}
+	}
+	if (print_loose || print_commits || print_purged || print_redundant) {
 		a->printed_something = 1;
 		fflush(stdout);
 	}
@@ -1222,10 +1238,14 @@ cmd_cleanup(int argc, char *argv[])
 	int remove_lonely_packidx = 0, ignore_mtime = 0;
 	struct got_cleanup_progress_arg cpa;
 	struct got_lonely_packidx_progress_arg lpa;
-	off_t size_before, size_after;
-	char scaled_before[FMT_SCALED_STRSIZE];
-	char scaled_after[FMT_SCALED_STRSIZE];
-	char scaled_diff[FMT_SCALED_STRSIZE];
+	off_t loose_before, loose_after;
+	off_t pack_before, pack_after;
+	off_t total_size;
+	char loose_before_scaled[FMT_SCALED_STRSIZE];
+	char loose_after_scaled[FMT_SCALED_STRSIZE];
+	char pack_before_scaled[FMT_SCALED_STRSIZE];
+	char pack_after_scaled[FMT_SCALED_STRSIZE];
+	char total_size_scaled[FMT_SCALED_STRSIZE];
 	int *pack_fds = NULL;
 
 #ifndef PROFILE
@@ -1299,37 +1319,67 @@ cmd_cleanup(int argc, char *argv[])
 	memset(&cpa, 0, sizeof(cpa));
 	cpa.last_ncommits = -1;
 	cpa.last_npurged = -1;
+	cpa.last_nredundant = -1;
 	cpa.dry_run = dry_run;
 	cpa.verbosity = verbosity;
+
 	error = got_repo_purge_unreferenced_loose_objects(repo,
-	    &size_before, &size_after, &npacked, dry_run, ignore_mtime,
+	    &loose_before, &loose_after, &npacked, dry_run, ignore_mtime,
 	    cleanup_progress, &cpa, check_cancelled, NULL);
 	if (cpa.printed_something)
 		printf("\n");
+	if (error)
+		goto done;
+
+	cpa.printed_something = 0;
+	cpa.last_ncommits = -1;
+	cpa.last_npurged = -1;
+	cpa.last_nloose = -1;
+	cpa.last_nredundant = -1;
+	error = got_repo_purge_redundant_packfiles(repo, &pack_before,
+	    &pack_after, dry_run, cleanup_progress, &cpa,
+	    check_cancelled, NULL);
 	if (error)
 		goto done;
+	if (cpa.printed_something)
+		printf("\n");
+
+	total_size = (loose_before - loose_after) + (pack_before - pack_after);
+
 	if (cpa.printed_something) {
-		if (fmt_scaled(size_before, scaled_before) == -1) {
+		if (fmt_scaled(loose_before, loose_before_scaled) == -1) {
 			error = got_error_from_errno("fmt_scaled");
 			goto done;
 		}
-		if (fmt_scaled(size_after, scaled_after) == -1) {
+		if (fmt_scaled(loose_after, loose_after_scaled) == -1) {
 			error = got_error_from_errno("fmt_scaled");
 			goto done;
 		}
-		if (fmt_scaled(size_before - size_after, scaled_diff) == -1) {
+		if (fmt_scaled(pack_before, pack_before_scaled) == -1) {
 			error = got_error_from_errno("fmt_scaled");
 			goto done;
 		}
-		printf("loose total size before: %s\n", scaled_before);
-		printf("loose total size after: %s\n", scaled_after);
+		if (fmt_scaled(pack_after, pack_after_scaled) == -1) {
+			error = got_error_from_errno("fmt_scaled");
+			goto done;
+		}
+		if (fmt_scaled(total_size, total_size_scaled) == -1) {
+			error = got_error_from_errno("fmt_scaled");
+			goto done;
+		}
+		printf("loose total size before: %s\n", loose_before_scaled);
+		printf("loose total size after: %s\n", loose_after_scaled);
+		printf("pack files total size before: %s\n",
+		    pack_before_scaled);
+		printf("pack files total size after: %s\n", pack_after_scaled);
 		if (dry_run) {
 			printf("disk space which would be freed: %s\n",
-			    scaled_diff);
+			    total_size_scaled);
 		} else
-			printf("disk space freed: %s\n", scaled_diff);
+			printf("disk space freed: %s\n", total_size_scaled);
 		printf("loose objects also found in pack files: %d\n", npacked);
 	}
+
 done:
 	if (repo)
 		got_repo_close(repo);
blob - 5ddd50191b8acfb903877b148bb856de99014b07
blob + 57bb3f85a90ec6fdf48dc338be1a61ccab5a133c
--- include/got_repository_admin.h
+++ include/got_repository_admin.h
@@ -70,7 +70,7 @@ got_repo_list_pack(FILE *packfile, struct got_object_i
 
 /* A callback function which gets invoked with cleanup information to print. */
 typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg,
-    int nloose, int ncommits, int npurged);
+    int nloose, int ncommits, int npurged, int nredundant);
 
 /*
  * Walk objects reachable via references to determine whether any loose
@@ -88,6 +88,12 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
     int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg,
     got_cancel_cb cancel_cb, void *cancel_arg);
 
+const struct got_error *
+got_repo_purge_redundant_packfiles(struct got_repository *repo,
+    off_t *before, off_t *size_after, int dry_run,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg);
+
 /* A callback function which gets invoked with cleanup information to print. */
 typedef const struct got_error *(*got_lonely_packidx_progress_cb)(void *arg,
     const char *path);
blob - 48bed8a15e5f885c25ca796741f37fb70f1b87e3
blob + 213bc1e83451612e1e2ba0be6bf664e3f4f44050
--- lib/repository_admin.c
+++ lib/repository_admin.c
@@ -628,7 +628,7 @@ report_cleanup_progress(got_cleanup_progress_cb progre
 	if (err || !elapsed)
 		return err;
 
-	return progress_cb(progress_arg, nloose, ncommits, npurged);
+	return progress_cb(progress_arg, nloose, ncommits, npurged, -1);
 }
 
 static const struct got_error *
@@ -1153,7 +1153,7 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
 	if (nloose == 0) {
 		got_object_idset_free(loose_ids);
 		if (progress_cb) {
-			err = progress_cb(progress_arg, 0, 0, 0);
+			err = progress_cb(progress_arg, 0, 0, 0, -1);
 			if (err)
 				return err;
 		}
@@ -1217,13 +1217,205 @@ got_repo_purge_unreferenced_loose_objects(struct got_r
 
 	/* Produce a final progress report. */
 	if (progress_cb) {
-		err = progress_cb(progress_arg, nloose, ncommits, arg.npurged);
+		err = progress_cb(progress_arg, nloose, ncommits, arg.npurged,
+		    -1);
 		if (err)
 			goto done;
 	}
 done:
 	got_object_idset_free(loose_ids);
 	got_object_idset_free(traversed_ids);
+	return err;
+}
+
+static const struct got_error *
+purge_redundant_pack(struct got_repository *repo, const char *packidx_path,
+    int dry_run, int *remove, off_t *size_before, off_t *size_after)
+{
+	static const char *ext[] = {".idx", ".pack", ".rev", ".bitmap",
+	    ".promisor", ".mtimes"};
+	struct stat sb;
+	char *dot, path[PATH_MAX];
+	size_t i;
+
+	if (strlcpy(path, packidx_path, sizeof(path)) >=
+	    sizeof(path))
+		return got_error(GOT_ERR_NO_SPACE);
+
+	/*
+	 * For compatibility with Git, if a matching .keep file exist
+	 * don't delete the packfile.
+	 */
+	dot = strrchr(path, '.');
+	*dot = '\0';
+	if (strlcat(path, ".keep", sizeof(path)) >= sizeof(path))
+		return got_error(GOT_ERR_NO_SPACE);
+	if (faccessat(got_repo_get_fd(repo), path, F_OK, 0) == 0)
+		*remove = 0;
+
+	for (i = 0; i < nitems(ext); ++i) {
+		*dot = '\0';
+
+		if (strlcat(path, ext[i], sizeof(path)) >=
+		    sizeof(path))
+			return got_error(GOT_ERR_NO_SPACE);
+
+		if (fstatat(got_repo_get_fd(repo), path, &sb, 0) ==
+		    -1) {
+			if (errno == ENOENT &&
+			    strcmp(ext[i], ".pack") != 0 &&
+			    strcmp(ext[i], ".idx") != 0)
+				continue;
+			return got_error_from_errno2("fstatat", path);
+		}
+
+		*size_before += sb.st_size;
+		if (!*remove) {
+			*size_after += sb.st_size;
+			continue;
+		}
+
+		if (dry_run)
+			continue;
+
+		if (unlinkat(got_repo_get_fd(repo), path, 0) == -1) {
+			if (errno == ENOENT)
+				continue;
+			return got_error_from_errno2("unlinkat",
+			    path);
+		}
+	}
+
+	return NULL;
+}
+
+static const struct got_error *
+pack_is_redundant(int *redundant, struct got_repository *repo,
+    const char *packidx_path, struct got_object_idset *idset)
+{
+	const struct got_error *err;
+	struct got_packidx *packidx;
+	struct got_packidx_object_id *pid;
+	struct got_object_id id;
+	size_t i, nobjects;
+
+	*redundant = 1;
+
+	err = got_repo_get_packidx(&packidx, packidx_path, repo);
+	if (err)
+		return err;
+
+	nobjects = be32toh(packidx->hdr.fanout_table[0xff]);
+	for (i = 0; i < nobjects; ++i) {
+		pid = &packidx->hdr.sorted_ids[i];
+
+		memset(&id, 0, sizeof(id));
+		memcpy(&id.sha1, pid->sha1, sizeof(id.sha1));
+
+		if (got_object_idset_contains(idset, &id))
+			continue;
+
+		*redundant = 0;
+		err = got_object_idset_add(idset, &id, NULL);
+		if (err)
+			return err;
+	}
+
+	return NULL;
+}
+
+struct pack_info {
+	const char	*path;
+	size_t		 nobjects;
+};
+
+static int
+pack_info_cmp(const void *a, const void *b)
+{
+	const struct pack_info	*pa, *pb;
+
+	pa = a;
+	pb = b;
+	if (pa->nobjects == pb->nobjects)
+		return strcmp(pa->path, pb->path);
+	if (pa->nobjects > pb->nobjects)
+		return -1;
+	return 1;
+}
+
+const struct got_error *
+got_repo_purge_redundant_packfiles(struct got_repository *repo,
+    off_t *size_before, off_t *size_after, int dry_run,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err;
+	struct pack_info *pinfo, *sorted = NULL;
+	struct got_packidx *packidx;
+	struct got_object_idset *idset = NULL;
+	struct got_pathlist_entry *pe;
+	size_t i, npacks;
+	int remove, redundant_packs = 0;
+
+	*size_before = 0;
+	*size_after = 0;
+
+	npacks = 0;
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry)
+		npacks++;
+
+	if (npacks == 0)
+		return NULL;
+
+	sorted = calloc(npacks, sizeof(*sorted));
+	if (sorted == NULL)
+		return got_error_from_errno("calloc");
+
+	i = 0;
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
+		err = got_repo_get_packidx(&packidx, pe->path, repo);
+		if (err)
+			goto done;
+
+		pinfo = &sorted[i++];
+		pinfo->path = pe->path;
+		pinfo->nobjects = be32toh(packidx->hdr.fanout_table[0xff]);
+	}
+	qsort(sorted, npacks, sizeof(*sorted), pack_info_cmp);
+
+	idset = got_object_idset_alloc();
+	if (idset == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+
+	for (i = 0; i < npacks; ++i) {
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		err = pack_is_redundant(&remove, repo, sorted[i].path, idset);
+		if (err)
+			goto done;
+		err = purge_redundant_pack(repo, sorted[i].path, dry_run,
+		    &remove, size_before, size_after);
+		if (err)
+			goto done;
+		if (!remove)
+			continue;
+		err = progress_cb(progress_arg, -1, -1, -1,
+		    ++redundant_packs);
+		if (err)
+			goto done;
+	}
+
+	err = progress_cb(progress_arg, -1, -1, -1, redundant_packs);
+ done:
+	free(sorted);
+	if (idset)
+		got_object_idset_free(idset);
 	return err;
 }
 
blob - 2381958704d73cb1ebaeb0f7af8d2bf01b7d434f
blob + f6ddbb9657a29cfb443245457d038aee5b06d364
--- regress/cmdline/cleanup.sh
+++ regress/cmdline/cleanup.sh
@@ -236,7 +236,123 @@ test_cleanup_redundant_loose_objects() {
 
 	test_done "$testroot" "$ret"
 }
+
+test_cleanup_redundant_pack_files() {
+	local testroot=`test_init cleanup_redundant_pack_files`
+
+	# no pack files should exist yet
+
+	n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+	if [ "$n" -ne 0 ]; then
+		echo "expected no pack file to exists, $n found" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	# create a redundant pack with an associated .keep file
+	hash=$(gotadmin pack -a -r "$testroot/repo" \
+		| awk '/^Indexed/{print $2}')
+	kpack="$testroot/repo/.git/objects/pack/pack-$hash"
+	touch "${kpack%.pack}.keep"
+
+	# create a few pack files with different objects
+	for i in `jot 5`; do
+		echo "alpha $i" > $testroot/repo/alpha
+		git_commit "$testroot/repo" -m "edit #$i"
+		gotadmin pack -r "$testroot/repo" >/dev/null
+	done
+
+	# create two packs with all the objects
+	gotadmin pack -a -r "$testroot/repo" >/dev/null
+	gotadmin pack -a -r "$testroot/repo" >/dev/null
+
+	gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+		| tail -1 > $testroot/stdout
 
+	echo "5 pack files purged" > $testroot/stdout.expected
+	if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+		diff -u "$testroot/stdout.expected" "$testroot/stdout"
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+	if [ "$n" -ne 2 ]; then
+		echo "expected 2 pack files left, $n found instead" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	if [ ! -f "$kpack" ]; then
+		echo "$kpack disappeared unexpectedly" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	if [ ! -f "${kpack%.pack}.keep" ]; then
+		echo "${kpack%.pack}.keep disappeared unexpectedly" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	# create one more non-redundant pack
+	for i in `jot 5`; do
+		echo "alpha again $i" > $testroot/repo/alpha
+		git_commit "$testroot/repo" -m "edit $i"
+	done
+	gotadmin pack -r "$testroot/repo" >/dev/null
+
+	gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+	    | tail -1 > $testroot/stdout
+
+	echo "0 pack files purged" > $testroot/stdout.expected
+	if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+		diff -u "$testroot/stdout.expected" "$testroot/stdout"
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+	if [ "$n" -ne 3 ]; then
+		echo "expected 3 pack files left, $n found instead" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	# remove the .keep file
+	rm "${kpack%.pack}.keep"
+
+	# create some commits on a separate branch
+	(cd "$testroot/repo" && git checkout -q -b newbranch)
+
+	for i in `jot 5`; do
+		echo "alpha $i" > $testroot/repo/alpha
+		git_commit "$testroot/repo" -m "edit #$i"
+		gotadmin pack -r "$testroot/repo" >/dev/null
+	done
+
+	gotadmin pack -a -x master -r "$testroot/repo" >/dev/null
+
+	gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+		| tail -1 > $testroot/stdout
+
+	echo "6 pack files purged" > $testroot/stdout.expected
+	if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+		diff -u "$testroot/stdout.expected" "$testroot/stdout"
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+	if [ "$n" -ne 3 ]; then
+		echo "expected 3 pack files left, $n found instead" >&2
+		test_done "$testroot" 1
+		return 1
+	fi
+
+	test_done "$testroot" 0
+}
+
 test_cleanup_precious_objects() {
 	local testroot=`test_init cleanup_precious_objects`
 
@@ -365,5 +481,6 @@ test_cleanup_missing_pack_file() {
 test_parseargs "$@"
 run_test test_cleanup_unreferenced_loose_objects
 run_test test_cleanup_redundant_loose_objects
+run_test test_cleanup_redundant_pack_files
 run_test test_cleanup_precious_objects
 run_test test_cleanup_missing_pack_file