commit - 0b47703514fc6cbbe5318d575bb1cce7152aadcb
commit + afd0da3878d7acfc3872448325112986efb1dbb9
blob - 5278c5830ad6b4af4c8d43dbbe466b6a4851dd25
blob + 030c504a6a789b22f0cf84f810b9b06dd2a011a0
--- gotadmin/gotadmin.1
+++ gotadmin/gotadmin.1
.Op Fl r Ar repository-path
.Xc
.Dl Pq alias: Cm cl
-Purge unreferenced loose objects from the repository and display
-the amount of disk space which has been freed as a result.
+Purge unreferenced loose objects and redundant pack files from the
+repository and display the amount of disk space which has been freed
+as a result.
.Pp
Unreferenced objects are present in the repository but cannot be
reached via any reference in the entire
spread across 256 sub-directories named after the 256 possible
hexadecimal values of the first byte of an object identifier.
.Pp
-Packed objects stored in pack files under
-.Pa objects/pack/
-will not be purged.
-However, if redundant copies of packed objects exist in loose form,
-such redundant copies will be purged.
+Packed objects are stored in pack files under
+.Pa objects/pack/ .
+.Pp
+If redundant copies of packed objects exist in loose form, such
+redundant copies will be purged.
+If all the objects of a pack file are present in other pack files,
+the redundant pack file will be purged.
.Pp
+For compatibility with Git, a pack file will not be purged if a matching
+.Pa .keep
+file exists for it.
+.Pp
Objects will usually become unreferenced as a result of deleting
branches or tags with
.Cm got branch -d
.Cm got ref -d
may also leave unreferenced objects behind.
.Pp
+Pack files will usually become redundant as a result of repacking the
+repository with
+.Nm
+.Cm pack Fl a .
+.Pp
In order to determine the set of objects which are referenced, search
all references for commit objects and tag objects, and traverse the
corresponding tree object hierarchies.
blob - a72a024f791dd1baf5ea67c1f1416a7f42950866
blob + 5be1d1bbffc9287159df30b4dc3e4420bda2539b
--- gotadmin/gotadmin.c
+++ gotadmin/gotadmin.c
int last_nloose;
int last_ncommits;
int last_npurged;
+ int last_nredundant;
int verbosity;
int printed_something;
int dry_run;
};
static const struct got_error *
-cleanup_progress(void *arg, int nloose, int ncommits, int npurged)
+cleanup_progress(void *arg, int nloose, int ncommits, int npurged,
+ int nredundant)
{
struct got_cleanup_progress_arg *a = arg;
int print_loose = 0, print_commits = 0, print_purged = 0;
+ int print_redundant = 0;
if (a->last_nloose != nloose) {
print_loose = 1;
print_purged = 1;
a->last_npurged = npurged;
}
+ if (a->last_nredundant != nredundant) {
+ print_redundant = 1;
+ a->last_nredundant = nredundant;
+ }
if (a->verbosity < 0)
return NULL;
- if (print_loose || print_commits || print_purged)
+ if (print_loose || print_commits || print_purged || print_redundant)
printf("\r");
if (print_loose)
printf("%d loose object%s", nloose, nloose == 1 ? "" : "s");
npurged == 1 ? "" : "s");
}
}
- if (print_loose || print_commits || print_purged) {
+ if (print_redundant) {
+ if (a->dry_run) {
+ printf("%d pack file%s could be purged", nredundant,
+ nredundant == 1 ? "" : "s");
+ } else {
+ printf("%d pack file%s purged", nredundant,
+ nredundant == 1 ? "" : "s");
+ }
+ }
+ if (print_loose || print_commits || print_purged || print_redundant) {
a->printed_something = 1;
fflush(stdout);
}
int remove_lonely_packidx = 0, ignore_mtime = 0;
struct got_cleanup_progress_arg cpa;
struct got_lonely_packidx_progress_arg lpa;
- off_t size_before, size_after;
- char scaled_before[FMT_SCALED_STRSIZE];
- char scaled_after[FMT_SCALED_STRSIZE];
- char scaled_diff[FMT_SCALED_STRSIZE];
+ off_t loose_before, loose_after;
+ off_t pack_before, pack_after;
+ off_t total_size;
+ char loose_before_scaled[FMT_SCALED_STRSIZE];
+ char loose_after_scaled[FMT_SCALED_STRSIZE];
+ char pack_before_scaled[FMT_SCALED_STRSIZE];
+ char pack_after_scaled[FMT_SCALED_STRSIZE];
+ char total_size_scaled[FMT_SCALED_STRSIZE];
int *pack_fds = NULL;
#ifndef PROFILE
memset(&cpa, 0, sizeof(cpa));
cpa.last_ncommits = -1;
cpa.last_npurged = -1;
+ cpa.last_nredundant = -1;
cpa.dry_run = dry_run;
cpa.verbosity = verbosity;
+
error = got_repo_purge_unreferenced_loose_objects(repo,
- &size_before, &size_after, &npacked, dry_run, ignore_mtime,
+ &loose_before, &loose_after, &npacked, dry_run, ignore_mtime,
cleanup_progress, &cpa, check_cancelled, NULL);
if (cpa.printed_something)
printf("\n");
+ if (error)
+ goto done;
+
+ cpa.printed_something = 0;
+ cpa.last_ncommits = -1;
+ cpa.last_npurged = -1;
+ cpa.last_nloose = -1;
+ cpa.last_nredundant = -1;
+ error = got_repo_purge_redundant_packfiles(repo, &pack_before,
+ &pack_after, dry_run, cleanup_progress, &cpa,
+ check_cancelled, NULL);
if (error)
goto done;
+ if (cpa.printed_something)
+ printf("\n");
+
+ total_size = (loose_before - loose_after) + (pack_before - pack_after);
+
if (cpa.printed_something) {
- if (fmt_scaled(size_before, scaled_before) == -1) {
+ if (fmt_scaled(loose_before, loose_before_scaled) == -1) {
error = got_error_from_errno("fmt_scaled");
goto done;
}
- if (fmt_scaled(size_after, scaled_after) == -1) {
+ if (fmt_scaled(loose_after, loose_after_scaled) == -1) {
error = got_error_from_errno("fmt_scaled");
goto done;
}
- if (fmt_scaled(size_before - size_after, scaled_diff) == -1) {
+ if (fmt_scaled(pack_before, pack_before_scaled) == -1) {
error = got_error_from_errno("fmt_scaled");
goto done;
}
- printf("loose total size before: %s\n", scaled_before);
- printf("loose total size after: %s\n", scaled_after);
+ if (fmt_scaled(pack_after, pack_after_scaled) == -1) {
+ error = got_error_from_errno("fmt_scaled");
+ goto done;
+ }
+ if (fmt_scaled(total_size, total_size_scaled) == -1) {
+ error = got_error_from_errno("fmt_scaled");
+ goto done;
+ }
+ printf("loose total size before: %s\n", loose_before_scaled);
+ printf("loose total size after: %s\n", loose_after_scaled);
+ printf("pack files total size before: %s\n",
+ pack_before_scaled);
+ printf("pack files total size after: %s\n", pack_after_scaled);
if (dry_run) {
printf("disk space which would be freed: %s\n",
- scaled_diff);
+ total_size_scaled);
} else
- printf("disk space freed: %s\n", scaled_diff);
+ printf("disk space freed: %s\n", total_size_scaled);
printf("loose objects also found in pack files: %d\n", npacked);
}
+
done:
if (repo)
got_repo_close(repo);
blob - 5ddd50191b8acfb903877b148bb856de99014b07
blob + 57bb3f85a90ec6fdf48dc338be1a61ccab5a133c
--- include/got_repository_admin.h
+++ include/got_repository_admin.h
/* A callback function which gets invoked with cleanup information to print. */
typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg,
- int nloose, int ncommits, int npurged);
+ int nloose, int ncommits, int npurged, int nredundant);
/*
* Walk objects reachable via references to determine whether any loose
int ignore_mtime, got_cleanup_progress_cb progress_cb, void *progress_arg,
got_cancel_cb cancel_cb, void *cancel_arg);
+/*
+ * Purge pack files whose objects are all present in other pack files.
+ * A pack file which has a matching .keep file is never removed.
+ * The accumulated size of the pack files and their companion files, as
+ * found on disk before and after purging, is returned in *size_before
+ * and *size_after.
+ * If dry_run is set, no files are deleted.
+ * The progress callback receives -1 for nloose, ncommits, and npurged;
+ * only nredundant is meaningful.
+ */
+const struct got_error *
+got_repo_purge_redundant_packfiles(struct got_repository *repo,
+    off_t *size_before, off_t *size_after, int dry_run,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg);
+
/* A callback function which gets invoked with cleanup information to print. */
typedef const struct got_error *(*got_lonely_packidx_progress_cb)(void *arg,
const char *path);
blob - 96c3b2f9a5dbefe4d4825ddaaa5115c2a7d187ff
blob + 2187b12aa9b0522477825183261b30c78d674291
--- lib/repository_admin.c
+++ lib/repository_admin.c
if (err || !elapsed)
return err;
- return progress_cb(progress_arg, nloose, ncommits, npurged);
+ return progress_cb(progress_arg, nloose, ncommits, npurged, -1);
}
static const struct got_error *
if (nloose == 0) {
got_object_idset_free(loose_ids);
if (progress_cb) {
- err = progress_cb(progress_arg, 0, 0, 0);
+ err = progress_cb(progress_arg, 0, 0, 0, -1);
if (err)
return err;
}
/* Produce a final progress report. */
if (progress_cb) {
- err = progress_cb(progress_arg, nloose, ncommits, arg.npurged);
+ err = progress_cb(progress_arg, nloose, ncommits, arg.npurged,
+ -1);
if (err)
goto done;
}
done:
got_object_idset_free(loose_ids);
got_object_idset_free(traversed_ids);
+ return err;
+}
+
+/*
+ * Account for the on-disk files which belong to the pack identified by
+ * packidx_path and, if appropriate, delete them.
+ *
+ * On entry *remove tells whether the caller considers this pack redundant.
+ * It is cleared if a matching .keep file exists, in which case nothing is
+ * deleted. The size of every file found is added to *size_before; the size
+ * of every file which is not slated for removal is also added to
+ * *size_after. If dry_run is set, files are accounted for as if they had
+ * been removed but are left in place.
+ *
+ * All paths are resolved relative to the repository directory fd.
+ * A missing .pack or .idx file is an error; the other companion files
+ * are optional.
+ */
+static const struct got_error *
+purge_redundant_pack(struct got_repository *repo, const char *packidx_path,
+    int dry_run, int *remove, off_t *size_before, off_t *size_after)
+{
+	static const char *ext[] = {".idx", ".pack", ".rev", ".bitmap",
+	    ".promisor", ".mtimes"};
+	struct stat sb;
+	char *dot, path[PATH_MAX];
+	size_t i;
+
+	if (strlcpy(path, packidx_path, sizeof(path)) >=
+	    sizeof(path))
+		return got_error(GOT_ERR_NO_SPACE);
+
+	/*
+	 * Find the file name extension, which gets swapped out below.
+	 * Reject paths whose last component carries no extension rather
+	 * than dereferencing NULL or cutting the path off in the middle
+	 * of a directory name.
+	 */
+	dot = strrchr(path, '.');
+	if (dot == NULL || strchr(dot, '/') != NULL)
+		return got_error_path(packidx_path, GOT_ERR_BAD_PATH);
+
+	/*
+	 * For compatibility with Git, if a matching .keep file exists
+	 * don't delete the packfile.
+	 */
+	*dot = '\0';
+	if (strlcat(path, ".keep", sizeof(path)) >= sizeof(path))
+		return got_error(GOT_ERR_NO_SPACE);
+	if (faccessat(got_repo_get_fd(repo), path, F_OK, 0) == 0)
+		*remove = 0;
+
+	for (i = 0; i < nitems(ext); ++i) {
+		/* Strip the previous extension before appending the next. */
+		*dot = '\0';
+
+		if (strlcat(path, ext[i], sizeof(path)) >=
+		    sizeof(path))
+			return got_error(GOT_ERR_NO_SPACE);
+
+		if (fstatat(got_repo_get_fd(repo), path, &sb, 0) ==
+		    -1) {
+			/* Only the pack and its index are mandatory. */
+			if (errno == ENOENT &&
+			    strcmp(ext[i], ".pack") != 0 &&
+			    strcmp(ext[i], ".idx") != 0)
+				continue;
+			return got_error_from_errno2("fstatat", path);
+		}
+
+		*size_before += sb.st_size;
+		if (!*remove) {
+			*size_after += sb.st_size;
+			continue;
+		}
+
+		if (dry_run)
+			continue;
+
+		if (unlinkat(got_repo_get_fd(repo), path, 0) == -1) {
+			/* The file vanished since fstatat(); not an error. */
+			if (errno == ENOENT)
+				continue;
+			return got_error_from_errno2("unlinkat",
+			    path);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Check whether the pack indexed by packidx_path contributes any object
+ * which is not yet a member of idset.
+ * *redundant is set to 1 if every object ID listed in the pack index is
+ * already in idset, and to 0 otherwise. Object IDs which were missing
+ * from idset are added to it along the way.
+ */
+static const struct got_error *
+pack_is_redundant(int *redundant, struct got_repository *repo,
+    const char *packidx_path, struct got_object_idset *idset)
+{
+	const struct got_error *err;
+	struct got_packidx *packidx;
+	struct got_object_id id;
+	size_t n, total;
+
+	/* Assume redundancy until an unseen object proves otherwise. */
+	*redundant = 1;
+
+	err = got_repo_get_packidx(&packidx, packidx_path, repo);
+	if (err)
+		return err;
+
+	/* The last fanout table entry holds the number of objects. */
+	total = be32toh(packidx->hdr.fanout_table[0xff]);
+	for (n = 0; n < total; n++) {
+		memset(&id, 0, sizeof(id));
+		memcpy(&id.sha1, packidx->hdr.sorted_ids[n].sha1,
+		    sizeof(id.sha1));
+
+		if (!got_object_idset_contains(idset, &id)) {
+			*redundant = 0;
+			err = got_object_idset_add(idset, &id, NULL);
+			if (err)
+				return err;
+		}
+	}
+
+	return NULL;
+}
+
+/* Sort record describing one pack index known to the repository. */
+struct pack_info {
+ const char *path;	/* pack index path; borrowed, not owned */
+ size_t nobjects;	/* object count read from the index fanout table */
+};
+
+/*
+ * qsort(3) comparison function for struct pack_info.
+ * Orders entries by descending object count and breaks ties by comparing
+ * paths with strcmp(3).
+ */
+static int
+pack_info_cmp(const void *a, const void *b)
+{
+	const struct pack_info *lhs = a, *rhs = b;
+
+	if (lhs->nobjects > rhs->nobjects)
+		return -1;
+	if (lhs->nobjects < rhs->nobjects)
+		return 1;
+	return strcmp(lhs->path, rhs->path);
+}
+
+/*
+ * Purge pack files which contain no object that is not also present in
+ * another pack file.
+ *
+ * Pack indexes are visited in the order established by pack_info_cmp():
+ * packs with the most objects come first. A pack is considered redundant
+ * if all of its object IDs were already seen in previously visited packs.
+ * Packs with a matching .keep file are left alone and are not counted.
+ *
+ * *size_before and *size_after receive the accumulated size of the pack
+ * files and their companion files before and after purging. If dry_run
+ * is set, nothing is deleted. progress_cb and cancel_cb may be NULL.
+ */
+const struct got_error *
+got_repo_purge_redundant_packfiles(struct got_repository *repo,
+    off_t *size_before, off_t *size_after, int dry_run,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err = NULL;
+	struct pack_info *pinfo, *sorted = NULL;
+	struct got_packidx *packidx;
+	struct got_object_idset *idset = NULL;
+	struct got_pathlist_entry *pe;
+	size_t i, npacks;
+	int remove, redundant_packs = 0;
+
+	*size_before = 0;
+	*size_after = 0;
+
+	npacks = 0;
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry)
+		npacks++;
+
+	if (npacks == 0)
+		return NULL;
+
+	sorted = calloc(npacks, sizeof(*sorted));
+	if (sorted == NULL)
+		return got_error_from_errno("calloc");
+
+	/* Record path and object count of every pack index. */
+	i = 0;
+	TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
+		err = got_repo_get_packidx(&packidx, pe->path, repo);
+		if (err)
+			goto done;
+
+		pinfo = &sorted[i++];
+		pinfo->path = pe->path;
+		pinfo->nobjects = be32toh(packidx->hdr.fanout_table[0xff]);
+	}
+	qsort(sorted, npacks, sizeof(*sorted), pack_info_cmp);
+
+	idset = got_object_idset_alloc();
+	if (idset == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+
+	for (i = 0; i < npacks; ++i) {
+		if (cancel_cb) {
+			/*
+			 * Bail out directly; falling through to the final
+			 * progress report would overwrite this error.
+			 */
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				goto done;
+		}
+
+		err = pack_is_redundant(&remove, repo, sorted[i].path, idset);
+		if (err)
+			goto done;
+		/* May clear 'remove' if the pack is protected by .keep. */
+		err = purge_redundant_pack(repo, sorted[i].path, dry_run,
+		    &remove, size_before, size_after);
+		if (err)
+			goto done;
+		if (!remove)
+			continue;
+
+		redundant_packs++;
+		if (progress_cb) {
+			err = progress_cb(progress_arg, -1, -1, -1,
+			    redundant_packs);
+			if (err)
+				goto done;
+		}
+	}
+
+	/* Produce a final progress report. */
+	if (progress_cb)
+		err = progress_cb(progress_arg, -1, -1, -1, redundant_packs);
+done:
+	free(sorted);
+	if (idset)
+		got_object_idset_free(idset);
 return err;
 }
blob - 2381958704d73cb1ebaeb0f7af8d2bf01b7d434f
blob + f6ddbb9657a29cfb443245457d038aee5b06d364
--- regress/cmdline/cleanup.sh
+++ regress/cmdline/cleanup.sh
test_done "$testroot" "$ret"
}
+
+test_cleanup_redundant_pack_files() {
+ local testroot=`test_init cleanup_redundant_pack_files`
+
+ # no pack files should exist yet
+
+ n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+ if [ "$n" -ne 0 ]; then
+ echo "expected no pack file to exists, $n found" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ # create a redundant pack with an associated .keep file
+ hash=$(gotadmin pack -a -r "$testroot/repo" \
+ | awk '/^Indexed/{print $2}')
+ kpack="$testroot/repo/.git/objects/pack/pack-$hash"
+ touch "${kpack%.pack}.keep"
+
+ # create a few pack files with different objects
+ for i in `jot 5`; do
+ echo "alpha $i" > $testroot/repo/alpha
+ git_commit "$testroot/repo" -m "edit #$i"
+ gotadmin pack -r "$testroot/repo" >/dev/null
+ done
+
+ # create two packs with all the objects
+ gotadmin pack -a -r "$testroot/repo" >/dev/null
+ gotadmin pack -a -r "$testroot/repo" >/dev/null
+
+ gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+ | tail -1 > $testroot/stdout
+ echo "5 pack files purged" > $testroot/stdout.expected
+ if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+ diff -u "$testroot/stdout.expected" "$testroot/stdout"
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+ if [ "$n" -ne 2 ]; then
+ echo "expected 2 pack files left, $n found instead" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ if [ ! -f "$kpack" ]; then
+ echo "$kpack disappeared unexpectedly" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ if [ ! -f "${kpack%.pack}.keep" ]; then
+ echo "${kpack%.pack}.keep disappeared unexpectedly" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ # create one more non-redundant pack
+ for i in `jot 5`; do
+ echo "alpha again $i" > $testroot/repo/alpha
+ git_commit "$testroot/repo" -m "edit $i"
+ done
+ gotadmin pack -r "$testroot/repo" >/dev/null
+
+ gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+ | tail -1 > $testroot/stdout
+
+ echo "0 pack files purged" > $testroot/stdout.expected
+ if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+ diff -u "$testroot/stdout.expected" "$testroot/stdout"
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+ if [ "$n" -ne 3 ]; then
+ echo "expected 3 pack files left, $n found instead" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ # remove the .keep file
+ rm "${kpack%.pack}.keep"
+
+ # create some commits on a separate branch
+ (cd "$testroot/repo" && git checkout -q -b newbranch)
+
+ for i in `jot 5`; do
+ echo "alpha $i" > $testroot/repo/alpha
+ git_commit "$testroot/repo" -m "edit #$i"
+ gotadmin pack -r "$testroot/repo" >/dev/null
+ done
+
+ gotadmin pack -a -x master -r "$testroot/repo" >/dev/null
+
+ gotadmin cleanup -r "$testroot/repo" | grep 'pack files? purged' \
+ | tail -1 > $testroot/stdout
+
+ echo "6 pack files purged" > $testroot/stdout.expected
+ if cmp -s "$testroot/stdout.expected" "$testroot/stdout"; then
+ diff -u "$testroot/stdout.expected" "$testroot/stdout"
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ n=$(gotadmin info -r "$testroot/repo" | awk '/^pack files/{print $3}')
+ if [ "$n" -ne 3 ]; then
+ echo "expected 3 pack files left, $n found instead" >&2
+ test_done "$testroot" 1
+ return 1
+ fi
+
+ test_done "$testroot" 0
+}
+
test_cleanup_precious_objects() {
local testroot=`test_init cleanup_precious_objects`
test_parseargs "$@"
run_test test_cleanup_unreferenced_loose_objects
run_test test_cleanup_redundant_loose_objects
+run_test test_cleanup_redundant_pack_files
run_test test_cleanup_precious_objects
run_test test_cleanup_missing_pack_file