commit - 8775a682f984b3741057362377b06011b56c085d
commit + b3d68e7f99c78cbcf672100fb14310e4b4f18482
blob - 9dcb7b3ff76d3dc250925d02e4d75ab276eb5f62
blob + ffead95c145f5b7ed6e82153c2de67b9f2606c78
--- gotadmin/gotadmin.1
+++ gotadmin/gotadmin.1
.It Cm ls
Short alias for
.Cm listpack .
+.It Cm cleanup Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc
+Purge unreferenced loose objects from the repository and display
+the amount of disk space which has been freed as a result.
+.Pp
+Unreferenced objects are present in the repository but cannot be
+reached via any reference in the entire
+.Pa refs/
+namespace.
+.Pp
+Loose objects are stored as individual files beneath the repository's
+.Pa objects/
+directory,
+spread across 256 sub-directories named after the 256 possible
+hexadecimal values of the first byte of an object identifier.
+.Pp
+Packed objects stored in pack files under
+.Pa objects/pack/
+will not be purged.
+However, if redundant copies of packed objects exist in loose form,
+such redundant copies will be purged.
+.Pp
+Objects will usually become unreferenced as a result of deleting
+branches or tags with
+.Cm got branch -d
+or
+.Cm got tag -d .
+Deleting arbitrary references with
+.Cm got ref -d
+may also leave unreferenced objects behind.
+.Pp
+In order to determine the set of objects which are referenced, search
+all references for commit objects and tag objects, and traverse the
+corresponding tree object hierarchies.
+Any loose object IDs not encountered during this search are unreferenced
+and thus subject to removal.
+Display the number of commits which have been searched to indicate progress.
+.Pp
+References in the
+.Pa refs/got
+namespace may prevent objects from being purged.
+This includes references in the
+.Pa refs/got/worktree
+namespace created by
+.Cm got checkout
+and
+.Cm got update ,
+as well as references in the
+.Pa refs/got/backup
+namespace created by
+.Cm got rebase
+and
+.Cm got histedit .
+.Cm gotadmin cleanup
+will only purge corresponding objects once such references have been
+deleted with
+.Cm got ref -d .
+.Pp
+The options for
+.Cm gotadmin cleanup
+are as follows:
+.Bl -tag -width Ds
+.It Fl n
+Display the usual progress output and summary information but do not actually
+purge any objects.
+.It Fl r Ar repository-path
+Use the repository at the specified path.
+If not specified, assume the repository is located at or above the current
+working directory.
+.It Fl q
+Suppress progress reporting and disk space summary output.
.El
+.It Cm cl
+Short alias for
+.Cm cleanup .
+.El
.Sh EXIT STATUS
.Ex -std gotadmin
.Sh SEE ALSO
In particular:
.Bl -bullet
.It
-Reducing the size of repositories by removing redundant or unreferenced
-data requires
-.Xr git-gc 1 .
+Removing redundant or unreferenced packed objects requires
+.Xr git-gc 1
+and perhaps
+.Xr git-repack 1 .
.It
Exporting data from repositories requires
.Xr git-fast-export 1 .
blob - 624efcfd12132d41ed3c551e9867041820d77686
blob + 22ab038974a7575228d4b2fc26a06d6feb886e0f
--- gotadmin/gotadmin.c
+++ gotadmin/gotadmin.c
__dead static void usage_pack(void);
__dead static void usage_indexpack(void);
__dead static void usage_listpack(void);
+__dead static void usage_cleanup(void);
static const struct got_error* cmd_info(int, char *[]);
static const struct got_error* cmd_pack(int, char *[]);
static const struct got_error* cmd_indexpack(int, char *[]);
static const struct got_error* cmd_listpack(int, char *[]);
+static const struct got_error* cmd_cleanup(int, char *[]);
static struct gotadmin_cmd gotadmin_commands[] = {
{ "info", cmd_info, usage_info, "" },
{ "pack", cmd_pack, usage_pack, "" },
{ "indexpack", cmd_indexpack, usage_indexpack,"ix" },
{ "listpack", cmd_listpack, usage_listpack, "ls" },
+ { "cleanup", cmd_cleanup, usage_cleanup, "cl" },
};
static void
free(pack_hash);
free(packfile_path);
return error;
+}
+
+/* Print the usage line of the "cleanup" command to stderr and exit. */
+__dead static void
+usage_cleanup(void)
+{
+	const char *progname = getprogname();
+
+	fprintf(stderr, "usage: %s cleanup [-n] [-r repository-path] [-q]\n",
+	    progname);
+	exit(1);
+}
+
+/* State shared between cmd_cleanup() and its progress callback. */
+struct got_cleanup_progress_arg {
+	/* Counts most recently passed to the progress callback. */
+	int last_nloose;
+	int last_ncommits;
+	int last_npurged;
+
+	int verbosity;		/* negative: do not print anything */
+	int printed_something;	/* set once progress output was written */
+	int dry_run;		/* selects "could be purged" wording */
+};
+
+/*
+ * Progress callback for the cleanup command.
+ * Remember the reported counts and, unless output is suppressed, rewrite
+ * the progress line on stdout whenever at least one count has changed.
+ * A changed commit count also reprints the loose object count, and a
+ * changed purge count reprints all three.
+ */
+static const struct got_error *
+cleanup_progress(void *arg, int nloose, int ncommits, int npurged)
+{
+	struct got_cleanup_progress_arg *a = arg;
+	int loose_changed = (a->last_nloose != nloose);
+	int commits_changed = (a->last_ncommits != ncommits);
+	int purged_changed = (a->last_npurged != npurged);
+	int show_commits = (commits_changed || purged_changed);
+
+	a->last_nloose = nloose;
+	a->last_ncommits = ncommits;
+	a->last_npurged = npurged;
+
+	if (a->verbosity < 0)
+		return NULL;
+
+	/* Nothing changed since the last report; leave the line alone. */
+	if (!loose_changed && !show_commits)
+		return NULL;
+
+	printf("\r");
+	printf("%d loose object%s", nloose, nloose == 1 ? "" : "s");
+	if (show_commits) {
+		printf("; %d commit%s scanned", ncommits,
+		    ncommits == 1 ? "" : "s");
+	}
+	if (purged_changed) {
+		if (a->dry_run)
+			printf("; %d object%s could be purged", npurged,
+			    npurged == 1 ? "" : "s");
+		else
+			printf("; %d object%s purged", npurged,
+			    npurged == 1 ? "" : "s");
+	}
+
+	a->printed_something = 1;
+	fflush(stdout);
+	return NULL;
}
+
+/*
+ * Implement "gotadmin cleanup": purge unreferenced loose objects from a
+ * repository and print a summary of the disk space involved.
+ *
+ * Options:
+ *   -n  dry run; report what would be purged without removing anything
+ *   -r  path to the repository (default: search from the current directory)
+ *   -q  suppress progress and summary output
+ *
+ * The summary is only printed if progress output was written.
+ */
+static const struct got_error *
+cmd_cleanup(int argc, char *argv[])
+{
+	const struct got_error *error = NULL;
+	char *cwd = NULL, *repo_path = NULL;
+	struct got_repository *repo = NULL;
+	int ch, dry_run = 0, npacked = 0, verbosity = 0;
+	struct got_cleanup_progress_arg cpa;
+	off_t size_before, size_after;
+	char scaled_before[FMT_SCALED_STRSIZE];
+	char scaled_after[FMT_SCALED_STRSIZE];
+	char scaled_diff[FMT_SCALED_STRSIZE];
+
+	while ((ch = getopt(argc, argv, "r:nq")) != -1) {
+		switch (ch) {
+		case 'r':
+			repo_path = realpath(optarg, NULL);
+			if (repo_path == NULL)
+				return got_error_from_errno2("realpath",
+				    optarg);
+			got_path_strip_trailing_slashes(repo_path);
+			break;
+		case 'n':
+			dry_run = 1;
+			break;
+		case 'q':
+			verbosity = -1;
+			break;
+		default:
+			usage_cleanup();
+			/* NOTREACHED */
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+#ifndef PROFILE
+	if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil",
+	    NULL) == -1)
+		err(1, "pledge");
+#endif
+	cwd = getcwd(NULL, 0);
+	if (cwd == NULL) {
+		error = got_error_from_errno("getcwd");
+		goto done;
+	}
+
+	error = got_repo_open(&repo, repo_path ? repo_path : cwd, NULL);
+	if (error)
+		goto done;
+
+	error = apply_unveil(got_repo_get_path_git_dir(repo), 0);
+	if (error)
+		goto done;
+
+	/*
+	 * Start the commit and purge counters at -1 so that the callback
+	 * does not print them before they are first reported.
+	 */
+	memset(&cpa, 0, sizeof(cpa));
+	cpa.last_ncommits = -1;
+	cpa.last_npurged = -1;
+	cpa.dry_run = dry_run;
+	cpa.verbosity = verbosity;
+	error = got_repo_purge_unreferenced_loose_objects(repo,
+	    &size_before, &size_after, &npacked, dry_run,
+	    cleanup_progress, &cpa, check_cancelled, NULL);
+	/* Terminate the progress line even if the operation failed. */
+	if (cpa.printed_something)
+		printf("\n");
+	if (error)
+		goto done;
+	if (cpa.printed_something) {
+		if (fmt_scaled(size_before, scaled_before) == -1) {
+			error = got_error_from_errno("fmt_scaled");
+			goto done;
+		}
+		if (fmt_scaled(size_after, scaled_after) == -1) {
+			error = got_error_from_errno("fmt_scaled");
+			goto done;
+		}
+		if (fmt_scaled(size_before - size_after, scaled_diff) == -1) {
+			error = got_error_from_errno("fmt_scaled");
+			goto done;
+		}
+		printf("loose total size before: %s\n", scaled_before);
+		printf("loose total size after: %s\n", scaled_after);
+		if (dry_run) {
+			printf("disk space which would be freed: %s\n",
+			    scaled_diff);
+		} else
+			printf("disk space freed: %s\n", scaled_diff);
+		printf("loose objects also found in pack files: %d\n", npacked);
+	}
+done:
+	if (repo)
+		got_repo_close(repo);
+	free(cwd);
+	/* repo_path was allocated by realpath(3) and is owned by us. */
+	free(repo_path);
+	return error;
+}
blob - 6c27cd2e90cece2a2487a786135e4cb6b269f1ad
blob + 99fa6426ad538680697e303f69032b8c33f9266a
--- include/got_repository_admin.h
+++ include/got_repository_admin.h
got_repo_list_pack(FILE *packfile, struct got_object_id *pack_hash,
struct got_repository *repo, got_pack_list_cb list_cb, void *list_arg,
got_cancel_cb cancel_cb, void *cancel_arg);
+
+/* A callback function which gets invoked with cleanup information to print. */
+typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg,
+ int nloose, int ncommits, int npurged);
+
+/*
+ * Walk objects reachable via references to determine whether any loose
+ * objects can be removed from disk. Do remove such objects from disk
+ * unless the dry_run parameter is set.
+ * Return the disk space size occupied by loose objects before and after
+ * the operation. In a dry run, the size after the operation is the size
+ * which would remain had the objects been removed.
+ * Return the number of loose objects which are also stored in a pack file.
+ */
+const struct got_error *
+got_repo_purge_unreferenced_loose_objects(struct got_repository *repo,
+ off_t *size_before, off_t *size_after, int *npacked, int dry_run,
+ got_cleanup_progress_cb progress_cb, void *progress_arg,
+ got_cancel_cb cancel_cb, void *cancel_arg);
blob - 6b796d3554548cbf04c5c143d4e5311d6545dd03
blob + 7f91b3bcc5e247a002e805274461026d32267fe5
--- lib/got_lib_object.h
+++ lib/got_lib_object.h
struct got_repository *);
const struct got_error *got_object_open_loose_fd(int *, struct got_object_id *,
struct got_repository *);
+const struct got_error *got_object_open_packed(struct got_object **,
+ struct got_object_id *, struct got_repository *);
+const struct got_error *got_object_read_header_privsep(struct got_object **,
+ struct got_repository *, int);
const struct got_error *got_object_open(struct got_object **,
struct got_repository *, struct got_object_id *);
const struct got_error *got_object_raw_open(struct got_raw_object **,
blob - 4dff06d265c9aca4dc4216121372ec79495fcd96
blob + 5c5f3bb280d592509f3a929c1b15d6003d2faa97
--- lib/object.c
+++ lib/object.c
idx, id);
}
-static const struct got_error *
-open_packed_object(struct got_object **obj, struct got_object_id *id,
+const struct got_error *
+got_object_open_packed(struct got_object **obj, struct got_object_id *id,
struct got_repository *repo)
{
const struct got_error *err = NULL;
return NULL;
}
-static const struct got_error *
-read_object_header_privsep(struct got_object **obj, struct got_repository *repo,
- int obj_fd)
+const struct got_error *
+got_object_read_header_privsep(struct got_object **obj,
+ struct got_repository *repo, int obj_fd)
{
const struct got_error *err;
return NULL;
}
- err = open_packed_object(obj, id, repo);
+ err = got_object_open_packed(obj, id, repo);
if (err && err->code != GOT_ERR_NO_OBJ)
return err;
if (*obj) {
return err;
}
- err = read_object_header_privsep(obj, repo, fd);
+ err = got_object_read_header_privsep(obj, repo, fd);
if (err)
return err;
err = got_object_open_loose_fd(&fd, id, repo);
if (err)
return err;
- err = read_object_header_privsep(&obj, repo, fd);
+ err = got_object_read_header_privsep(&obj, repo, fd);
if (err)
return err;
obj_type = obj->type;
blob - a4af1c4099e0f496d253d2ae12845c1dc18fa809
blob + 66d89c5aedcc5401648b2b7866c1bea733341d2b
--- lib/repository_admin.c
+++ lib/repository_admin.c
#include "got_lib_delta.h"
#include "got_lib_object.h"
+#include "got_lib_object_idset.h"
#include "got_lib_object_cache.h"
#include "got_lib_pack.h"
#include "got_lib_privsep.h"
#include "got_lib_repository.h"
#include "got_lib_pack_create.h"
#include "got_lib_sha1.h"
+#include "got_lib_lockfile.h"
#ifndef nitems
#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
free(packpath);
if (packidx)
got_packidx_close(packidx);
+ return err;
+}
+
+/*
+ * Collect the IDs of all loose objects found beneath the repository's
+ * objects directory into a newly allocated ID set, and add up the on-disk
+ * size of the corresponding files in *ondisk_size.
+ * Directory entries whose name, prefixed with the name of their
+ * sub-directory, does not parse as a SHA1 digest are ignored.
+ * The progress callback, if any, is invoked with the number of loose
+ * objects found so far; the commit and purge counts are passed as -1.
+ * On failure the ID set is freed and *loose_ids is set to NULL.
+ */
+static const struct got_error *
+get_loose_object_ids(struct got_object_idset **loose_ids, off_t *ondisk_size,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	char *path_objects = NULL, *path = NULL;
+	DIR *dir = NULL;
+	struct got_object *obj = NULL;
+	struct got_object_id id;
+	int i, fd = -1;
+	struct stat sb;
+
+	*ondisk_size = 0;
+	*loose_ids = got_object_idset_alloc();
+	if (*loose_ids == NULL)
+		return got_error_from_errno("got_object_idset_alloc");
+
+	path_objects = got_repo_get_path_objects(repo);
+	if (path_objects == NULL) {
+		err = got_error_from_errno("got_repo_get_path_objects");
+		goto done;
+	}
+
+	/* One sub-directory per possible value of an ID's first byte. */
+	for (i = 0; i <= 0xff; i++) {
+		struct dirent *dent;
+
+		if (asprintf(&path, "%s/%.2x", path_objects, i) == -1) {
+			err = got_error_from_errno("asprintf");
+			break;
+		}
+
+		dir = opendir(path);
+		if (dir == NULL) {
+			if (errno == ENOENT) {
+				/*
+				 * A missing sub-directory is not an error.
+				 * Release the path before moving on; the
+				 * next iteration allocates a new one.
+				 */
+				free(path);
+				path = NULL;
+				err = NULL;
+				continue;
+			}
+			err = got_error_from_errno2("opendir", path);
+			break;
+		}
+
+		while ((dent = readdir(dir)) != NULL) {
+			char *id_str;
+
+			if (strcmp(dent->d_name, ".") == 0 ||
+			    strcmp(dent->d_name, "..") == 0)
+				continue;
+
+			if (asprintf(&id_str, "%.2x%s", i, dent->d_name) == -1) {
+				err = got_error_from_errno("asprintf");
+				goto done;
+			}
+
+			memset(&id, 0, sizeof(id));
+			if (!got_parse_sha1_digest(id.sha1, id_str)) {
+				free(id_str);
+				continue;
+			}
+			free(id_str);
+
+			err = got_object_open_loose_fd(&fd, &id, repo);
+			if (err)
+				goto done;
+			if (fstat(fd, &sb) == -1) {
+				err = got_error_from_errno("fstat");
+				goto done;
+			}
+			err = got_object_read_header_privsep(&obj, repo, fd);
+			if (err)
+				goto done;
+			fd = -1; /* already closed */
+
+			switch (obj->type) {
+			case GOT_OBJ_TYPE_COMMIT:
+			case GOT_OBJ_TYPE_TREE:
+			case GOT_OBJ_TYPE_BLOB:
+			case GOT_OBJ_TYPE_TAG:
+				break;
+			default:
+				err = got_error_fmt(GOT_ERR_OBJ_TYPE,
+				    "%d", obj->type);
+				goto done;
+			}
+			got_object_close(obj);
+			obj = NULL;
+			(*ondisk_size) += sb.st_size;
+			err = got_object_idset_add(*loose_ids, &id, NULL);
+			if (err)
+				goto done;
+			if (progress_cb) {
+				err = progress_cb(progress_arg,
+				    got_object_idset_num_elements(*loose_ids),
+				    -1, -1);
+				if (err)
+					goto done;
+			}
+		}
+
+		if (closedir(dir) != 0) {
+			err = got_error_from_errno("closedir");
+			goto done;
+		}
+		dir = NULL;
+
+		free(path);
+		path = NULL;
+	}
+done:
+	if (dir && closedir(dir) != 0 && err == NULL)
+		err = got_error_from_errno("closedir");
+	if (fd != -1 && close(fd) == -1 && err == NULL)
+		err = got_error_from_errno("close");
+	if (err) {
+		got_object_idset_free(*loose_ids);
+		*loose_ids = NULL;
+	}
+	if (obj)
+		got_object_close(obj);
+	free(path_objects);
+	free(path);
return err;
}
+
+/*
+ * Set *found to 1 if the pack index search succeeds for the given ID,
+ * and to 0 otherwise. A GOT_ERR_NO_OBJ result from the search is not
+ * treated as an error; any other error is returned to the caller.
+ */
+static const struct got_error *
+search_packidx(int *found, struct got_object_id *id,
+    struct got_repository *repo)
+{
+	const struct got_error *search_err;
+	struct got_packidx *packidx = NULL;
+	int idx;
+
+	*found = 0;
+
+	search_err = got_repo_search_packidx(&packidx, &idx, repo, id);
+	if (search_err == NULL) {
+		*found = 1; /* object is already packed */
+		return NULL;
+	}
+	if (search_err->code == GOT_ERR_NO_OBJ)
+		return NULL;
+	return search_err;
+}
+
+/*
+ * Handle a referenced object with regard to the purge set loose_ids.
+ * IDs which are not in the set are left alone. An ID in the set whose
+ * object can also be opened from a pack file stays in the set, since the
+ * loose copy is redundant, and *npacked is incremented. Otherwise the ID
+ * is removed from the set so that the loose object stays on disk.
+ */
+static const struct got_error *
+preserve_loose_object(struct got_object_idset *loose_ids,
+    struct got_object_id *id, struct got_repository *repo, int *npacked)
+{
+	const struct got_error *err = NULL;
+	struct got_object *packed_obj;
+	int in_packidx;
+
+	if (!got_object_idset_contains(loose_ids, id))
+		return NULL;
+
+	err = search_packidx(&in_packidx, id, repo);
+	if (err)
+		return err;
+
+	if (!in_packidx) {
+		/* Referenced and not packed: take it out of the purge set. */
+		return got_object_idset_remove(NULL, loose_ids, id);
+	}
+
+	/*
+	 * Sanity check: Open the packed object to prevent a
+	 * corrupt pack index from misleading us.
+	 */
+	err = got_object_open_packed(&packed_obj, id, repo);
+	if (err == NULL) {
+		got_object_close(packed_obj);
+		/*
+		 * Referenced and packed: the redundantly stored loose
+		 * object may be purged.
+		 */
+		(*npacked)++;
+		return NULL;
+	}
+	if (err->code != GOT_ERR_NO_OBJ)
+		return err;
+
+	/* The pack does not actually provide the object; keep the loose one. */
+	return got_object_idset_remove(NULL, loose_ids, id);
+}
+
+/*
+ * Scan the entries of the tree object identified by tree_id.
+ * Sub-trees are appended to the ids queue for the caller to process.
+ * Blobs of regular files and of symbolic links are marked as referenced
+ * and recorded in traversed_ids. Submodule entries and entries which
+ * have already been traversed are skipped.
+ * The dpath parameter is not used; it is retained so that callers need
+ * not change.
+ */
+static const struct got_error *
+load_tree_entries(struct got_object_id_queue *ids,
+    struct got_object_idset *loose_ids,
+    struct got_object_idset *traversed_ids, struct got_object_id *tree_id,
+    const char *dpath, struct got_repository *repo, int *npacked,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err;
+	struct got_tree_object *tree;
+	int i, nentries;
+
+	err = got_object_open_as_tree(&tree, repo, tree_id);
+	if (err)
+		return err;
+
+	nentries = got_object_tree_get_nentries(tree);
+	for (i = 0; i < nentries; i++) {
+		struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
+		struct got_object_id *id = got_tree_entry_get_id(e);
+		mode_t mode = got_tree_entry_get_mode(e);
+
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		if (got_object_tree_entry_is_submodule(e) ||
+		    got_object_idset_contains(traversed_ids, id))
+			continue;
+
+		if (S_ISDIR(mode)) {
+			struct got_object_qid *qid;
+			err = got_object_qid_alloc(&qid, id);
+			if (err)
+				break;
+			STAILQ_INSERT_TAIL(ids, qid, entry);
+		} else if (S_ISREG(mode) || S_ISLNK(mode)) {
+			/*
+			 * This blob is referenced. A symbolic link is
+			 * stored as a blob as well and must not be purged
+			 * while a tree still refers to it.
+			 */
+			err = preserve_loose_object(loose_ids, id, repo,
+			    npacked);
+			if (err)
+				break;
+			err = got_object_idset_add(traversed_ids, id, NULL);
+			if (err)
+				break;
+		}
+	}
+
+	got_object_tree_close(tree);
+	return err;
+}
+
+/*
+ * Traverse the tree hierarchy rooted at tree_id without recursion, using
+ * a queue of pending tree IDs. Each tree which has not been traversed
+ * before is recorded in traversed_ids, marked as referenced, and scanned
+ * for entries via load_tree_entries().
+ */
+static const struct got_error *
+load_tree(struct got_object_idset *loose_ids,
+    struct got_object_idset *traversed_ids, struct got_object_id *tree_id,
+    const char *dpath, struct got_repository *repo, int *npacked,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err = NULL;
+	struct got_object_id_queue tree_ids;
+	struct got_object_qid *qid;
+
+	err = got_object_qid_alloc(&qid, tree_id);
+	if (err)
+		return err;
+
+	STAILQ_INIT(&tree_ids);
+	STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
+
+	while (!STAILQ_EMPTY(&tree_ids)) {
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		/* From here on qid is off the queue and owned by us. */
+		qid = STAILQ_FIRST(&tree_ids);
+		STAILQ_REMOVE_HEAD(&tree_ids, entry);
+
+		if (got_object_idset_contains(traversed_ids, qid->id)) {
+			got_object_qid_free(qid);
+			continue;
+		}
+
+		err = got_object_idset_add(traversed_ids, qid->id, NULL);
+		if (err) {
+			got_object_qid_free(qid);
+			break;
+		}
+
+		/* This tree is referenced. */
+		err = preserve_loose_object(loose_ids, qid->id, repo, npacked);
+		if (err) {
+			got_object_qid_free(qid);
+			break;
+		}
+
+		err = load_tree_entries(&tree_ids, loose_ids, traversed_ids,
+		    qid->id, dpath, repo, npacked, cancel_cb, cancel_arg);
+		got_object_qid_free(qid);
+		if (err)
+			break;
+	}
+
+	got_object_id_queue_free(&tree_ids);
+	return err;
+}
+
+/*
+ * Starting at the commit or tag object identified by id, mark every
+ * object encountered as referenced: the commit or tag itself, a commit
+ * pointed at by a tag, the tree hierarchy of each commit or tagged tree,
+ * and, via a queue of pending IDs, all ancestor commits.
+ * Objects already present in traversed_ids are not visited again.
+ * *ncommits is incremented for each commit or tag scanned, and the
+ * progress callback, if any, is invoked after each of them with a purge
+ * count of -1.
+ */
+static const struct got_error *
+load_commit_or_tag(struct got_object_idset *loose_ids, int *ncommits,
+    int *npacked, struct got_object_idset *traversed_ids,
+    struct got_object_id *id, struct got_repository *repo,
+    got_cleanup_progress_cb progress_cb, void *progress_arg, int nloose,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err;
+	struct got_commit_object *commit = NULL;
+	struct got_tag_object *tag = NULL;
+	struct got_object_id *tree_id = NULL, *tagged_id;
+	struct got_object_id_queue ids;
+	struct got_object_qid *qid;
+	int obj_type;
+
+	err = got_object_qid_alloc(&qid, id);
+	if (err)
+		return err;
+
+	STAILQ_INIT(&ids);
+	STAILQ_INSERT_TAIL(&ids, qid, entry);
+	/*
+	 * The queue owns this element now. Within this function, qid is
+	 * non-NULL only while it refers to an element taken off the queue.
+	 */
+	qid = NULL;
+
+	while (!STAILQ_EMPTY(&ids)) {
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		qid = STAILQ_FIRST(&ids);
+		STAILQ_REMOVE_HEAD(&ids, entry);
+
+		if (got_object_idset_contains(traversed_ids, qid->id)) {
+			got_object_qid_free(qid);
+			qid = NULL;
+			continue;
+		}
+
+		err = got_object_idset_add(traversed_ids, qid->id, NULL);
+		if (err)
+			break;
+
+		/* This commit or tag is referenced. */
+		err = preserve_loose_object(loose_ids, qid->id, repo, npacked);
+		if (err)
+			break;
+
+		err = got_object_get_type(&obj_type, repo, qid->id);
+		if (err)
+			break;
+		switch (obj_type) {
+		case GOT_OBJ_TYPE_COMMIT:
+			err = got_object_open_as_commit(&commit, repo, qid->id);
+			if (err)
+				goto done;
+			break;
+		case GOT_OBJ_TYPE_TAG:
+			err = got_object_open_as_tag(&tag, repo, qid->id);
+			if (err)
+				goto done;
+			break;
+		default:
+			/* should not happen */
+			err = got_error(GOT_ERR_OBJ_TYPE);
+			goto done;
+		}
+
+		/*
+		 * Find a tree object to scan. Never reuse the tree ID of a
+		 * previous iteration; it pointed into a commit or tag object
+		 * which has been closed since.
+		 */
+		tree_id = NULL;
+		if (commit) {
+			tree_id = got_object_commit_get_tree_id(commit);
+		} else if (tag) {
+			obj_type = got_object_tag_get_object_type(tag);
+			switch (obj_type) {
+			case GOT_OBJ_TYPE_COMMIT:
+				/*
+				 * The tagged commit is referenced via this
+				 * tag, even if no other reference leads
+				 * to it. Its tree and parents are handled
+				 * below, so mark it as traversed here.
+				 */
+				tagged_id = got_object_tag_get_object_id(tag);
+				if (!got_object_idset_contains(traversed_ids,
+				    tagged_id)) {
+					err = got_object_idset_add(
+					    traversed_ids, tagged_id, NULL);
+					if (err)
+						goto done;
+					err = preserve_loose_object(loose_ids,
+					    tagged_id, repo, npacked);
+					if (err)
+						goto done;
+				}
+				err = got_object_open_as_commit(&commit, repo,
+				    tagged_id);
+				if (err)
+					goto done;
+				tree_id = got_object_commit_get_tree_id(commit);
+				break;
+			case GOT_OBJ_TYPE_TREE:
+				tree_id = got_object_tag_get_object_id(tag);
+				break;
+			default:
+				/*
+				 * Tag points at something other than a
+				 * commit or tree. Leave this weird tag object
+				 * and the object it points to on disk.
+				 */
+				err = got_object_idset_remove(NULL, loose_ids,
+				    qid->id);
+				if (err && err->code != GOT_ERR_NO_OBJ)
+					goto done;
+				err = got_object_idset_remove(NULL, loose_ids,
+				    got_object_tag_get_object_id(tag));
+				if (err && err->code != GOT_ERR_NO_OBJ)
+					goto done;
+				err = NULL;
+				break;
+			}
+		}
+
+		if (tree_id) {
+			err = load_tree(loose_ids, traversed_ids, tree_id, "",
+			    repo, npacked, cancel_cb, cancel_arg);
+			if (err)
+				break;
+		}
+
+		if (commit || tag)
+			(*ncommits)++; /* scanned tags are counted as commits */
+
+		if (progress_cb) {
+			err = progress_cb(progress_arg, nloose, *ncommits, -1);
+			if (err)
+				break;
+		}
+
+		if (commit) {
+			/* Find parent commits to scan. */
+			const struct got_object_id_queue *parent_ids;
+			parent_ids = got_object_commit_get_parent_ids(commit);
+			err = got_object_id_queue_copy(parent_ids, &ids);
+			if (err)
+				break;
+			got_object_commit_close(commit);
+			commit = NULL;
+		}
+		if (tag) {
+			got_object_tag_close(tag);
+			tag = NULL;
+		}
+		got_object_qid_free(qid);
+		qid = NULL;
+	}
+done:
+	if (qid)
+		got_object_qid_free(qid);
+	if (commit)
+		got_object_commit_close(commit);
+	if (tag)
+		got_object_tag_close(tag);
+	/* Release IDs still pending if the traversal stopped early. */
+	got_object_id_queue_free(&ids);
+	return err;
+}
+
+/* Argument passed to purge_loose_object() for each ID in the purge set. */
+struct purge_loose_object_arg {
+	struct got_repository *repo;
+
+	/* Optional progress reporting; progress_cb may be NULL. */
+	got_cleanup_progress_cb progress_cb;
+	void *progress_arg;
+
+	/* Counts handed to the progress callback. */
+	int nloose;
+	int ncommits;
+	int npurged;		/* incremented per purged object */
+
+	off_t size_purged;	/* sum of st_size of purged objects */
+	int dry_run;		/* if set, count but do not unlink */
+};
+
+/*
+ * ID set iteration callback which purges one loose object.
+ * The object's file size is obtained first. Unless this is a dry run,
+ * the object file is then unlinked while a lock on its path is held.
+ * The purge count and purged size are updated and progress is reported.
+ * The data parameter is not used.
+ */
+static const struct got_error *
+purge_loose_object(struct got_object_id *id, void *data, void *arg)
+{
+	struct purge_loose_object_arg *a = arg;
+	const struct got_error *err, *unlock_err = NULL;
+	struct got_lockfile *lf = NULL;
+	struct stat sb;
+	char *path = NULL;
+	int fd = -1;
+
+	err = got_object_get_path(&path, id, a->repo);
+	if (err)
+		return err;
+
+	err = got_object_open_loose_fd(&fd, id, a->repo);
+	if (err)
+		goto done;
+
+	if (fstat(fd, &sb) == -1) {
+		err = got_error_from_errno("fstat");
+		goto done;
+	}
+
+	if (!a->dry_run) {
+		err = got_lockfile_lock(&lf, path);
+		if (err == NULL && unlink(path) == -1)
+			err = got_error_from_errno2("unlink", path);
+		if (err)
+			goto done;
+	}
+
+	a->npurged += 1;
+	a->size_purged += sb.st_size;
+	if (a->progress_cb != NULL) {
+		err = a->progress_cb(a->progress_arg, a->nloose,
+		    a->ncommits, a->npurged);
+	}
+done:
+	if (fd != -1 && close(fd) == -1 && err == NULL)
+		err = got_error_from_errno("close");
+	free(path);
+	if (lf != NULL)
+		unlock_err = got_lockfile_unlock(lf);
+	if (err)
+		return err;
+	return unlock_err;
+}
+
+/*
+ * Purge loose objects which are not reachable via any reference, as well
+ * as loose objects which are reachable but also stored in a pack file.
+ *
+ * All loose object IDs are collected into a purge set. The commits and
+ * tags which references resolve to are then traversed, and referenced
+ * objects which are not packed are taken out of the set. Whatever
+ * remains in the set is purged, or merely counted if dry_run is set.
+ *
+ * *size_before receives the total size of loose object files found.
+ * *size_after receives that size minus the size of purged objects.
+ * *npacked receives the number of referenced loose objects which were
+ * also found in a pack file.
+ * If there are no loose objects, all three are zero and nothing is done.
+ */
+const struct got_error *
+got_repo_purge_unreferenced_loose_objects(struct got_repository *repo,
+    off_t *size_before, off_t *size_after, int *npacked, int dry_run,
+    got_cleanup_progress_cb progress_cb, void *progress_arg,
+    got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err;
+	struct got_object_idset *loose_ids = NULL;
+	struct got_object_idset *traversed_ids = NULL;
+	struct got_object_id **referenced_ids = NULL;
+	int i, nreferenced = 0, nloose, ncommits = 0;
+	struct got_reflist_head refs;
+	struct purge_loose_object_arg arg;
+
+	TAILQ_INIT(&refs);
+
+	*size_before = 0;
+	*size_after = 0;
+	*npacked = 0;
+
+	err = get_loose_object_ids(&loose_ids, size_before,
+	    progress_cb, progress_arg, repo);
+	if (err)
+		return err;
+	nloose = got_object_idset_num_elements(loose_ids);
+	if (nloose == 0) {
+		got_object_idset_free(loose_ids);
+		return NULL;
+	}
+
+	traversed_ids = got_object_idset_alloc();
+	if (traversed_ids == NULL) {
+		err = got_error_from_errno("got_object_idset_alloc");
+		goto done;
+	}
+
+	err = got_ref_list(&refs, repo, "", got_ref_cmp_by_name, NULL);
+	if (err)
+		goto done;
+
+	err = get_reflist_object_ids(&referenced_ids, &nreferenced,
+	    (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG),
+	    &refs, repo, cancel_cb, cancel_arg);
+	if (err) {
+		/*
+		 * Do not touch the output parameters of a failed call
+		 * during cleanup below.
+		 */
+		referenced_ids = NULL;
+		nreferenced = 0;
+		goto done;
+	}
+
+	for (i = 0; i < nreferenced; i++) {
+		struct got_object_id *id = referenced_ids[i];
+		err = load_commit_or_tag(loose_ids, &ncommits, npacked,
+		    traversed_ids, id, repo, progress_cb, progress_arg, nloose,
+		    cancel_cb, cancel_arg);
+		if (err)
+			goto done;
+	}
+
+	/* Produce a final progress report in case no objects can be purged. */
+	if (got_object_idset_num_elements(loose_ids) == 0 && progress_cb) {
+		err = progress_cb(progress_arg, nloose, ncommits, 0);
+		if (err)
+			goto done;
+	}
+
+	/* Any remaining loose objects are unreferenced and can be purged. */
+	arg.repo = repo;
+	arg.progress_arg = progress_arg;
+	arg.progress_cb = progress_cb;
+	arg.nloose = nloose;
+	arg.npurged = 0;
+	arg.size_purged = 0;
+	arg.ncommits = ncommits;
+	arg.dry_run = dry_run;
+	err = got_object_idset_for_each(loose_ids, purge_loose_object, &arg);
+	if (err)
+		goto done;
+	*size_after = *size_before - arg.size_purged;
+done:
+	if (loose_ids)
+		got_object_idset_free(loose_ids);
+	if (traversed_ids)
+		got_object_idset_free(traversed_ids);
+	/*
+	 * NOTE(review): assumes get_reflist_object_ids() hands ownership of
+	 * the array and of each ID in it to the caller -- confirm.
+	 */
+	for (i = 0; i < nreferenced; i++)
+		free(referenced_ids[i]);
+	free(referenced_ids);
+	got_ref_list_free(&refs);
+	return err;
+}
blob - 994b3c728968b83c996b8eb7fd40f01354a38383
blob + ef0efe1dbbe320c62fff14e7f2e0b94db43f38ff
--- regress/cmdline/Makefile
+++ regress/cmdline/Makefile
REGRESS_TARGETS=checkout update status log add rm diff blame branch tag \
ref commit revert cherrypick backout rebase import histedit \
- integrate stage unstage cat clone fetch tree pack
+ integrate stage unstage cat clone fetch tree pack cleanup
NOOBJ=Yes
GOT_TEST_ROOT=/tmp
pack:
./pack.sh -q -r "$(GOT_TEST_ROOT)"
+cleanup:
+ ./cleanup.sh -q -r "$(GOT_TEST_ROOT)"
+
+
.include <bsd.regress.mk>
blob - /dev/null
blob + d68d9728b364b87e77f221ab917cc0da304d30d2 (mode 755)
--- /dev/null
+++ regress/cmdline/cleanup.sh
+#!/bin/sh
+#
+# Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+. ./common.sh
+
+# Verify that 'gotadmin cleanup' purges the loose objects which become
+# unreferenced once a branch and the work tree's base commit reference
+# have been deleted, and that 'cleanup -n' removes nothing.
+test_cleanup_unreferenced_loose_objects() {
+	local testroot=`test_init cleanup_unreferenced_loose_objects`
+
+	nloose0=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose0" != "8" ]; then
+		echo "unexpected number of loose objects: $nloose0" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# create a branch with some changes
+	got branch -r $testroot/repo newbranch >/dev/null
+
+	got checkout -b newbranch $testroot/repo $testroot/wt >/dev/null
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "got checkout command failed unexpectedly"
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	echo 'foo' > $testroot/wt/foo
+	(cd $testroot/wt && got add foo > /dev/null)
+	echo 'modified alpha' > $testroot/wt/alpha
+	(cd $testroot/wt && got commit -m 'newbranch commit' > /dev/null)
+	# remember the IDs of the objects created by this commit
+	local commit1=`git_show_branch_head $testroot/repo newbranch`
+	local tree1=`got cat -r $testroot/repo $commit1 | \
+		grep ^tree | cut -d ' ' -f2`
+	local alpha1=`got tree -r $testroot/repo -i -c $commit1 | \
+		grep "[0-9a-f] alpha$" | cut -d' ' -f 1`
+	local foo1=`got tree -r $testroot/repo -i -c $commit1 | \
+		grep "[0-9a-f] foo$" | cut -d' ' -f 1`
+
+	nloose1=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose1" != "12" ]; then
+		echo "unexpected number of loose objects: $nloose1" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# delete the branch
+	got branch -r $testroot/repo -d newbranch >/dev/null
+
+	# remove worktree's base commit reference, which points at the branch
+	wt_uuid=`(cd $testroot/wt && got info | grep 'UUID:' | \
+		cut -d ':' -f 2 | tr -d ' ')`
+	got ref -r $testroot/repo -d "refs/got/worktree/base-$wt_uuid"
+
+	# cleanup -n should not remove any objects
+	ls -1 -R $testroot/repo/.git/objects > $testroot/objects-before
+	gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	ls -1 -R $testroot/repo/.git/objects > $testroot/objects-after
+	cmp -s $testroot/objects-before $testroot/objects-after
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/objects-before $testroot/objects-after
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	# cleanup should remove loose objects that belonged to the branch
+	gotadmin cleanup -q -r $testroot/repo > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "gotadmin cleanup failed unexpectedly" >&2
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	nloose2=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose2" != "$nloose0" ]; then
+		echo "unexpected number of loose objects: $nloose2" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	for id in $commit1 $tree1 $alpha1 $foo1; do
+		path=`get_loose_object_path $testroot/repo $id`
+		if [ -e "$path" ]; then
+			echo "loose object $path was not purged" >&2
+			ret=1
+			break
+		fi
+	done
+
+	test_done "$testroot" "$ret"
+}
+
+# Verify that 'gotadmin cleanup' purges loose objects which are also
+# stored in a pack file, and that 'cleanup -n' removes nothing.
+test_cleanup_redundant_loose_objects() {
+	local testroot=`test_init cleanup_redundant_loose_objects`
+
+	# tags should also be packed
+	got tag -r $testroot/repo -m 1.0 1.0 >/dev/null
+
+	nloose0=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose0" != "9" ]; then
+		echo "unexpected number of loose objects: $nloose0" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# no pack files should exist yet
+	ls $testroot/repo/.git/objects/pack/ > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	gotadmin pack -r $testroot/repo > /dev/null
+
+	npacked0=`gotadmin info -r $testroot/repo | grep '^packed objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$npacked0" != "9" ]; then
+		echo "unexpected number of packed objects: $npacked0" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# cleanup -n should not remove any objects
+	ls -1 -R $testroot/repo/.git/objects > $testroot/objects-before
+	gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	ls -1 -R $testroot/repo/.git/objects > $testroot/objects-after
+	cmp -s $testroot/objects-before $testroot/objects-after
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/objects-before $testroot/objects-after
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	nloose1=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose1" != "$nloose0" ]; then
+		echo "unexpected number of loose objects: $nloose1" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# cleanup should remove all loose objects
+	gotadmin cleanup -q -r $testroot/repo > $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		echo "gotadmin cleanup failed unexpectedly" >&2
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+	echo -n > $testroot/stdout.expected
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	nloose2=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \
+		cut -d ':' -f 2 | tr -d ' '`
+	if [ "$nloose2" != "0" ]; then
+		echo "unexpected number of loose objects: $nloose2" >&2
+		test_done "$testroot" "1"
+		return 1
+	fi
+
+	# any file left in an object sub-directory is a failure
+	for d in $testroot/repo/.git/objects/[0-9a-f][0-9a-f]; do
+		id0=`basename $d`
+		ret=0
+		for e in `ls $d`; do
+			obj_id=${id0}${e}
+			echo "loose object $obj_id was not purged" >&2
+			ret=1
+			break
+		done
+		if [ "$ret" = "1" ]; then
+			break
+		fi
+	done
+
+	test_done "$testroot" "$ret"
+}
+
+test_parseargs "$@"
+run_test test_cleanup_unreferenced_loose_objects
+run_test test_cleanup_redundant_loose_objects