commit b3d68e7f99c78cbcf672100fb14310e4b4f18482 from: Stefan Sperling date: Sat Jul 03 19:49:37 2021 UTC implement 'gotadmin cleanup' commit - 8775a682f984b3741057362377b06011b56c085d commit + b3d68e7f99c78cbcf672100fb14310e4b4f18482 blob - 9dcb7b3ff76d3dc250925d02e4d75ab276eb5f62 blob + ffead95c145f5b7ed6e82153c2de67b9f2606c78 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -181,7 +181,81 @@ and a break-down of the number of objects per object t .It Cm ls Short alias for .Cm listpack . +.It Cm cleanup Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc +Purge unreferenced loose objects from the repository and display +the amount of disk space which has been freed as a result. +.Pp +Unreferenced objects are present in the repository but cannot be +reached via any reference in the entire +.Pa refs/ +namespace. +.Pp +Loose objects are stored as individual files beneath the repository's +.Pa objects/ +directory, +spread across 256 sub-directories named after the 256 possible +hexadecimal values of the first byte of an object identifier. +.Pp +Packed objects stored in pack files under +.Pa objects/pack/ +will not be purged. +However, if redundant copies of packed objects exist in loose form, +such redundant copies will be purged. +.Pp +Objects will usually become unreferenced as a result of deleting +branches or tags with +.Cm got branch -d +or +.Cm got tag -d . +Deleting arbitrary references with +.Cm got ref -d +may also leave unreferenced objects behind. +.Pp +In order to determine the set of objects which are referenced, search +all references for commit objects and tag objects, and traverse the +corresponding tree object hierarchies. +Any loose object IDs not encountered during this search are unreferenced +and thus subject to removal. +Display the number of commits which have been searched to indicate progress. +.Pp +References in the +.Pa refs/got +namespace may prevent objects from being purged. 
+This includes references in the +.Pa refs/got/worktree +namespace created by +.Cm got checkout +and +.Cm got update , +as well as references in the +.Pa refs/got/backup +namespace created by +.Cm got rebase +and +.Cm got histedit . +.Cm gotadmin cleanup +will only purge corresponding objects once such references have been +deleted with +.Cm got ref -d . +.Pp +The options for +.Cm gotadmin cleanup +are as follows: +.Bl -tag -width Ds +.It Fl n +Display the usual progress output and summary information but do not actually +purge any objects. +.It Fl r Ar repository-path +Use the repository at the specified path. +If not specified, assume the repository is located at or above the current +working directory. +.It Fl q +Suppress progress reporting and disk space summary output. .El +.It Cm cl +Short alias for +.Cm cleanup . +.El .Sh EXIT STATUS .Ex -std gotadmin .Sh SEE ALSO @@ -202,9 +276,10 @@ to perform some tasks. In particular: .Bl -bullet .It -Reducing the size of repositories by removing redundant or unreferenced -data requires -.Xr git-gc 1 . +Removing redundant or unreferenced packed objects requires +.Xr git-gc 1 +and perhaps +.Xr git-repack 1 . .It Exporting data from repositories requires .Xr git-fast-export 1 . 
blob - 624efcfd12132d41ed3c551e9867041820d77686 blob + 22ab038974a7575228d4b2fc26a06d6feb886e0f --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -82,17 +82,20 @@ __dead static void usage_info(void); __dead static void usage_pack(void); __dead static void usage_indexpack(void); __dead static void usage_listpack(void); +__dead static void usage_cleanup(void); static const struct got_error* cmd_info(int, char *[]); static const struct got_error* cmd_pack(int, char *[]); static const struct got_error* cmd_indexpack(int, char *[]); static const struct got_error* cmd_listpack(int, char *[]); +static const struct got_error* cmd_cleanup(int, char *[]); static struct gotadmin_cmd gotadmin_commands[] = { { "info", cmd_info, usage_info, "" }, { "pack", cmd_pack, usage_pack, "" }, { "indexpack", cmd_indexpack, usage_indexpack,"ix" }, { "listpack", cmd_listpack, usage_listpack, "ls" }, + { "cleanup", cmd_cleanup, usage_cleanup, "cl" }, }; static void @@ -889,4 +892,166 @@ done: free(pack_hash); free(packfile_path); return error; +} + +__dead static void +usage_cleanup(void) +{ + fprintf(stderr, "usage: %s cleanup [-n] [-r repository-path] [-q]\n", + getprogname()); + exit(1); +} + +struct got_cleanup_progress_arg { + int last_nloose; + int last_ncommits; + int last_npurged; + int verbosity; + int printed_something; + int dry_run; +}; + +static const struct got_error * +cleanup_progress(void *arg, int nloose, int ncommits, int npurged) +{ + struct got_cleanup_progress_arg *a = arg; + int print_loose = 0, print_commits = 0, print_purged = 0; + + if (a->last_nloose != nloose) { + print_loose = 1; + a->last_nloose = nloose; + } + if (a->last_ncommits != ncommits) { + print_loose = 1; + print_commits = 1; + a->last_ncommits = ncommits; + } + if (a->last_npurged != npurged) { + print_loose = 1; + print_commits = 1; + print_purged = 1; + a->last_npurged = npurged; + } + + if (a->verbosity < 0) + return NULL; + + if (print_loose || print_commits || print_purged) + printf("\r"); + if 
(print_loose) + printf("%d loose object%s", nloose, nloose == 1 ? "" : "s"); + if (print_commits) + printf("; %d commit%s scanned", ncommits, + ncommits == 1 ? "" : "s"); + if (print_purged) { + if (a->dry_run) { + printf("; %d object%s could be purged", npurged, + npurged == 1 ? "" : "s"); + } else { + printf("; %d object%s purged", npurged, + npurged == 1 ? "" : "s"); + } + } + if (print_loose || print_commits || print_purged) { + a->printed_something = 1; + fflush(stdout); + } + return NULL; } + +static const struct got_error * +cmd_cleanup(int argc, char *argv[]) +{ + const struct got_error *error = NULL; + char *cwd = NULL, *repo_path = NULL; + struct got_repository *repo = NULL; + int ch, dry_run = 0, npacked = 0, verbosity = 0; + struct got_cleanup_progress_arg cpa; + off_t size_before, size_after; + char scaled_before[FMT_SCALED_STRSIZE]; + char scaled_after[FMT_SCALED_STRSIZE]; + char scaled_diff[FMT_SCALED_STRSIZE]; + + while ((ch = getopt(argc, argv, "r:nq")) != -1) { + switch (ch) { + case 'r': + repo_path = realpath(optarg, NULL); + if (repo_path == NULL) + return got_error_from_errno2("realpath", + optarg); + got_path_strip_trailing_slashes(repo_path); + break; + case 'n': + dry_run = 1; + break; + case 'q': + verbosity = -1; + break; + default: + usage_cleanup(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + +#ifndef PROFILE + if (pledge("stdio rpath wpath cpath flock proc exec sendfd unveil", + NULL) == -1) + err(1, "pledge"); +#endif + cwd = getcwd(NULL, 0); + if (cwd == NULL) { + error = got_error_from_errno("getcwd"); + goto done; + } + + error = got_repo_open(&repo, repo_path ? 
repo_path : cwd, NULL); + if (error) + goto done; + + error = apply_unveil(got_repo_get_path_git_dir(repo), 0); + if (error) + goto done; + + memset(&cpa, 0, sizeof(cpa)); + cpa.last_ncommits = -1; + cpa.last_npurged = -1; + cpa.dry_run = dry_run; + cpa.verbosity = verbosity; + error = got_repo_purge_unreferenced_loose_objects(repo, + &size_before, &size_after, &npacked, dry_run, + cleanup_progress, &cpa, check_cancelled, NULL); + if (cpa.printed_something) + printf("\n"); + if (error) + goto done; + if (cpa.printed_something) { + if (fmt_scaled(size_before, scaled_before) == -1) { + error = got_error_from_errno("fmt_scaled"); + goto done; + } + if (fmt_scaled(size_after, scaled_after) == -1) { + error = got_error_from_errno("fmt_scaled"); + goto done; + } + if (fmt_scaled(size_before - size_after, scaled_diff) == -1) { + error = got_error_from_errno("fmt_scaled"); + goto done; + } + printf("loose total size before: %s\n", scaled_before); + printf("loose total size after: %s\n", scaled_after); + if (dry_run) { + printf("disk space which would be freed: %s\n", + scaled_diff); + } else + printf("disk space freed: %s\n", scaled_diff); + printf("loose objects also found in pack files: %d\n", npacked); + } +done: + if (repo) + got_repo_close(repo); + free(cwd); + return error; +} blob - 6c27cd2e90cece2a2487a786135e4cb6b269f1ad blob + 99fa6426ad538680697e303f69032b8c33f9266a --- include/got_repository_admin.h +++ include/got_repository_admin.h @@ -67,3 +67,21 @@ const struct got_error * got_repo_list_pack(FILE *packfile, struct got_object_id *pack_hash, struct got_repository *repo, got_pack_list_cb list_cb, void *list_arg, got_cancel_cb cancel_cb, void *cancel_arg); + +/* A callback function which gets invoked with cleanup information to print. 
*/ +typedef const struct got_error *(*got_cleanup_progress_cb)(void *arg, + int nloose, int ncommits, int npurged); + +/* + * Walk objects reachable via references to determine whether any loose + * objects can be removed from disk. Do remove such objects from disk + * unless the dry_run parameter is set. + * Return the disk space size occupied by loose objects before and after + * the operation. + * Return the number of loose objects which are also stored in a pack file. + */ +const struct got_error * +got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, + off_t *size_before, off_t *size_after, int *npacked, int dry_run, + got_cleanup_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg); blob - 6b796d3554548cbf04c5c143d4e5311d6545dd03 blob + 7f91b3bcc5e247a002e805274461026d32267fe5 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -100,6 +100,10 @@ const struct got_error *got_object_get_path(char **, s struct got_repository *); const struct got_error *got_object_open_loose_fd(int *, struct got_object_id *, struct got_repository *); +const struct got_error *got_object_open_packed(struct got_object **, + struct got_object_id *, struct got_repository *); +const struct got_error *got_object_read_header_privsep(struct got_object **, + struct got_repository *, int); const struct got_error *got_object_open(struct got_object **, struct got_repository *, struct got_object_id *); const struct got_error *got_object_raw_open(struct got_raw_object **, blob - 4dff06d265c9aca4dc4216121372ec79495fcd96 blob + 5c5f3bb280d592509f3a929c1b15d6003d2faa97 --- lib/object.c +++ lib/object.c @@ -332,8 +332,8 @@ read_packed_object_raw_privsep(uint8_t **outbuf, off_t idx, id); } -static const struct got_error * -open_packed_object(struct got_object **obj, struct got_object_id *id, +const struct got_error * +got_object_open_packed(struct got_object **obj, struct got_object_id *id, struct got_repository *repo) { const struct got_error 
*err = NULL; @@ -450,9 +450,9 @@ start_read_object_child(struct got_repository *repo) return NULL; } -static const struct got_error * -read_object_header_privsep(struct got_object **obj, struct got_repository *repo, - int obj_fd) +const struct got_error * +got_object_read_header_privsep(struct got_object **obj, + struct got_repository *repo, int obj_fd) { const struct got_error *err; @@ -498,7 +498,7 @@ got_object_open(struct got_object **obj, struct got_re return NULL; } - err = open_packed_object(obj, id, repo); + err = got_object_open_packed(obj, id, repo); if (err && err->code != GOT_ERR_NO_OBJ) return err; if (*obj) { @@ -513,7 +513,7 @@ got_object_open(struct got_object **obj, struct got_re return err; } - err = read_object_header_privsep(obj, repo, fd); + err = got_object_read_header_privsep(obj, repo, fd); if (err) return err; @@ -1785,7 +1785,7 @@ open_tag(struct got_tag_object **tag, struct got_repos err = got_object_open_loose_fd(&fd, id, repo); if (err) return err; - err = read_object_header_privsep(&obj, repo, fd); + err = got_object_read_header_privsep(&obj, repo, fd); if (err) return err; obj_type = obj->type; blob - a4af1c4099e0f496d253d2ae12845c1dc18fa809 blob + 66d89c5aedcc5401648b2b7866c1bea733341d2b --- lib/repository_admin.c +++ lib/repository_admin.c @@ -45,12 +45,14 @@ #include "got_lib_delta.h" #include "got_lib_object.h" +#include "got_lib_object_idset.h" #include "got_lib_object_cache.h" #include "got_lib_pack.h" #include "got_lib_privsep.h" #include "got_lib_repository.h" #include "got_lib_pack_create.h" #include "got_lib_sha1.h" +#include "got_lib_lockfile.h" #ifndef nitems #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) @@ -591,5 +593,590 @@ done: free(packpath); if (packidx) got_packidx_close(packidx); + return err; +} + +static const struct got_error * +get_loose_object_ids(struct got_object_idset **loose_ids, off_t *ondisk_size, + got_cleanup_progress_cb progress_cb, void *progress_arg, + struct got_repository *repo) +{ + const 
struct got_error *err = NULL; + char *path_objects = NULL, *path = NULL; + DIR *dir = NULL; + struct got_object *obj = NULL; + struct got_object_id id; + int i, fd = -1; + struct stat sb; + + *ondisk_size = 0; + *loose_ids = got_object_idset_alloc(); + if (*loose_ids == NULL) + return got_error_from_errno("got_object_idset_alloc"); + + path_objects = got_repo_get_path_objects(repo); + if (path_objects == NULL) { + err = got_error_from_errno("got_repo_get_path_objects"); + goto done; + } + + for (i = 0; i <= 0xff; i++) { + struct dirent *dent; + + if (asprintf(&path, "%s/%.2x", path_objects, i) == -1) { + err = got_error_from_errno("asprintf"); + break; + } + + dir = opendir(path); + if (dir == NULL) { + if (errno == ENOENT) { + err = NULL; + continue; + } + err = got_error_from_errno2("opendir", path); + break; + } + + while ((dent = readdir(dir)) != NULL) { + char *id_str; + + if (strcmp(dent->d_name, ".") == 0 || + strcmp(dent->d_name, "..") == 0) + continue; + + if (asprintf(&id_str, "%.2x%s", i, dent->d_name) == -1) { + err = got_error_from_errno("asprintf"); + goto done; + } + + memset(&id, 0, sizeof(id)); + if (!got_parse_sha1_digest(id.sha1, id_str)) { + free(id_str); + continue; + } + free(id_str); + + err = got_object_open_loose_fd(&fd, &id, repo); + if (err) + goto done; + if (fstat(fd, &sb) == -1) { + err = got_error_from_errno("fstat"); + goto done; + } + err = got_object_read_header_privsep(&obj, repo, fd); + if (err) + goto done; + fd = -1; /* already closed */ + + switch (obj->type) { + case GOT_OBJ_TYPE_COMMIT: + case GOT_OBJ_TYPE_TREE: + case GOT_OBJ_TYPE_BLOB: + case GOT_OBJ_TYPE_TAG: + break; + default: + err = got_error_fmt(GOT_ERR_OBJ_TYPE, + "%d", obj->type); + goto done; + } + got_object_close(obj); + obj = NULL; + (*ondisk_size) += sb.st_size; + err = got_object_idset_add(*loose_ids, &id, NULL); + if (err) + goto done; + if (progress_cb) { + err = progress_cb(progress_arg, + got_object_idset_num_elements(*loose_ids), + -1, -1); + if (err) + 
goto done; + } + } + + if (closedir(dir) != 0) { + err = got_error_from_errno("closedir"); + goto done; + } + dir = NULL; + + free(path); + path = NULL; + } +done: + if (dir && closedir(dir) != 0 && err == NULL) + err = got_error_from_errno("closedir"); + if (fd != -1 && close(fd) == -1 && err == NULL) + err = got_error_from_errno("close"); + if (err) { + got_object_idset_free(*loose_ids); + *loose_ids = NULL; + } + if (obj) + got_object_close(obj); + free(path_objects); + free(path); return err; } + +static const struct got_error * +search_packidx(int *found, struct got_object_id *id, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_packidx *packidx = NULL; + int idx; + + *found = 0; + + err = got_repo_search_packidx(&packidx, &idx, repo, id); + if (err == NULL) + *found = 1; /* object is already packed */ + else if (err->code == GOT_ERR_NO_OBJ) + err = NULL; + return err; +} + +static const struct got_error * +preserve_loose_object(struct got_object_idset *loose_ids, + struct got_object_id *id, struct got_repository *repo, int *npacked) +{ + const struct got_error *err = NULL; + int is_packed; + + if (!got_object_idset_contains(loose_ids, id)) + return NULL; + + err = search_packidx(&is_packed, id, repo); + if (err) + return err; + if (is_packed) { + struct got_object *obj; + + /* + * Sanity check: Open the packed object to prevent a + * corrupt pack index from misleading us. + */ + err = got_object_open_packed(&obj, id, repo); + if (err == NULL) { + got_object_close(obj); + /* + * The object is referenced and packed. + * We can purge the redundantly stored loose object. + */ + (*npacked)++; + return NULL; + } else if (err->code != GOT_ERR_NO_OBJ) + return err; + } + + /* + * This object is referenced and not packed. + * Remove it from our purge set. 
+ */ + return got_object_idset_remove(NULL, loose_ids, id); +} + +static const struct got_error * +load_tree_entries(struct got_object_id_queue *ids, + struct got_object_idset *loose_ids, + struct got_object_idset *traversed_ids, struct got_object_id *tree_id, + const char *dpath, struct got_repository *repo, int *npacked, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err; + struct got_tree_object *tree; + char *p = NULL; + int i; + + err = got_object_open_as_tree(&tree, repo, tree_id); + if (err) + return err; + + for (i = 0; i < got_object_tree_get_nentries(tree); i++) { + struct got_tree_entry *e = got_object_tree_get_entry(tree, i); + struct got_object_id *id = got_tree_entry_get_id(e); + mode_t mode = got_tree_entry_get_mode(e); + + if (cancel_cb) { + err = (*cancel_cb)(cancel_arg); + if (err) + break; + } + + if (got_object_tree_entry_is_symlink(e) || + got_object_tree_entry_is_submodule(e) || + got_object_idset_contains(traversed_ids, id)) + continue; + + if (asprintf(&p, "%s%s%s", dpath, dpath[0] != '\0' ? "/" : "", + got_tree_entry_get_name(e)) == -1) { + err = got_error_from_errno("asprintf"); + break; + } + + if (S_ISDIR(mode)) { + struct got_object_qid *qid; + err = got_object_qid_alloc(&qid, id); + if (err) + break; + STAILQ_INSERT_TAIL(ids, qid, entry); + } else if (S_ISREG(mode)) { + /* This blob is referenced. 
*/ + err = preserve_loose_object(loose_ids, id, repo, + npacked); + if (err) + break; + err = got_object_idset_add(traversed_ids, id, NULL); + if (err) + break; + + } + free(p); + p = NULL; + } + + got_object_tree_close(tree); + free(p); + return err; +} + +static const struct got_error * +load_tree(struct got_object_idset *loose_ids, + struct got_object_idset *traversed_ids, struct got_object_id *tree_id, + const char *dpath, struct got_repository *repo, int *npacked, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err = NULL; + struct got_object_id_queue tree_ids; + struct got_object_qid *qid; + + err = got_object_qid_alloc(&qid, tree_id); + if (err) + return err; + + STAILQ_INIT(&tree_ids); + STAILQ_INSERT_TAIL(&tree_ids, qid, entry); + + while (!STAILQ_EMPTY(&tree_ids)) { + if (cancel_cb) { + err = (*cancel_cb)(cancel_arg); + if (err) + break; + } + + qid = STAILQ_FIRST(&tree_ids); + STAILQ_REMOVE_HEAD(&tree_ids, entry); + + if (got_object_idset_contains(traversed_ids, qid->id)) { + got_object_qid_free(qid); + continue; + } + + err = got_object_idset_add(traversed_ids, qid->id, NULL); + if (err) { + got_object_qid_free(qid); + break; + } + + /* This tree is referenced. 
*/ + err = preserve_loose_object(loose_ids, qid->id, repo, npacked); + if (err) { + got_object_qid_free(qid); /* already dequeued, so the queue cleanup below cannot reach it */ + break; + } + + err = load_tree_entries(&tree_ids, loose_ids, traversed_ids, + qid->id, dpath, repo, npacked, cancel_cb, cancel_arg); + got_object_qid_free(qid); + if (err) + break; + } + + got_object_id_queue_free(&tree_ids); + return err; +} + +static const struct got_error * +load_commit_or_tag(struct got_object_idset *loose_ids, int *ncommits, + int *npacked, struct got_object_idset *traversed_ids, + struct got_object_id *id, struct got_repository *repo, + got_cleanup_progress_cb progress_cb, void *progress_arg, int nloose, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err; + struct got_commit_object *commit = NULL; + struct got_tag_object *tag = NULL; + struct got_object_id *tree_id = NULL; + struct got_object_id_queue ids; + struct got_object_qid *qid; + int obj_type; + + err = got_object_qid_alloc(&qid, id); + if (err) + return err; + + STAILQ_INIT(&ids); + STAILQ_INSERT_TAIL(&ids, qid, entry); + + while (!STAILQ_EMPTY(&ids)) { + if (cancel_cb) { + err = (*cancel_cb)(cancel_arg); + if (err) + break; + } + + qid = STAILQ_FIRST(&ids); + STAILQ_REMOVE_HEAD(&ids, entry); + + if (got_object_idset_contains(traversed_ids, qid->id)) { + got_object_qid_free(qid); + qid = NULL; + continue; + } + + err = got_object_idset_add(traversed_ids, qid->id, NULL); + if (err) + break; + + /* This commit or tag is referenced. */ + err = preserve_loose_object(loose_ids, qid->id, repo, npacked); + if (err) + break; + + err = got_object_get_type(&obj_type, repo, qid->id); + if (err) + break; + switch (obj_type) { + case GOT_OBJ_TYPE_COMMIT: + err = got_object_open_as_commit(&commit, repo, qid->id); + if (err) + goto done; + break; + case GOT_OBJ_TYPE_TAG: + err = got_object_open_as_tag(&tag, repo, qid->id); + if (err) + goto done; + break; + default: + /* should not happen */ + err = got_error(GOT_ERR_OBJ_TYPE); + goto done; + } + + /* Find a tree object to scan. 
*/ + if (commit) { + tree_id = got_object_commit_get_tree_id(commit); + } else if (tag) { + obj_type = got_object_tag_get_object_type(tag); + switch (obj_type) { + case GOT_OBJ_TYPE_COMMIT: + err = got_object_open_as_commit(&commit, repo, + got_object_tag_get_object_id(tag)); + if (err) + goto done; + tree_id = got_object_commit_get_tree_id(commit); + break; + case GOT_OBJ_TYPE_TREE: + tree_id = got_object_tag_get_object_id(tag); + break; + default: + /* + * Tag points at something other than a + * commit or tree. Leave this weird tag object + * and the object it points to on disk. + */ + err = got_object_idset_remove(NULL, loose_ids, + qid->id); + if (err && err->code != GOT_ERR_NO_OBJ) + goto done; + err = got_object_idset_remove(NULL, loose_ids, + got_object_tag_get_object_id(tag)); + if (err && err->code != GOT_ERR_NO_OBJ) + goto done; + err = NULL; + break; + } + } + + if (tree_id) { + err = load_tree(loose_ids, traversed_ids, tree_id, "", + repo, npacked, cancel_cb, cancel_arg); + if (err) + break; + } + + if (commit || tag) + (*ncommits)++; /* scanned tags are counted as commits */ + + if (progress_cb) { + err = progress_cb(progress_arg, nloose, *ncommits, -1); + if (err) + break; + } + + if (commit) { + /* Find parent commits to scan. 
*/ + const struct got_object_id_queue *parent_ids; + parent_ids = got_object_commit_get_parent_ids(commit); + err = got_object_id_queue_copy(parent_ids, &ids); + if (err) + break; + got_object_commit_close(commit); + commit = NULL; + } + if (tag) { + got_object_tag_close(tag); + tag = NULL; + } + got_object_qid_free(qid); + qid = NULL; + } +done: + if (qid) + got_object_qid_free(qid); + if (commit) + got_object_commit_close(commit); + if (tag) + got_object_tag_close(tag); + return err; +} + +struct purge_loose_object_arg { + struct got_repository *repo; + got_cleanup_progress_cb progress_cb; + void *progress_arg; + int nloose; + int ncommits; + int npurged; + off_t size_purged; + int dry_run; +}; + +static const struct got_error * +purge_loose_object(struct got_object_id *id, void *data, void *arg) +{ + struct purge_loose_object_arg *a = arg; + const struct got_error *err, *unlock_err = NULL; + char *path = NULL; + int fd = -1; + struct stat sb; + struct got_lockfile *lf = NULL; + + err = got_object_get_path(&path, id, a->repo); + if (err) + return err; + + err = got_object_open_loose_fd(&fd, id, a->repo); + if (err) + goto done; + + if (fstat(fd, &sb) == -1) { + err = got_error_from_errno("fstat"); + goto done; + } + + if (!a->dry_run) { + err = got_lockfile_lock(&lf, path); + if (err) + goto done; + if (unlink(path) == -1) { + err = got_error_from_errno2("unlink", path); + goto done; + } + } + + a->npurged++; + a->size_purged += sb.st_size; + if (a->progress_cb) { + err = a->progress_cb(a->progress_arg, a->nloose, + a->ncommits, a->npurged); + } +done: + if (fd != -1 && close(fd) == -1 && err == NULL) + err = got_error_from_errno("close"); + free(path); + if (lf) + unlock_err = got_lockfile_unlock(lf); + return err ? 
err : unlock_err; +} + +const struct got_error * +got_repo_purge_unreferenced_loose_objects(struct got_repository *repo, + off_t *size_before, off_t *size_after, int *npacked, int dry_run, + got_cleanup_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err; + struct got_object_idset *loose_ids; + struct got_object_idset *traversed_ids; + struct got_object_id **referenced_ids; + int i, nreferenced, nloose, ncommits = 0; + struct got_reflist_head refs; + struct purge_loose_object_arg arg; + + TAILQ_INIT(&refs); + + *size_before = 0; + *size_after = 0; + *npacked = 0; + + err = get_loose_object_ids(&loose_ids, size_before, + progress_cb, progress_arg, repo); + if (err) + return err; + nloose = got_object_idset_num_elements(loose_ids); + if (nloose == 0) { + got_object_idset_free(loose_ids); + return NULL; + } + + traversed_ids = got_object_idset_alloc(); + if (traversed_ids == NULL) { + err = got_error_from_errno("got_object_idset_alloc"); + goto done; + } + + err = got_ref_list(&refs, repo, "", got_ref_cmp_by_name, NULL); + if (err) + goto done; + + err = get_reflist_object_ids(&referenced_ids, &nreferenced, + (1 << GOT_OBJ_TYPE_COMMIT) | (1 << GOT_OBJ_TYPE_TAG), + &refs, repo, cancel_cb, cancel_arg); + if (err) + goto done; + + for (i = 0; i < nreferenced; i++) { + struct got_object_id *id = referenced_ids[i]; + err = load_commit_or_tag(loose_ids, &ncommits, npacked, + traversed_ids, id, repo, progress_cb, progress_arg, nloose, + cancel_cb, cancel_arg); + if (err) + goto done; + } + + /* Produce a final progress report in case no objects can be purged. */ + if (got_object_idset_num_elements(loose_ids) == 0 && progress_cb) { + err = progress_cb(progress_arg, nloose, ncommits, 0); + if (err) + goto done; + } + + /* Any remaining loose objects are unreferenced and can be purged. 
*/ + arg.repo = repo; + arg.progress_arg = progress_arg; + arg.progress_cb = progress_cb; + arg.nloose = nloose; + arg.npurged = 0; + arg.size_purged = 0; + arg.ncommits = ncommits; + arg.dry_run = dry_run; + err = got_object_idset_for_each(loose_ids, purge_loose_object, &arg); + if (err) + goto done; + *size_after = *size_before - arg.size_purged; +done: + got_object_idset_free(loose_ids); + if (traversed_ids) /* NULL when its allocation failed and we jumped straight here */ + got_object_idset_free(traversed_ids); + got_ref_list_free(&refs); /* NOTE(review): referenced_ids is not released here; confirm who owns the array returned by get_reflist_object_ids() */ + return err; +} blob - 994b3c728968b83c996b8eb7fd40f01354a38383 blob + ef0efe1dbbe320c62fff14e7f2e0b94db43f38ff --- regress/cmdline/Makefile +++ regress/cmdline/Makefile @@ -1,6 +1,6 @@ REGRESS_TARGETS=checkout update status log add rm diff blame branch tag \ ref commit revert cherrypick backout rebase import histedit \ - integrate stage unstage cat clone fetch tree pack + integrate stage unstage cat clone fetch tree pack cleanup NOOBJ=Yes GOT_TEST_ROOT=/tmp @@ -83,4 +83,8 @@ tree: pack: ./pack.sh -q -r "$(GOT_TEST_ROOT)" +cleanup: + ./cleanup.sh -q -r "$(GOT_TEST_ROOT)" + + .include blob - /dev/null blob + d68d9728b364b87e77f221ab917cc0da304d30d2 (mode 755) --- /dev/null +++ regress/cmdline/cleanup.sh @@ -0,0 +1,238 @@ +#!/bin/sh +# +# Copyright (c) 2021 Stefan Sperling +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +. 
./common.sh + +test_cleanup_unreferenced_loose_objects() { + local testroot=`test_init cleanup_unreferenced_loose_objects` + + nloose0=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose0" != "8" ]; then + echo "unexpected number of loose objects: $nloose0" >&2 + test_done "$testroot" "1" + return 1 + fi + + # create a branch with some changes + got branch -r $testroot/repo newbranch >/dev/null + + got checkout -b newbranch $testroot/repo $testroot/wt >/dev/null + ret="$?" + if [ "$ret" != "0" ]; then + echo "got checkout command failed unexpectedly" + test_done "$testroot" "$ret" + return 1 + fi + + echo 'foo' > $testroot/wt/foo + (cd $testroot/wt && got add foo > /dev/null) + echo 'modified alpha' > $testroot/wt/alpha + (cd $testroot/wt && got commit -m 'newbranch commit' > /dev/null) + local commit1=`git_show_branch_head $testroot/repo newbranch` + local tree1=`got cat -r $testroot/repo $commit1 | \ + grep ^tree | cut -d ' ' -f2` + local alpha1=`got tree -r $testroot/repo -i -c $commit1 | \ + grep "[0-9a-f] alpha$" | cut -d' ' -f 1` + local foo1=`got tree -r $testroot/repo -i -c $commit1 | \ + grep "[0-9a-f] foo$" | cut -d' ' -f 1` + + nloose1=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose1" != "12" ]; then + echo "unexpected number of loose objects: $nloose1" >&2 + test_done "$testroot" "1" + return 1 + fi + + # delete the branch + got branch -r $testroot/repo -d newbranch >/dev/null + + # remove worktree's base commit reference, which points at the branch + wt_uuid=`(cd $testroot/wt && got info | grep 'UUID:' | \ + cut -d ':' -f 2 | tr -d ' ')` + got ref -r $testroot/repo -d "refs/got/worktree/base-$wt_uuid" + + # cleanup -n should not remove any objects + ls -1 -R $testroot/repo/.git/objects > $testroot/objects-before + gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + echo -n > $testroot/stdout.expected + cmp -s 
$testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + ls -1 -R $testroot/repo/.git/objects > $testroot/objects-after + cmp -s $testroot/objects-before $testroot/objects-after + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/objects-before $testroot/objects-after + test_done "$testroot" "$ret" + return 1 + fi + + # cleanup should remove loose objects that belonged to the branch + gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "$ret" + return 1 + fi + echo -n > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + nloose2=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose2" != "$nloose0" ]; then + echo "unexpected number of loose objects: $nloose2" >&2 + test_done "$testroot" "1" + return 1 + fi + + for id in $commit1 $tree1 $alpha1 $foo1; do + path=`get_loose_object_path $testroot/repo $id` + if [ -e "$path" ]; then + echo "loose object $path was not purged" >&2 + ret=1 + break + fi + done + + test_done "$testroot" "$ret" +} + +test_cleanup_redundant_loose_objects() { + local testroot=`test_init cleanup_redundant_loose_objects` + + # tags should also be packed + got tag -r $testroot/repo -m 1.0 1.0 >/dev/null + + nloose0=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose0" != "9" ]; then + echo "unexpected number of loose objects: $nloose0" >&2 + test_done "$testroot" "1" + return 1 + fi + + # no pack files should exist yet + ls $testroot/repo/.git/objects/pack/ > $testroot/stdout + ret="$?" 
+ if [ "$ret" != "0" ]; then + test_done "$testroot" "$ret" + return 1 + fi + echo -n > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + gotadmin pack -r $testroot/repo > /dev/null + + npacked0=`gotadmin info -r $testroot/repo | grep '^packed objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$npacked0" != "9" ]; then + echo "unexpected number of packed objects: $npacked0" >&2 + test_done "$testroot" "1" + return 1 + fi + + # cleanup -n should not remove any objects + ls -1 -R $testroot/repo/.git/objects > $testroot/objects-before + gotadmin cleanup -n -q -r $testroot/repo > $testroot/stdout + echo -n > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + ls -1 -R $testroot/repo/.git/objects > $testroot/objects-after + cmp -s $testroot/objects-before $testroot/objects-after + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/objects-before $testroot/objects-after + test_done "$testroot" "$ret" + return 1 + fi + + nloose1=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose1" != "$nloose0" ]; then + echo "unexpected number of loose objects: $nloose1" >&2 + test_done "$testroot" "1" + return 1 + fi + + # cleanup should remove all loose objects + gotadmin cleanup -q -r $testroot/repo > $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "$ret" + return 1 + fi + echo -n > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" 
+ if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + nloose2=`gotadmin info -r $testroot/repo | grep '^loose objects:' | \ + cut -d ':' -f 2 | tr -d ' '` + if [ "$nloose2" != "0" ]; then + echo "unexpected number of loose objects: $nloose2" >&2 + test_done "$testroot" "1" + return 1 + fi + + for d in $testroot/repo/.git/objects/[0-9a-f][0-9a-f]; do + id0=`basename $d` + ret=0 + for e in `ls $d`; do + obj_id=${id0}${e} + echo "loose object $obj_id was not purged" >&2 + ret=1 + break + done + if [ "$ret" = "1" ]; then + break + fi + done + + test_done "$testroot" "$ret" +} + +test_parseargs "$@" +run_test test_cleanup_unreferenced_loose_objects +run_test test_cleanup_redundant_loose_objects