commit 1124fe406f09a7d5d995da60e6251e3440226d40 from: Stefan Sperling date: Wed Jul 07 17:21:52 2021 UTC handle pack index files which lack a corresponding pack file ok millert commit - aea75d87862c3ab54d0b8c5e5e280e13efe8ec4a commit + 1124fe406f09a7d5d995da60e6251e3440226d40 blob - 1290b7007bf14c58dc88fc618969d59fe988b94a blob + 7447f80c0b353f5e721127e74978c14924336573 --- gotadmin/gotadmin.1 +++ gotadmin/gotadmin.1 @@ -181,7 +181,7 @@ and a break-down of the number of objects per object t .It Cm ls Short alias for .Cm listpack . -.It Cm cleanup Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc +.It Cm cleanup Oo Fl p Oc Oo Fl n Oc Oo Fl r Ar repository-path Oc Oo Fl q Oc Purge unreferenced loose objects from the repository and display the amount of disk space which has been freed as a result. .Pp @@ -238,6 +238,16 @@ will only purge corresponding objects once such refere deleted with .Cm got ref -d . .Pp +Some Git repositories contain pack index files which lack a corresponding +pack file, which is an inconsistent repository state. +In such cases, +.Cm gotadmin cleanup -p -n +will display a list of affected pack index files. +Whenever possible the missing pack files should be restored. +If restoring missing pack files is not possible then affected pack index +files can be removed with +.Cm gotadmin cleanup -p . +.Pp The .Dq preciousObjects Git extension is intended to prevent the removal of objects from a repository. @@ -248,9 +258,12 @@ The options for .Cm gotadmin cleanup are as follows: .Bl -tag -width Ds +.It Fl p +Instead of purging unreferenced loose objects, remove any pack index files +which do not have a corresponding pack file. .It Fl n Display the usual progress output and summary information but do not actually -purge any objects. +remove any files from disk. .It Fl r Ar repository-path Use the repository at the specified path. If not specified, assume the repository is located at or above the current blob - 3a8fe2211096a986460483094705912532891e8e blob + 810a5cc02697494c98449bfb5e56bfdb7b79349e --- gotadmin/gotadmin.c +++ gotadmin/gotadmin.c @@ -897,8 +897,8 @@ done: __dead static void usage_cleanup(void) { - fprintf(stderr, "usage: %s cleanup [-n] [-r repository-path] [-q]\n", - getprogname()); + fprintf(stderr, "usage: %s cleanup [-p] [-n] [-r repository-path] " + "[-q]\n", getprogname()); exit(1); } @@ -959,14 +959,39 @@ cleanup_progress(void *arg, int nloose, int ncommits, return NULL; } +struct got_lonely_packidx_progress_arg { + int verbosity; + int printed_something; + int dry_run; +}; + static const struct got_error * +lonely_packidx_progress(void *arg, const char *path) +{ + struct got_lonely_packidx_progress_arg *a = arg; + + if (a->verbosity < 0) + return NULL; + + if (a->dry_run) + printf("%s could be removed\n", path); + else + printf("%s removed\n", path); + + a->printed_something = 1; + return NULL; +} + +static const struct got_error * cmd_cleanup(int argc, char *argv[]) { const struct got_error *error = NULL; char *cwd = NULL, *repo_path = NULL; struct got_repository *repo = NULL; int ch, dry_run = 0, npacked = 0, verbosity = 0; + int remove_lonely_packidx = 0; struct got_cleanup_progress_arg cpa; + struct got_lonely_packidx_progress_arg lpa; off_t size_before, size_after; char scaled_before[FMT_SCALED_STRSIZE]; char scaled_after[FMT_SCALED_STRSIZE]; @@ -974,8 +999,11 @@ cmd_cleanup(int argc, char *argv[]) char **extensions; int nextensions, i; - while ((ch = getopt(argc, argv, "r:nq")) != -1) { + while ((ch = getopt(argc, argv, "pr:nq")) != -1) { switch (ch) { + case 'p': + remove_lonely_packidx = 1; + break; case 'r': repo_path = realpath(optarg, NULL); if (repo_path == NULL) @@ -1028,6 +1056,15 @@ cmd_cleanup(int argc, char *argv[]) } } + if (remove_lonely_packidx) { + memset(&lpa, 0, sizeof(lpa)); + lpa.dry_run = dry_run; + lpa.verbosity = verbosity; + error = got_repo_remove_lonely_packidx(repo, dry_run, + lonely_packidx_progress, &lpa, check_cancelled, NULL); + goto done; + } + memset(&cpa, 0, sizeof(cpa)); cpa.last_ncommits = -1; cpa.last_npurged = -1; blob - 851d4ad4128c5bcac0cd5616088fb5063844a83f blob + 48165fc4dfb98dc55aa9bb49ceea9ff107e8fc26 --- include/got_error.h +++ include/got_error.h @@ -146,6 +146,7 @@ #define GOT_ERR_BAD_SYMLINK 129 #define GOT_ERR_GIT_REPO_EXT 130 #define GOT_ERR_CANNOT_PACK 131 +#define GOT_ERR_LONELY_PACKIDX 132 static const struct got_error { int code; @@ -299,6 +300,7 @@ static const struct got_error { "version control" }, { GOT_ERR_GIT_REPO_EXT, "unsupported repository format extension" }, { GOT_ERR_CANNOT_PACK, "not enough objects to pack" }, + { GOT_ERR_LONELY_PACKIDX, "pack index has no corresponding pack file" }, }; /* blob - 99fa6426ad538680697e303f69032b8c33f9266a blob + d1e9be3e619d56a5e9e4569edfdc2cf9071ae49c --- include/got_repository_admin.h +++ include/got_repository_admin.h @@ -85,3 +85,13 @@ got_repo_purge_unreferenced_loose_objects(struct got_r off_t *size_before, off_t *size_after, int *npacked, int dry_run, got_cleanup_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg); + +/* A callback function which gets invoked with cleanup information to print. */ +typedef const struct got_error *(*got_lonely_packidx_progress_cb)(void *arg, + const char *path); + +/* Remove pack index files which do not have a corresponding pack file. */ +const struct got_error * +got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run, + got_lonely_packidx_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg); blob - e26649f47637e11dd9477fd50158d147bac28aad blob + 7f21b68ba60596d4c90ebe7b6ea9618f08bb68f4 --- lib/got_lib_repository.h +++ lib/got_lib_repository.h @@ -93,6 +93,7 @@ const struct got_error*got_repo_cache_tag(struct got_r struct got_object_id *, struct got_tag_object *); struct got_tag_object *got_repo_get_cached_tag(struct got_repository *, struct got_object_id *); +int got_repo_is_packidx_filename(const char *, size_t); const struct got_error *got_repo_search_packidx(struct got_packidx **, int *, struct got_repository *, struct got_object_id *); const struct got_error *got_repo_cache_pack(struct got_pack **, blob - 9bb2b1a4ab231495841a9af5bbea8d25a8d485fa blob + 9d93bee5d0a3def590556db70197db9b3a2b4651 --- lib/pack.c +++ lib/pack.c @@ -332,34 +332,55 @@ got_packidx_open(struct got_packidx **packidx, int dir_fd, const char *relpath, int verify) { const struct got_error *err = NULL; - struct got_packidx *p; + struct got_packidx *p = NULL; + char *pack_relpath; struct stat sb; *packidx = NULL; + + err = got_packidx_get_packfile_path(&pack_relpath, relpath); + if (err) + return err; + + /* + * Ensure that a corresponding pack file exists. + * Some Git repositories have this problem. Git seems to ignore + * the existence of lonely pack index files but we do not. + */ + if (fstatat(dir_fd, pack_relpath, &sb, 0) == -1) { + if (errno == ENOENT) { + err = got_error_fmt(GOT_ERR_LONELY_PACKIDX, + "%s", relpath); + } else + err = got_error_from_errno2("fstatat", pack_relpath); + goto done; + } p = calloc(1, sizeof(*p)); - if (p == NULL) - return got_error_from_errno("calloc"); + if (p == NULL) { + err = got_error_from_errno("calloc"); + goto done; + } p->fd = openat(dir_fd, relpath, O_RDONLY | O_NOFOLLOW); if (p->fd == -1) { err = got_error_from_errno2("openat", relpath); free(p); - return err; + goto done; } if (fstat(p->fd, &sb) != 0) { err = got_error_from_errno2("fstat", relpath); close(p->fd); free(p); - return err; + goto done; } p->len = sb.st_size; if (p->len < sizeof(p->hdr)) { err = got_error(GOT_ERR_BAD_PACKIDX); close(p->fd); free(p); - return err; + goto done; } p->path_packidx = strdup(relpath); @@ -381,11 +402,12 @@ got_packidx_open(struct got_packidx **packidx, err = got_packidx_init_hdr(p, verify); done: - if (err) - got_packidx_close(p); - else + if (err) { + if (p) + got_packidx_close(p); + } else *packidx = p; - + free(pack_relpath); return err; } blob - 15c0782632559de89351d64aec03314766fc7a79 blob + 3d857e3a7784f358729bd0b93b349fa77eaa1c10 --- lib/repository.c +++ lib/repository.c @@ -938,8 +938,8 @@ cache_packidx(struct got_repository *repo, struct got_ return NULL; } -static int -is_packidx_filename(const char *name, size_t len) +int +got_repo_is_packidx_filename(const char *name, size_t len) { if (len != GOT_PACKIDX_NAMELEN) return 0; @@ -1008,7 +1008,7 @@ got_repo_search_packidx(struct got_packidx **packidx, while ((dent = readdir(packdir)) != NULL) { int is_cached = 0; - if (!is_packidx_filename(dent->d_name, dent->d_namlen)) + if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen)) continue; if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR, @@ -1276,7 +1276,7 @@ match_packed_object(struct got_object_id **unique_id, struct got_packidx *packidx; struct got_object_qid *qid; - if (!is_packidx_filename(dent->d_name, dent->d_namlen)) + if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen)) continue; if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR, @@ -1952,7 +1952,7 @@ got_repo_get_packfile_info(int *npackfiles, int *nobje } while ((dent = readdir(packdir)) != NULL) { - if (!is_packidx_filename(dent->d_name, dent->d_namlen)) + if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen)) continue; if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR, blob - c5017d50e0b3fa7053f7d9159a76ee0467cf926a blob + 42e4e1a923ac6b0970e0e49aaa75dcb4458deb0c --- lib/repository_admin.c +++ lib/repository_admin.c @@ -1155,5 +1155,105 @@ got_repo_purge_unreferenced_loose_objects(struct got_r done: got_object_idset_free(loose_ids); got_object_idset_free(traversed_ids); + return err; +} + +static const struct got_error * +remove_packidx(int dir_fd, const char *relpath) +{ + const struct got_error *err, *unlock_err; + struct got_lockfile *lf; + + err = got_lockfile_lock(&lf, relpath, dir_fd); + if (err) + return err; + if (unlinkat(dir_fd, relpath, 0) == -1) + err = got_error_from_errno("unlinkat"); + unlock_err = got_lockfile_unlock(lf, dir_fd); + return err ? err : unlock_err; +} + +const struct got_error * +got_repo_remove_lonely_packidx(struct got_repository *repo, int dry_run, + got_lonely_packidx_progress_cb progress_cb, void *progress_arg, + got_cancel_cb cancel_cb, void *cancel_arg) +{ + const struct got_error *err; + DIR *packdir = NULL; + struct dirent *dent; + char *pack_relpath = NULL; + int packdir_fd; + struct stat sb; + + packdir_fd = openat(got_repo_get_fd(repo), + GOT_OBJECTS_PACK_DIR, O_DIRECTORY); + if (packdir_fd == -1) { + if (errno == ENOENT) + return NULL; + return got_error_from_errno_fmt("openat: %s/%s", + got_repo_get_path_git_dir(repo), + GOT_OBJECTS_PACK_DIR); + } + + packdir = fdopendir(packdir_fd); + if (packdir == NULL) { + err = got_error_from_errno("fdopendir"); + goto done; + } + + while ((dent = readdir(packdir)) != NULL) { + if (cancel_cb) { + err = cancel_cb(cancel_arg); + if (err) + goto done; + } + + if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen)) + continue; + + err = got_packidx_get_packfile_path(&pack_relpath, + dent->d_name); + if (err) + goto done; + + if (fstatat(packdir_fd, pack_relpath, &sb, 0) != -1) { + free(pack_relpath); + pack_relpath = NULL; + continue; + } + if (errno != ENOENT) { + err = got_error_from_errno_fmt("fstatat: %s/%s/%s", + got_repo_get_path_git_dir(repo), + GOT_OBJECTS_PACK_DIR, + pack_relpath); + goto done; + } + + if (!dry_run) { + err = remove_packidx(packdir_fd, dent->d_name); + if (err) + goto done; + } + if (progress_cb) { + char *path; + if (asprintf(&path, "%s/%s/%s", + got_repo_get_path_git_dir(repo), + GOT_OBJECTS_PACK_DIR, + dent->d_name) == -1) { + err = got_error_from_errno("asprintf"); + goto done; + } + err = progress_cb(progress_arg, path); + free(path); + if (err) + goto done; + } + free(pack_relpath); + pack_relpath = NULL; + } +done: + if (packdir && closedir(packdir) != 0 && err == NULL) + err = got_error_from_errno("closedir"); + free(pack_relpath); return err; } blob - b39f8ac5681099dac17303be8a1288dc93f8b2f1 blob + aa49076f7354432812d339a4a4f80a670dbbcacc --- regress/cmdline/cleanup.sh +++ regress/cmdline/cleanup.sh @@ -255,12 +255,107 @@ test_cleanup_precious_objects() { echo -n "gotadmin: the preciousObjects Git extension is enabled; " \ > $testroot/stderr.expected echo "this implies that objects must not be deleted" \ + >> $testroot/stderr.expected + cmp -s $testroot/stderr.expected $testroot/stderr + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stderr.expected $testroot/stderr + fi + test_done "$testroot" "$ret" +} + +test_cleanup_missing_pack_file() { + local testroot=`test_init cleanup_missing_pack_file` + + # no pack files should exist yet + ls $testroot/repo/.git/objects/pack/ > $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + test_done "$testroot" "$ret" + return 1 + fi + echo -n > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + gotadmin pack -r $testroot/repo > $testroot/stdout + packname=`grep ^Wrote $testroot/stdout | cut -d ' ' -f2` + packhash=`echo $packname | sed -e 's:^objects/pack/pack-::' \ + -e 's/.pack$//'` + + # Some freshly cloned Git repositories suffer from lonely pack index + # files. Remove the pack file we just wrote to simulate this issue. + rm $testroot/repo/.git/objects/pack/pack-$packname + + # cleanup should now refuse to purge objects + gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + 2> $testroot/stderr + ret="$?" + if [ "$ret" == "0" ]; then + echo "gotadmin cleanup succeeded unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi + + echo -n "gotadmin: objects/pack/pack-${packhash}.idx: " \ + > $testroot/stderr.expected + echo "pack index has no corresponding pack file" \ >> $testroot/stderr.expected cmp -s $testroot/stderr.expected $testroot/stderr ret="$?" if [ "$ret" != "0" ]; then diff -u $testroot/stderr.expected $testroot/stderr + test_done "$testroot" "$ret" + return 1 fi + + gotadmin cleanup -r $testroot/repo -p -n > $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "$ret" + return 1 + fi + packidx_path=$testroot/repo/.git/objects/pack/pack-${packhash}.idx + echo "$packidx_path could be removed" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + gotadmin cleanup -r $testroot/repo -p > $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "$ret" + return 1 + fi + echo "$packidx_path removed" > $testroot/stdout.expected + cmp -s $testroot/stdout.expected $testroot/stdout + ret="$?" + if [ "$ret" != "0" ]; then + diff -u $testroot/stdout.expected $testroot/stdout + test_done "$testroot" "$ret" + return 1 + fi + + # cleanup should now attempt to purge objects + gotadmin cleanup -q -r $testroot/repo > $testroot/stdout \ + 2> $testroot/stderr + ret="$?" + if [ "$ret" != "0" ]; then + echo "gotadmin cleanup failed unexpectedly" >&2 + test_done "$testroot" "1" + return 1 + fi test_done "$testroot" "$ret" } @@ -268,3 +363,4 @@ test_parseargs "$@" run_test test_cleanup_unreferenced_loose_objects run_test test_cleanup_redundant_loose_objects run_test test_cleanup_precious_objects +run_test test_cleanup_missing_pack_file