commit 4c9641fdb639da1d7ad1bde82b3010e5ac1e096f from: Stefan Sperling date: Wed Aug 21 21:49:33 2019 UTC fix and simplify blame algorithm Always diff against latest version of file. This is much easier since there is no need to keep track of lines shifting around. commit - 90f3c347a65c36b0c585d582122ea614a032a2b0 commit + 4c9641fdb639da1d7ad1bde82b3010e5ac1e096f blob - 466e02e99fc3725e8f67e89c91c81439deb00602 blob + 4d887779da3647bcf38fded4c8db1d120fb3aaaf --- got/Makefile +++ got/Makefile @@ -3,7 +3,7 @@ .include "../got-version.mk" PROG= got -SRCS= got.c blame.c commit_graph.c delta.c diff.c diffoffset.c \ +SRCS= got.c blame.c commit_graph.c delta.c diff.c \ diffreg.c error.c fileindex.c object.c object_cache.c \ object_idset.c object_parse.c opentemp.c path.c pack.c \ privsep.c reference.c repository.c sha1.c worktree.c \ blob - df12bfb1268e9700fcdafe8129ce1e07f5248a33 blob + ba63ad853919fff5d18e5c571083a54b36b6cc62 --- lib/blame.c +++ lib/blame.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Stefan Sperling + * Copyright (c) 2018, 2019 Stefan Sperling * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -34,7 +34,6 @@ #include "got_lib_delta.h" #include "got_lib_object.h" #include "got_lib_diff.h" -#include "got_lib_diffoffset.h" #include "got_commit_graph.h" struct got_blame_line { @@ -42,14 +41,6 @@ struct got_blame_line { struct got_object_id id; }; -struct got_blame_diff_offsets { - struct got_diffoffset_chunks *chunks; - struct got_object_id *commit_id; - SLIST_ENTRY(got_blame_diff_offsets) entry; -}; - -SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets); - struct got_blame { FILE *f; size_t filesize; @@ -58,46 +49,9 @@ struct got_blame { struct got_blame_line *lines; /* one per line */ off_t *line_offsets; /* one per line */ int ncommits; - struct got_blame_diff_offsets_list diff_offsets_list; }; -static void -free_diff_offsets(struct got_blame_diff_offsets *diff_offsets) -{ - if (diff_offsets->chunks) - got_diffoffset_free(diff_offsets->chunks); - free(diff_offsets->commit_id); - free(diff_offsets); -} - static const struct got_error * -alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets, - struct got_object_id *commit_id) -{ - const struct got_error *err = NULL; - - *diff_offsets = calloc(1, sizeof(**diff_offsets)); - if (*diff_offsets == NULL) - return got_error_from_errno("calloc"); - - (*diff_offsets)->commit_id = got_object_id_dup(commit_id); - if ((*diff_offsets)->commit_id == NULL) { - err = got_error_from_errno("got_object_id_dup"); - free_diff_offsets(*diff_offsets); - *diff_offsets = NULL; - return err; - } - - err = got_diffoffset_alloc(&(*diff_offsets)->chunks); - if (err) { - free_diff_offsets(*diff_offsets); - return err; - } - - return NULL; -} - -static const struct got_error * annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id, const struct got_error *(*cb)(void *, int, int, struct got_object_id *), void *arg) @@ -120,18 +74,6 @@ annotate_line(struct got_blame *blame, int lineno, str return err; } -static int -get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list, - int lineno) -{ - struct got_blame_diff_offsets *diff_offsets; - - SLIST_FOREACH(diff_offsets, diff_offsets_list, entry) - lineno = got_diffoffset_get(diff_offsets->chunks, lineno); - - return lineno; -} - static const struct got_error * blame_changes(struct got_blame *blame, struct got_diff_changes *changes, struct got_object_id *commit_id, @@ -140,72 +82,49 @@ blame_changes(struct got_blame *blame, struct got_diff { const struct got_error *err = NULL; struct got_diff_change *change; - struct got_blame_diff_offsets *diff_offsets; SIMPLEQ_FOREACH(change, &changes->entries, entry) { int c = change->cv.c; int d = change->cv.d; - int new_lineno = c; + int new_lineno = (c < d ? c : d); int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0)); int ln; for (ln = new_lineno; ln < new_lineno + new_length; ln++) { - err = annotate_line(blame, - get_blamed_line(&blame->diff_offsets_list, ln), - commit_id, cb, arg); + err = annotate_line(blame, ln, commit_id, cb, arg); if (err) return err; if (blame->nlines == blame->nannotated) - return NULL; + break; } } - err = alloc_diff_offsets(&diff_offsets, commit_id); - if (err) - return err; - SIMPLEQ_FOREACH(change, &changes->entries, entry) { - int a = change->cv.a; - int b = change->cv.b; - int c = change->cv.c; - int d = change->cv.d; - int old_lineno = a; - int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0)); - int new_lineno = c; - int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0)); - - err = got_diffoffset_add(diff_offsets->chunks, - old_lineno, old_length, new_lineno, new_length); - if (err) { - free_diff_offsets(diff_offsets); - return err; - } - } - SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry); - return NULL; } static const struct got_error * -blame_commit(struct got_blame *blame, struct got_object_id *id, - const char *path, struct got_repository *repo, +blame_commit(struct got_blame *blame, struct got_object_id *parent_id, + struct got_object_id *id, const char *path, struct got_repository *repo, const struct got_error *(*cb)(void *, int, int, struct got_object_id *), void *arg) { const struct got_error *err = NULL; struct got_object *obj = NULL, *pobj = NULL; - struct got_object_id *obj_id = NULL, *pobj_id = NULL; + struct got_object_id *obj_id = NULL; struct got_commit_object *commit = NULL; - struct got_blob_object *blob = NULL, *pblob = NULL; + struct got_blob_object *blob = NULL; struct got_diff_changes *changes = NULL; - struct got_object_qid *pid = NULL; - err = got_object_open_as_commit(&commit, repo, id); + err = got_object_open_as_commit(&commit, repo, parent_id); if (err) return err; - err = got_object_id_by_path(&obj_id, repo, id, path); - if (err) + err = got_object_id_by_path(&obj_id, repo, parent_id, path); + if (err) { + if (err->code == GOT_ERR_NO_TREE_ENTRY) + err = NULL; goto done; + } err = got_object_open(&obj, repo, obj_id); if (err) @@ -216,47 +135,17 @@ blame_commit(struct got_blame *blame, struct got_objec goto done; } - pid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit)); - if (pid) { - err = got_object_id_by_path(&pobj_id, repo, pid->id, path); - if (err) { - if (err->code == GOT_ERR_NO_TREE_ENTRY) { - /* Blob's history began in previous commit. */ - err = got_error(GOT_ERR_ITER_COMPLETED); - } - goto done; - } - - /* If IDs match then don't bother with diffing. */ - if (got_object_id_cmp(obj_id, pobj_id) == 0) { - if (cb) - err = cb(arg, blame->nlines, -1, id); - goto done; - } - - err = got_object_open(&pobj, repo, pobj_id); - if (err) - goto done; - - if (pobj->type != GOT_OBJ_TYPE_BLOB) { - /* - * Encountered a non-blob at the path (probably a tree). - * Blob's history began in previous commit. - */ - err = got_error(GOT_ERR_ITER_COMPLETED); - goto done; - } - - err = got_object_blob_open(&pblob, repo, pobj, 8192); - if (err) - goto done; - } - err = got_object_blob_open(&blob, repo, obj, 8192); if (err) goto done; - err = got_diff_blob_lines_changed(&changes, pblob, blob); + if (fseek(blame->f, 0L, SEEK_SET) == -1) { + err = got_ferror(blame->f, GOT_ERR_IO); + goto done; + } + + err = got_diff_blob_file_lines_changed(&changes, blob, blame->f, + blame->filesize); if (err) goto done; @@ -269,15 +158,12 @@ done: if (commit) got_object_commit_close(commit); free(obj_id); - free(pobj_id); if (obj) got_object_close(obj); if (pobj) got_object_close(pobj); if (blob) got_object_blob_close(blob); - if (pblob) - got_object_blob_close(pblob); return err; } @@ -285,16 +171,10 @@ static const struct got_error * blame_close(struct got_blame *blame) { const struct got_error *err = NULL; - struct got_blame_diff_offsets *diff_offsets; if (blame->f && fclose(blame->f) != 0) err = got_error_from_errno("fclose"); free(blame->lines); - while (!SLIST_EMPTY(&blame->diff_offsets_list)) { - diff_offsets = SLIST_FIRST(&blame->diff_offsets_list); - SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry); - free_diff_offsets(diff_offsets); - } free(blame); return err; } @@ -310,7 +190,7 @@ blame_open(struct got_blame **blamep, const char *path struct got_object_id *obj_id = NULL; struct got_blob_object *blob = NULL; struct got_blame *blame = NULL; - struct got_object_id *id = NULL, *next_id = NULL; + struct got_object_id *id = NULL, *pid = NULL; int lineno; struct got_commit_graph *graph = NULL; @@ -363,17 +243,12 @@ blame_open(struct got_blame **blamep, const char *path err = got_commit_graph_iter_start(graph, start_commit_id, repo); if (err) goto done; - - id = NULL; + id = start_commit_id; for (;;) { - err = got_commit_graph_iter_next(&next_id, graph); + err = got_commit_graph_iter_next(&pid, graph); if (err) { if (err->code == GOT_ERR_ITER_COMPLETED) { - if (id) - err = blame_commit(blame, id, - path, repo, cb, arg); - else - err = NULL; + err = NULL; break; } if (err->code != GOT_ERR_ITER_NEED_MORE) @@ -383,9 +258,8 @@ blame_open(struct got_blame **blamep, const char *path break; continue; } - if (id) { - err = blame_commit(blame, id, path, repo, - cb, arg); + if (pid) { + err = blame_commit(blame, pid, id, path, repo, cb, arg); if (err) { if (err->code == GOT_ERR_ITER_COMPLETED) err = NULL; @@ -394,7 +268,7 @@ blame_open(struct got_blame **blamep, const char *path if (blame->nannotated == blame->nlines) break; } - id = next_id; + id = pid; } if (id && blame->nannotated < blame->nlines) { blob - 6d6f3dec0067739174d0bd9beae75cd22a7d7fe9 blob + 3c3c3397cc0c1896e190ac74c4ac314c5dc6e550 --- lib/diff.c +++ lib/diff.c @@ -230,6 +230,14 @@ got_diff_blob_file(struct got_blob_object *blob1, cons { return diff_blob_file(NULL, blob1, label1, f2, size2, label2, diff_context, outfile); +} + +const struct got_error * +got_diff_blob_file_lines_changed(struct got_diff_changes **changes, + struct got_blob_object *blob1, FILE *f2, size_t size2) +{ + return diff_blob_file(changes, blob1, NULL, f2, size2, NULL, + 0, NULL); } const struct got_error * blob - 989570511770f9cdb9ebc76d1a75d0b30e88f363 (mode 644) blob + /dev/null --- lib/diffoffset.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2018 Stefan Sperling - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include - -#include -#include -#include -#include - -#include "got_object.h" - -#include "got_error.h" -#include "got_lib_delta.h" -#include "got_lib_inflate.h" -#include "got_lib_object.h" -#include "got_lib_diffoffset.h" - -/* - * A line offset between an old file and a new file, derived from diff chunk - * header info @@ -old_lineno,old_length +new_lineno,new_length @@ in a diff - * with zero context lines (as in diff -U0 old-file new-file). - */ -struct got_diffoffset_chunk { - int lineno; /* first line which has shifted */ - int offset; /* applies to subsequent lines until next chunk */ - SIMPLEQ_ENTRY(got_diffoffset_chunk) entry; -}; - -static struct got_diffoffset_chunk * -alloc_chunk(int lineno, int offset) -{ - struct got_diffoffset_chunk *chunk; - - chunk = calloc(1, sizeof(*chunk)); - if (chunk == NULL) - return NULL; - - chunk->lineno = lineno; - chunk->offset = offset; - - return chunk; -} - -const struct got_error * -got_diffoffset_alloc(struct got_diffoffset_chunks **chunks) -{ - const struct got_error *err = NULL; - - *chunks = calloc(1, sizeof(**chunks)); - if (*chunks == NULL) { - err = got_error_from_errno("calloc"); - return err; - } - - SIMPLEQ_INIT(*chunks); - return NULL; -} - -void -got_diffoffset_free(struct got_diffoffset_chunks *chunks) -{ - struct got_diffoffset_chunk *chunk; - - while (!SIMPLEQ_EMPTY(chunks)) { - chunk = SIMPLEQ_FIRST(chunks); - SIMPLEQ_REMOVE_HEAD(chunks, entry); - free(chunk); - } - free(chunks); -} - -const struct got_error * -add_chunk(struct got_diffoffset_chunks *chunks, int lineno, int offset) -{ - struct got_diffoffset_chunk *chunk; - - chunk = alloc_chunk(lineno, offset); - if (chunk == NULL) - return got_error_from_errno("alloc_chunk"); - - SIMPLEQ_INSERT_TAIL(chunks, chunk, entry); - return NULL; -} - - -const struct got_error * -got_diffoffset_add(struct got_diffoffset_chunks *chunks, - int old_lineno, int old_length, int new_lineno, int new_length) -{ - const struct got_error *err = NULL; - int offset; - - if (old_length != 0) { - offset = new_lineno - old_lineno; - if (offset != 0) { - err = add_chunk(chunks, old_lineno, offset); - if (err) - return err; - } - } else { - offset = new_length; - if (offset != 0) { - err = add_chunk(chunks, old_lineno, offset); - if (err) - return err; - } - if (old_lineno == new_lineno) - return NULL; - } - - offset = new_lineno - old_lineno + new_length - old_length; - if (offset != 0) - err = add_chunk(chunks, old_lineno + new_length, offset); - - return err; -} - -int -got_diffoffset_get(struct got_diffoffset_chunks *chunks, int lineno) -{ - struct got_diffoffset_chunk *chunk; - int offset = 0; - - SIMPLEQ_FOREACH(chunk, chunks, entry) { - if (chunk->lineno > lineno) - break; - offset += chunk->offset; - } - - return lineno + offset; -} blob - ce84a9b18f49d7778cef958719f92c6988e77c8c blob + f3ddb3fb288bda4b679437664c0f0f403351969d --- lib/got_lib_diff.h +++ lib/got_lib_diff.h @@ -144,6 +144,8 @@ const struct got_error *got_diffreg(int *, FILE *, const struct got_error *got_diff_blob_lines_changed(struct got_diff_changes **, struct got_blob_object *, struct got_blob_object *); +const struct got_error *got_diff_blob_file_lines_changed(struct got_diff_changes **, + struct got_blob_object *, FILE *, size_t); void got_diff_free_changes(struct got_diff_changes *); const struct got_error *got_merge_diff3(int *, int, const char *, const char *, blob - 705d33a582f9bfabcaf7d4aac05826d8123084bd (mode 644) blob + /dev/null --- lib/got_lib_diffoffset.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2018 Stefan Sperling - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -SIMPLEQ_HEAD(got_diffoffset_chunks, got_diffoffset_chunk); - -const struct got_error *got_diffoffset_alloc(struct got_diffoffset_chunks **); -void got_diffoffset_free(struct got_diffoffset_chunks *); -const struct got_error *got_diffoffset_add(struct got_diffoffset_chunks *, - int, int, int, int); -int got_diffoffset_get(struct got_diffoffset_chunks *, int); blob - 7ad08ea695569ef87b08505a096a53947d72ad92 blob + 194bcdf0178508b25beca839c298348310aa4588 --- regress/cmdline/blame.sh +++ regress/cmdline/blame.sh @@ -676,12 +676,8 @@ EOF (cd $testroot/wt && patch < blame-5.patch > /dev/null) (cd $testroot/wt && got commit -m "change 5" > /dev/null) - blame_cmp "$testroot" "got_blame.h" xfail + blame_cmp "$testroot" "got_blame.h" ret="$?" - if [ "$ret" != 0 ]; then - test_done "$testroot" "xfail: lines 12, 13, 16 wrongly annotated" - return 1 - fi test_done "$testroot" "$ret" } blob - 2c808a83b9f499bd124932936473fc60205bc0ce blob + 85f9a8965833c7ec25df706646762df09e50327b --- tog/Makefile +++ tog/Makefile @@ -3,7 +3,7 @@ .include "../got-version.mk" PROG= tog -SRCS= tog.c blame.c commit_graph.c delta.c diff.c diffoffset.c \ +SRCS= tog.c blame.c commit_graph.c delta.c diff.c \ diffreg.c error.c fileindex.c object.c object_cache.c \ object_idset.c object_parse.c opentemp.c path.c pack.c \ privsep.c reference.c repository.c sha1.c worktree.c \