diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 735cf07b20..0ce916a188 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -409,6 +409,48 @@ Note that without '\--full-history', this still simplifies merges: if one of the parents is TREESAME, we follow only that one, so the other sides of the merge are never walked. +Finally, there is a fourth simplification mode available: + +--simplify-merges:: + + First, build a history graph in the same way that + '\--full-history' with parent rewriting does (see above). ++ +Then simplify each commit `C` to its replacement `C'` in the final +history according to the following rules: ++ +-- +* Set `C'` to `C`. ++ +* Replace each parent `P` of `C'` with its simplification `P'`. In + the process, drop parents that are ancestors of other parents, and + remove duplicates. ++ +* If after this parent rewriting, `C'` is a root or merge commit (has + zero or >1 parents), a boundary commit, or !TREESAME, it remains. + Otherwise, it is replaced with its only parent. +-- ++ +The effect of this is best shown by way of comparing to +'\--full-history' with parent rewriting. The example turns into: ++ +----------------------------------------------------------------------- + .-A---M---N---O + / / / + I B D + \ / / + `---------' +----------------------------------------------------------------------- ++ +Note the major differences in `N` and `P` over '\--full-history': ++ +-- +* `N`'s parent list had `I` removed, because it is an ancestor of the + other parent `M`. Still, `N` remained because it is !TREESAME. ++ +* `P`'s parent list similarly had `I` removed. `P` was then + removed completely, because it had one parent and is TREESAME. +-- ifdef::git-rev-list[] Bisection Helpers diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 2871a59e32..81392add0b 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -232,11 +232,11 @@ mkdir ../map || die "Could not create map/ directory" case "$filter_subdir" in "") git rev-list --reverse --topo-order --default HEAD \ - --parents "$@" + --parents --simplify-merges "$@" ;; *) git rev-list --reverse --topo-order --default HEAD \ - --parents "$@" -- "$filter_subdir" + --parents --simplify-merges "$@" -- "$filter_subdir" esac > ../revs || die "Could not get the commits" commits=$(wc -l <../revs | tr -d " ") @@ -317,24 +317,20 @@ done <../revs # In case of a subdirectory filter, it is possible that a specified head # is not in the set of rewritten commits, because it was pruned by the -# revision walker. Fix it by mapping these heads to the next rewritten -# ancestor(s), i.e. the boundaries in the set of rewritten commits. +# revision walker. Fix it by mapping these heads to the unique nearest +# ancestor that survived the pruning. -# NEEDSWORK: we should sort the unmapped refs topologically first -while read ref -do - sha1=$(git rev-parse "$ref"^0) - test -f "$workdir"/../map/$sha1 && continue - # Assign the boundarie(s) in the set of rewritten commits - # as the replacement commit(s). - # (This would look a bit nicer if --not --stdin worked.) - for p in $( (cd "$workdir"/../map; ls | sed "s/^/^/") | - git rev-list $ref --boundary --stdin | - sed -n "s/^-//p") +if test "$filter_subdir" +then + while read ref do - map $p >> "$workdir"/../map/$sha1 - done -done < "$tempdir"/heads + sha1=$(git rev-parse "$ref"^0) + test -f "$workdir"/../map/$sha1 && continue + ancestor=$(git rev-list --simplify-merges -1 \ + $ref -- "$filter_subdir") + test "$ancestor" && echo $(map $ancestor) >> "$workdir"/../map/$sha1 + done < "$tempdir"/heads +fi # Finally update the refs diff --git a/revision.c b/revision.c index 36291b6b86..bcbc7bd7e4 100644 --- a/revision.c +++ b/revision.c @@ -489,7 +489,7 @@ static int add_parents_to_list(struct rev_info *revs, struct commit *commit, p->object.flags |= SEEN; insert_by_date_cached(p, list, cached_base, cache_ptr); } - if(revs->first_parent_only) + if (revs->first_parent_only) break; } return 0; @@ -1041,6 +1041,11 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg } else if (!strcmp(arg, "--topo-order")) { revs->lifo = 1; revs->topo_order = 1; + } else if (!strcmp(arg, "--simplify-merges")) { + revs->simplify_merges = 1; + revs->rewrite_parents = 1; + revs->simplify_history = 0; + revs->limited = 1; } else if (!strcmp(arg, "--date-order")) { revs->lifo = 0; revs->topo_order = 1; @@ -1368,6 +1373,179 @@ static void add_child(struct rev_info *revs, struct commit *parent, struct commi l->next = add_decoration(&revs->children, &parent->object, l); } +static int remove_duplicate_parents(struct commit *commit) +{ + struct commit_list **pp, *p; + int surviving_parents; + + /* Examine existing parents while marking ones we have seen... */ + pp = &commit->parents; + while ((p = *pp) != NULL) { + struct commit *parent = p->item; + if (parent->object.flags & TMP_MARK) { + *pp = p->next; + continue; + } + parent->object.flags |= TMP_MARK; + pp = &p->next; + } + /* count them while clearing the temporary mark */ + surviving_parents = 0; + for (p = commit->parents; p; p = p->next) { + p->item->object.flags &= ~TMP_MARK; + surviving_parents++; + } + return surviving_parents; +} + +struct merge_simplify_state { + struct commit *simplified; +}; + +static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit) +{ + struct merge_simplify_state *st; + + st = lookup_decoration(&revs->merge_simplification, &commit->object); + if (!st) { + st = xcalloc(1, sizeof(*st)); + add_decoration(&revs->merge_simplification, &commit->object, st); + } + return st; +} + +static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail) +{ + struct commit_list *p; + struct merge_simplify_state *st, *pst; + int cnt; + + st = locate_simplify_state(revs, commit); + + /* + * Have we handled this one? + */ + if (st->simplified) + return tail; + + /* + * An UNINTERESTING commit simplifies to itself, so does a + * root commit. We do not rewrite parents of such commit + * anyway. + */ + if ((commit->object.flags & UNINTERESTING) || !commit->parents) { + st->simplified = commit; + return tail; + } + + /* + * Do we know what commit all of our parents should be rewritten to? + * Otherwise we are not ready to rewrite this one yet. + */ + for (cnt = 0, p = commit->parents; p; p = p->next) { + pst = locate_simplify_state(revs, p->item); + if (!pst->simplified) { + tail = &commit_list_insert(p->item, tail)->next; + cnt++; + } + } + if (cnt) { + tail = &commit_list_insert(commit, tail)->next; + return tail; + } + + /* + * Rewrite our list of parents. + */ + for (p = commit->parents; p; p = p->next) { + pst = locate_simplify_state(revs, p->item); + p->item = pst->simplified; + } + cnt = remove_duplicate_parents(commit); + + /* + * It is possible that we are a merge and one side branch + * does not have any commit that touches the given paths; + * in such a case, the immediate parents will be rewritten + * to different commits. + * + * o----X X: the commit we are looking at; + * / / o: a commit that touches the paths; + * ---o----' + * + * Further reduce the parents by removing redundant parents. + */ + if (1 < cnt) { + struct commit_list *h = reduce_heads(commit->parents); + cnt = commit_list_count(h); + free_commit_list(commit->parents); + commit->parents = h; + } + + /* + * A commit simplifies to itself if it is a root, if it is + * UNINTERESTING, if it touches the given paths, or if it is a + * merge and its parents simplifies to more than one commits + * (the first two cases are already handled at the beginning of + * this function). + * + * Otherwise, it simplifies to what its sole parent simplifies to. + */ + if (!cnt || + (commit->object.flags & UNINTERESTING) || + !(commit->object.flags & TREESAME) || + (1 < cnt)) + st->simplified = commit; + else { + pst = locate_simplify_state(revs, commit->parents->item); + st->simplified = pst->simplified; + } + return tail; +} + +static void simplify_merges(struct rev_info *revs) +{ + struct commit_list *list; + struct commit_list *yet_to_do, **tail; + + if (!revs->topo_order) + sort_in_topological_order(&revs->commits, revs->lifo); + if (!revs->prune) + return; + + /* feed the list reversed */ + yet_to_do = NULL; + for (list = revs->commits; list; list = list->next) + commit_list_insert(list->item, &yet_to_do); + while (yet_to_do) { + list = yet_to_do; + yet_to_do = NULL; + tail = &yet_to_do; + while (list) { + struct commit *commit = list->item; + struct commit_list *next = list->next; + free(list); + list = next; + tail = simplify_one(revs, commit, tail); + } + } + + /* clean up the result, removing the simplified ones */ + list = revs->commits; + revs->commits = NULL; + tail = &revs->commits; + while (list) { + struct commit *commit = list->item; + struct commit_list *next = list->next; + struct merge_simplify_state *st; + free(list); + list = next; + st = locate_simplify_state(revs, commit); + if (st->simplified == commit) + tail = &commit_list_insert(commit, tail)->next; + } +} + static void set_children(struct rev_info *revs) { struct commit_list *l; @@ -1408,6 +1586,8 @@ int prepare_revision_walk(struct rev_info *revs) return -1; if (revs->topo_order) sort_in_topological_order(&revs->commits, revs->lifo); + if (revs->simplify_merges) + simplify_merges(revs); if (revs->children.name) set_children(revs); return 0; @@ -1440,26 +1620,6 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp } } -static void remove_duplicate_parents(struct commit *commit) -{ - struct commit_list **pp, *p; - - /* Examine existing parents while marking ones we have seen... */ - pp = &commit->parents; - while ((p = *pp) != NULL) { - struct commit *parent = p->item; - if (parent->object.flags & TMP_MARK) { - *pp = p->next; - continue; - } - parent->object.flags |= TMP_MARK; - pp = &p->next; - } - /* ... and clear the temporary mark */ - for (p = commit->parents; p; p = p->next) - p->item->object.flags &= ~TMP_MARK; -} - static int rewrite_parents(struct rev_info *revs, struct commit *commit) { struct commit_list **pp = &commit->parents; diff --git a/revision.h b/revision.h index 91f194478b..fc23522b38 100644 --- a/revision.h +++ b/revision.h @@ -42,6 +42,7 @@ struct rev_info { simplify_history:1, lifo:1, topo_order:1, + simplify_merges:1, tag_objects:1, tree_objects:1, blob_objects:1, @@ -110,6 +111,7 @@ struct rev_info { struct reflog_walk_info *reflog_info; struct decoration children; + struct decoration merge_simplification; }; #define REV_TREE_SAME 0 diff --git a/t/t6012-rev-list-simplify.sh b/t/t6012-rev-list-simplify.sh new file mode 100755 index 0000000000..510bb9679f --- /dev/null +++ b/t/t6012-rev-list-simplify.sh @@ -0,0 +1,93 @@ +#!/bin/sh + +test_description='merge simplification' + +. ./test-lib.sh + +note () { + git tag "$1" +} + +_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]' +_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40" + +unnote () { + git name-rev --tags --stdin | sed -e "s|$_x40 (tags/\([^)]*\)) |\1 |g" +} + +test_expect_success setup ' + echo "Hi there" >file && + git add file && + test_tick && git commit -m "Initial file" && + note A && + + git branch other-branch && + + echo "Hello" >file && + git add file && + test_tick && git commit -m "Modified file" && + note B && + + git checkout other-branch && + + echo "Hello" >file && + git add file && + test_tick && git commit -m "Modified the file identically" && + note C && + + echo "This is a stupid example" >another-file && + git add another-file && + test_tick && git commit -m "Add another file" && + note D && + + test_tick && git merge -m "merge" master && + note E && + + echo "Yet another" >elif && + git add elif && + test_tick && git commit -m "Irrelevant change" && + note F && + + git checkout master && + echo "Yet another" >elif && + git add elif && + test_tick && git commit -m "Another irrelevant change" && + note G && + + test_tick && git merge -m "merge" other-branch && + note H && + + echo "Final change" >file && + test_tick && git commit -a -m "Final change" && + note I +' + +FMT='tformat:%P %H | %s' + +check_result () { + for c in $1 + do + echo "$c" + done >expect && + shift && + param="$*" && + test_expect_success "log $param" ' + git log --pretty="$FMT" --parents $param | + unnote >actual && + sed -e "s/^.* \([^ ]*\) .*/\1/" >check