1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-10-28 04:49:43 +01:00
git/t/t6600-test-reach.sh

737 lines
17 KiB
Bash
Raw Permalink Normal View History

#!/bin/sh
test_description='basic commit reachability tests'
. ./test-lib.sh
# Construct a grid-like commit graph with points (x,y)
# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has
# parents (x-1, y) and (x, y-1), keeping in mind that
# we drop a parent if a coordinate is nonpositive.
#
# (10,10)
# / \
# (10,9) (9,10)
# / \ / \
# (10,8) (9,9) (8,10)
# / \ / \ / \
# ( continued...)
# \ / \ / \ /
# (3,1) (2,2) (1,3)
# \ / \ /
# (2,1) (2,1)
# \ /
# (1,1)
#
# We use branch 'commit-x-y' to refer to (x,y).
# This grid allows interesting reachability and
# non-reachability queries: (x,y) can reach (x',y')
# if and only if x' <= x and y' <= y.
test_expect_success 'setup' '
for i in $(test_seq 1 10)
do
test_commit "1-$i" &&
git branch -f commit-1-$i &&
git tag -a -m "1-$i" tag-1-$i commit-1-$i || return 1
done &&
for j in $(test_seq 1 9)
do
git reset --hard commit-$j-1 &&
x=$(($j + 1)) &&
test_commit "$x-1" &&
git branch -f commit-$x-1 &&
git tag -a -m "$x-1" tag-$x-1 commit-$x-1 &&
for i in $(test_seq 2 10)
do
git merge commit-$j-$i -m "$x-$i" &&
git branch -f commit-$x-$i &&
git tag -a -m "$x-$i" tag-$x-$i commit-$x-$i || return 1
done
done &&
git commit-graph write --reachable &&
mv .git/objects/info/commit-graph commit-graph-full &&
commit-graph.c: write non-split graphs as read-only In the previous commit, Git learned 'hold_lock_file_for_update_mode' to allow the caller to specify the permission bits (prior to further adjustment by the umask and shared repository permissions) used when acquiring a temporary file. Use this in the commit-graph machinery for writing a non-split graph to acquire an opened temporary file with permissions read-only permissions to match the split behavior. (In the split case, Git uses git_mkstemp_mode' for each of the commit-graph layers with permission bits '0444'). One can notice this discrepancy when moving a non-split graph to be part of a new chain. This causes a commit-graph chain where all layers have read-only permission bits, except for the base layer, which is writable for the current user. Resolve this discrepancy by using the new 'hold_lock_file_for_update_mode' and passing the desired permission bits. Doing so causes some test fallout in t5318 and t6600. In t5318, this occurs in tests that corrupt a commit-graph file by writing into it. For these, 'chmod u+w'-ing the file beforehand resolves the issue. The additional spot in 'corrupt_graph_verify' is necessary because of the extra 'git commit-graph write' beforehand (which *does* rewrite the commit-graph file). In t6600, this is caused by copying a read-only commit-graph file into place and then trying to replace it. For these, make these files writable. Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-29 19:36:38 +02:00
chmod u+w commit-graph-full &&
git show-ref -s commit-5-5 | git commit-graph write --stdin-commits &&
mv .git/objects/info/commit-graph commit-graph-half &&
commit-graph.c: write non-split graphs as read-only In the previous commit, Git learned 'hold_lock_file_for_update_mode' to allow the caller to specify the permission bits (prior to further adjustment by the umask and shared repository permissions) used when acquiring a temporary file. Use this in the commit-graph machinery for writing a non-split graph to acquire an opened temporary file with permissions read-only permissions to match the split behavior. (In the split case, Git uses git_mkstemp_mode' for each of the commit-graph layers with permission bits '0444'). One can notice this discrepancy when moving a non-split graph to be part of a new chain. This causes a commit-graph chain where all layers have read-only permission bits, except for the base layer, which is writable for the current user. Resolve this discrepancy by using the new 'hold_lock_file_for_update_mode' and passing the desired permission bits. Doing so causes some test fallout in t5318 and t6600. In t5318, this occurs in tests that corrupt a commit-graph file by writing into it. For these, 'chmod u+w'-ing the file beforehand resolves the issue. The additional spot in 'corrupt_graph_verify' is necessary because of the extra 'git commit-graph write' beforehand (which *does* rewrite the commit-graph file). In t6600, this is caused by copying a read-only commit-graph file into place and then trying to replace it. For these, make these files writable. Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-29 19:36:38 +02:00
chmod u+w commit-graph-half &&
git -c commitGraph.generationVersion=1 commit-graph write --reachable &&
commit-graph: implement generation data chunk As discovered by Ævar, we cannot increment graph version to distinguish between generation numbers v1 and v2 [1]. Thus, one of pre-requistes before implementing generation number v2 was to distinguish between graph versions in a backwards compatible manner. We are going to introduce a new chunk called Generation DATa chunk (or GDAT). GDAT will store corrected committer date offsets whereas CDAT will still store topological level. Old Git does not understand GDAT chunk and would ignore it, reading topological levels from CDAT. New Git can parse GDAT and take advantage of newer generation numbers, falling back to topological levels when GDAT chunk is missing (as it would happen with a commit-graph written by old Git). We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT' which forces commit-graph file to be written without generation data chunk to emulate a commit-graph file written by old Git. To minimize the space required to store corrrected commit date, Git stores corrected commit date offsets into the commit-graph file, instea of corrected commit dates. This saves us 4 bytes per commit, decreasing the GDAT chunk size by half, but it's possible for the offset to overflow the 4-bytes allocated for storage. As such overflows are and should be exceedingly rare, we use the following overflow management scheme: We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV') to store corrected commit dates for commits with offsets greater than GENERATION_NUMBER_V2_OFFSET_MAX. If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set the MSB of the offset and the other bits store the position of corrected commit date in GDOV chunk, similar to how Extra Edge List is maintained. We test the overflow-related code with the following repo history: F - N - U / \ U - N - U N \ / N - F - N Where the commits denoted by U have committer date of zero seconds since Unix epoch, the commits denoted by N have committer date of 1112354055 (default committer date for the test suite) seconds since Unix epoch and the commits denoted by F have committer date of (2 ^ 31 - 2) seconds since Unix epoch. The largest offset observed is 2 ^ 31, just large enough to overflow. [1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/ Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com> Reviewed-by: Taylor Blau <me@ttaylorr.com> Reviewed-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
mv .git/objects/info/commit-graph commit-graph-no-gdat &&
chmod u+w commit-graph-no-gdat &&
git config core.commitGraph true
'
run_all_modes () {
test_when_finished rm -rf .git/objects/info/commit-graph &&
"$@" <input >actual &&
test_cmp expect actual &&
cp commit-graph-full .git/objects/info/commit-graph &&
"$@" <input >actual &&
test_cmp expect actual &&
cp commit-graph-half .git/objects/info/commit-graph &&
"$@" <input >actual &&
commit-graph: implement generation data chunk As discovered by Ævar, we cannot increment graph version to distinguish between generation numbers v1 and v2 [1]. Thus, one of pre-requistes before implementing generation number v2 was to distinguish between graph versions in a backwards compatible manner. We are going to introduce a new chunk called Generation DATa chunk (or GDAT). GDAT will store corrected committer date offsets whereas CDAT will still store topological level. Old Git does not understand GDAT chunk and would ignore it, reading topological levels from CDAT. New Git can parse GDAT and take advantage of newer generation numbers, falling back to topological levels when GDAT chunk is missing (as it would happen with a commit-graph written by old Git). We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT' which forces commit-graph file to be written without generation data chunk to emulate a commit-graph file written by old Git. To minimize the space required to store corrrected commit date, Git stores corrected commit date offsets into the commit-graph file, instea of corrected commit dates. This saves us 4 bytes per commit, decreasing the GDAT chunk size by half, but it's possible for the offset to overflow the 4-bytes allocated for storage. As such overflows are and should be exceedingly rare, we use the following overflow management scheme: We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV') to store corrected commit dates for commits with offsets greater than GENERATION_NUMBER_V2_OFFSET_MAX. If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set the MSB of the offset and the other bits store the position of corrected commit date in GDOV chunk, similar to how Extra Edge List is maintained. We test the overflow-related code with the following repo history: F - N - U / \ U - N - U N \ / N - F - N Where the commits denoted by U have committer date of zero seconds since Unix epoch, the commits denoted by N have committer date of 1112354055 (default committer date for the test suite) seconds since Unix epoch and the commits denoted by F have committer date of (2 ^ 31 - 2) seconds since Unix epoch. The largest offset observed is 2 ^ 31, just large enough to overflow. [1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/ Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com> Reviewed-by: Taylor Blau <me@ttaylorr.com> Reviewed-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
test_cmp expect actual &&
cp commit-graph-no-gdat .git/objects/info/commit-graph &&
"$@" <input >actual &&
test_cmp expect actual
}
test_all_modes () {
run_all_modes test-tool reach "$@"
}
test_expect_success 'ref_newer:miss' '
cat >input <<-\EOF &&
A:commit-5-7
B:commit-4-9
EOF
echo "ref_newer(A,B):0" >expect &&
test_all_modes ref_newer
'
test_expect_success 'ref_newer:hit' '
cat >input <<-\EOF &&
A:commit-5-7
B:commit-2-3
EOF
echo "ref_newer(A,B):1" >expect &&
test_all_modes ref_newer
'
test_expect_success 'in_merge_bases:hit' '
cat >input <<-\EOF &&
A:commit-5-7
B:commit-8-8
EOF
echo "in_merge_bases(A,B):1" >expect &&
test_all_modes in_merge_bases
'
test_expect_success 'in_merge_bases:miss' '
cat >input <<-\EOF &&
A:commit-6-8
B:commit-5-9
EOF
echo "in_merge_bases(A,B):0" >expect &&
test_all_modes in_merge_bases
'
commit-reach: fix in_merge_bases_many bug Way back in f9b8908b (commit.c: use generation numbers for in_merge_bases(), 2018-05-01), a heuristic was used to short-circuit the in_merge_bases() walk. This works just fine as long as the caller is checking only two commits, but when there are multiple, there is a possibility that this heuristic is _very wrong_. Some code moves since then has changed this method to repo_in_merge_bases_many() inside commit-reach.c. The heuristic computes the minimum generation number of the "reference" list, then compares this number to the generation number of the "commit". In a recent topic, a test was added that used in_merge_bases_many() to test if a commit was reachable from a number of commits pulled from a reflog. However, this highlighted the problem: if any of the reference commits have a smaller generation number than the given commit, then the walk is skipped _even if there exist some with higher generation number_. This heuristic is wrong! It must check the MAXIMUM generation number of the reference commits, not the MINIMUM. This highlights a testing gap. t6600-test-reach.sh covers many methods in commit-reach.c, including in_merge_bases() and get_merge_bases_many(), but since these methods either restrict to two input commits or actually look for the full list of merge bases, they don't check this heuristic! Add a possible input to "test-tool reach" that tests in_merge_bases_many() and add tests to t6600-test-reach.sh that cover this heuristic. This includes cases for the reference commits having generation above and below the generation of the input commit, but also having maximum generation below the generation of the input commit. The fix itself is to swap min_generation with a max_generation in repo_in_merge_bases_many(). Reported-by: Srinidhi Kaushik <shrinidhi.kaushik@gmail.com> Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-02 16:58:56 +02:00
test_expect_success 'in_merge_bases_many:hit' '
cat >input <<-\EOF &&
A:commit-6-8
X:commit-6-9
X:commit-5-7
EOF
echo "in_merge_bases_many(A,X):1" >expect &&
test_all_modes in_merge_bases_many
commit-reach: fix in_merge_bases_many bug Way back in f9b8908b (commit.c: use generation numbers for in_merge_bases(), 2018-05-01), a heuristic was used to short-circuit the in_merge_bases() walk. This works just fine as long as the caller is checking only two commits, but when there are multiple, there is a possibility that this heuristic is _very wrong_. Some code moves since then has changed this method to repo_in_merge_bases_many() inside commit-reach.c. The heuristic computes the minimum generation number of the "reference" list, then compares this number to the generation number of the "commit". In a recent topic, a test was added that used in_merge_bases_many() to test if a commit was reachable from a number of commits pulled from a reflog. However, this highlighted the problem: if any of the reference commits have a smaller generation number than the given commit, then the walk is skipped _even if there exist some with higher generation number_. This heuristic is wrong! It must check the MAXIMUM generation number of the reference commits, not the MINIMUM. This highlights a testing gap. t6600-test-reach.sh covers many methods in commit-reach.c, including in_merge_bases() and get_merge_bases_many(), but since these methods either restrict to two input commits or actually look for the full list of merge bases, they don't check this heuristic! Add a possible input to "test-tool reach" that tests in_merge_bases_many() and add tests to t6600-test-reach.sh that cover this heuristic. This includes cases for the reference commits having generation above and below the generation of the input commit, but also having maximum generation below the generation of the input commit. The fix itself is to swap min_generation with a max_generation in repo_in_merge_bases_many(). Reported-by: Srinidhi Kaushik <shrinidhi.kaushik@gmail.com> Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-02 16:58:56 +02:00
'
test_expect_success 'in_merge_bases_many:miss' '
cat >input <<-\EOF &&
A:commit-6-8
X:commit-7-7
X:commit-8-6
EOF
echo "in_merge_bases_many(A,X):0" >expect &&
test_all_modes in_merge_bases_many
commit-reach: fix in_merge_bases_many bug Way back in f9b8908b (commit.c: use generation numbers for in_merge_bases(), 2018-05-01), a heuristic was used to short-circuit the in_merge_bases() walk. This works just fine as long as the caller is checking only two commits, but when there are multiple, there is a possibility that this heuristic is _very wrong_. Some code moves since then has changed this method to repo_in_merge_bases_many() inside commit-reach.c. The heuristic computes the minimum generation number of the "reference" list, then compares this number to the generation number of the "commit". In a recent topic, a test was added that used in_merge_bases_many() to test if a commit was reachable from a number of commits pulled from a reflog. However, this highlighted the problem: if any of the reference commits have a smaller generation number than the given commit, then the walk is skipped _even if there exist some with higher generation number_. This heuristic is wrong! It must check the MAXIMUM generation number of the reference commits, not the MINIMUM. This highlights a testing gap. t6600-test-reach.sh covers many methods in commit-reach.c, including in_merge_bases() and get_merge_bases_many(), but since these methods either restrict to two input commits or actually look for the full list of merge bases, they don't check this heuristic! Add a possible input to "test-tool reach" that tests in_merge_bases_many() and add tests to t6600-test-reach.sh that cover this heuristic. This includes cases for the reference commits having generation above and below the generation of the input commit, but also having maximum generation below the generation of the input commit. The fix itself is to swap min_generation with a max_generation in repo_in_merge_bases_many(). Reported-by: Srinidhi Kaushik <shrinidhi.kaushik@gmail.com> Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-02 16:58:56 +02:00
'
test_expect_success 'in_merge_bases_many:miss-heuristic' '
cat >input <<-\EOF &&
A:commit-6-8
X:commit-7-5
X:commit-6-6
EOF
echo "in_merge_bases_many(A,X):0" >expect &&
test_all_modes in_merge_bases_many
commit-reach: fix in_merge_bases_many bug Way back in f9b8908b (commit.c: use generation numbers for in_merge_bases(), 2018-05-01), a heuristic was used to short-circuit the in_merge_bases() walk. This works just fine as long as the caller is checking only two commits, but when there are multiple, there is a possibility that this heuristic is _very wrong_. Some code moves since then has changed this method to repo_in_merge_bases_many() inside commit-reach.c. The heuristic computes the minimum generation number of the "reference" list, then compares this number to the generation number of the "commit". In a recent topic, a test was added that used in_merge_bases_many() to test if a commit was reachable from a number of commits pulled from a reflog. However, this highlighted the problem: if any of the reference commits have a smaller generation number than the given commit, then the walk is skipped _even if there exist some with higher generation number_. This heuristic is wrong! It must check the MAXIMUM generation number of the reference commits, not the MINIMUM. This highlights a testing gap. t6600-test-reach.sh covers many methods in commit-reach.c, including in_merge_bases() and get_merge_bases_many(), but since these methods either restrict to two input commits or actually look for the full list of merge bases, they don't check this heuristic! Add a possible input to "test-tool reach" that tests in_merge_bases_many() and add tests to t6600-test-reach.sh that cover this heuristic. This includes cases for the reference commits having generation above and below the generation of the input commit, but also having maximum generation below the generation of the input commit. The fix itself is to swap min_generation with a max_generation in repo_in_merge_bases_many(). Reported-by: Srinidhi Kaushik <shrinidhi.kaushik@gmail.com> Helped-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-02 16:58:56 +02:00
'
test_expect_success 'is_descendant_of:hit' '
cat >input <<-\EOF &&
A:commit-5-7
X:commit-4-8
X:commit-6-6
X:commit-1-1
EOF
echo "is_descendant_of(A,X):1" >expect &&
test_all_modes is_descendant_of
'
test_expect_success 'is_descendant_of:miss' '
cat >input <<-\EOF &&
A:commit-6-8
X:commit-5-9
X:commit-4-10
X:commit-7-6
EOF
echo "is_descendant_of(A,X):0" >expect &&
test_all_modes is_descendant_of
'
test_expect_success 'get_merge_bases_many' '
cat >input <<-\EOF &&
A:commit-5-7
X:commit-4-8
X:commit-6-6
X:commit-8-3
EOF
{
echo "get_merge_bases_many(A,X):" &&
git rev-parse commit-5-6 \
commit-4-7 | sort
} >expect &&
test_all_modes get_merge_bases_many
'
test_expect_success 'reduce_heads' '
cat >input <<-\EOF &&
X:commit-1-10
X:commit-2-8
X:commit-3-6
X:commit-4-4
X:commit-1-7
X:commit-2-5
X:commit-3-3
X:commit-5-1
EOF
{
echo "reduce_heads(X):" &&
git rev-parse commit-5-1 \
commit-4-4 \
commit-3-6 \
commit-2-8 \
commit-1-10 | sort
} >expect &&
test_all_modes reduce_heads
'
test_expect_success 'can_all_from_reach:hit' '
cat >input <<-\EOF &&
X:commit-2-10
X:commit-3-9
X:commit-4-8
X:commit-5-7
X:commit-6-6
X:commit-7-5
X:commit-8-4
X:commit-9-3
Y:commit-1-9
Y:commit-2-8
Y:commit-3-7
Y:commit-4-6
Y:commit-5-5
Y:commit-6-4
Y:commit-7-3
Y:commit-8-1
EOF
echo "can_all_from_reach(X,Y):1" >expect &&
test_all_modes can_all_from_reach
'
test_expect_success 'can_all_from_reach:miss' '
cat >input <<-\EOF &&
X:commit-2-10
X:commit-3-9
X:commit-4-8
X:commit-5-7
X:commit-6-6
X:commit-7-5
X:commit-8-4
X:commit-9-3
Y:commit-1-9
Y:commit-2-8
Y:commit-3-7
Y:commit-4-6
Y:commit-5-5
Y:commit-6-4
Y:commit-8-5
EOF
echo "can_all_from_reach(X,Y):0" >expect &&
test_all_modes can_all_from_reach
'
test_expect_success 'can_all_from_reach_with_flag: tags case' '
cat >input <<-\EOF &&
X:tag-2-10
X:tag-3-9
X:tag-4-8
X:commit-5-7
X:commit-6-6
X:commit-7-5
X:commit-8-4
X:commit-9-3
Y:tag-1-9
Y:tag-2-8
Y:tag-3-7
Y:commit-4-6
Y:commit-5-5
Y:commit-6-4
Y:commit-7-3
Y:commit-8-1
EOF
echo "can_all_from_reach_with_flag(X,_,_,0,0):1" >expect &&
test_all_modes can_all_from_reach_with_flag
'
test_expect_success 'commit_contains:hit' '
cat >input <<-\EOF &&
A:commit-7-7
X:commit-2-10
X:commit-3-9
X:commit-4-8
X:commit-5-7
X:commit-6-6
X:commit-7-5
X:commit-8-4
X:commit-9-3
EOF
echo "commit_contains(_,A,X,_):1" >expect &&
test_all_modes commit_contains &&
test_all_modes commit_contains --tag
'
test_expect_success 'commit_contains:miss' '
cat >input <<-\EOF &&
A:commit-6-5
X:commit-2-10
X:commit-3-9
X:commit-4-8
X:commit-5-7
X:commit-6-6
X:commit-7-5
X:commit-8-4
X:commit-9-3
EOF
echo "commit_contains(_,A,X,_):0" >expect &&
test_all_modes commit_contains &&
test_all_modes commit_contains --tag
'
test_expect_success 'rev-list: basic topo-order' '
git rev-parse \
commit-6-6 commit-5-6 commit-4-6 commit-3-6 commit-2-6 commit-1-6 \
commit-6-5 commit-5-5 commit-4-5 commit-3-5 commit-2-5 commit-1-5 \
commit-6-4 commit-5-4 commit-4-4 commit-3-4 commit-2-4 commit-1-4 \
commit-6-3 commit-5-3 commit-4-3 commit-3-3 commit-2-3 commit-1-3 \
commit-6-2 commit-5-2 commit-4-2 commit-3-2 commit-2-2 commit-1-2 \
commit-6-1 commit-5-1 commit-4-1 commit-3-1 commit-2-1 commit-1-1 \
>expect &&
run_all_modes git rev-list --topo-order commit-6-6
'
test_expect_success 'rev-list: first-parent topo-order' '
git rev-parse \
commit-6-6 \
commit-6-5 \
commit-6-4 \
commit-6-3 \
commit-6-2 \
commit-6-1 commit-5-1 commit-4-1 commit-3-1 commit-2-1 commit-1-1 \
>expect &&
run_all_modes git rev-list --first-parent --topo-order commit-6-6
'
test_expect_success 'rev-list: range topo-order' '
git rev-parse \
commit-6-6 commit-5-6 commit-4-6 commit-3-6 commit-2-6 commit-1-6 \
commit-6-5 commit-5-5 commit-4-5 commit-3-5 commit-2-5 commit-1-5 \
commit-6-4 commit-5-4 commit-4-4 commit-3-4 commit-2-4 commit-1-4 \
commit-6-3 commit-5-3 commit-4-3 \
commit-6-2 commit-5-2 commit-4-2 \
commit-6-1 commit-5-1 commit-4-1 \
>expect &&
run_all_modes git rev-list --topo-order commit-3-3..commit-6-6
'
test_expect_success 'rev-list: range topo-order' '
git rev-parse \
commit-6-6 commit-5-6 commit-4-6 \
commit-6-5 commit-5-5 commit-4-5 \
commit-6-4 commit-5-4 commit-4-4 \
commit-6-3 commit-5-3 commit-4-3 \
commit-6-2 commit-5-2 commit-4-2 \
commit-6-1 commit-5-1 commit-4-1 \
>expect &&
run_all_modes git rev-list --topo-order commit-3-8..commit-6-6
'
test_expect_success 'rev-list: first-parent range topo-order' '
git rev-parse \
commit-6-6 \
commit-6-5 \
commit-6-4 \
commit-6-3 \
commit-6-2 \
commit-6-1 commit-5-1 commit-4-1 \
>expect &&
run_all_modes git rev-list --first-parent --topo-order commit-3-8..commit-6-6
'
test_expect_success 'rev-list: ancestry-path topo-order' '
git rev-parse \
commit-6-6 commit-5-6 commit-4-6 commit-3-6 \
commit-6-5 commit-5-5 commit-4-5 commit-3-5 \
commit-6-4 commit-5-4 commit-4-4 commit-3-4 \
commit-6-3 commit-5-3 commit-4-3 \
>expect &&
run_all_modes git rev-list --topo-order --ancestry-path commit-3-3..commit-6-6
'
test_expect_success 'rev-list: symmetric difference topo-order' '
git rev-parse \
commit-6-6 commit-5-6 commit-4-6 \
commit-6-5 commit-5-5 commit-4-5 \
commit-6-4 commit-5-4 commit-4-4 \
commit-6-3 commit-5-3 commit-4-3 \
commit-6-2 commit-5-2 commit-4-2 \
commit-6-1 commit-5-1 commit-4-1 \
commit-3-8 commit-2-8 commit-1-8 \
commit-3-7 commit-2-7 commit-1-7 \
>expect &&
run_all_modes git rev-list --topo-order commit-3-8...commit-6-6
'
test_expect_success 'get_reachable_subset:all' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
X:commit-7-5
X:commit-6-6
X:commit-1-7
Y:commit-3-3
Y:commit-1-7
Y:commit-5-6
EOF
(
echo "get_reachable_subset(X,Y)" &&
git rev-parse commit-3-3 \
commit-1-7 \
commit-5-6 | sort
) >expect &&
test_all_modes get_reachable_subset
'
test_expect_success 'get_reachable_subset:some' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
X:commit-7-5
X:commit-1-7
Y:commit-3-3
Y:commit-1-7
Y:commit-5-6
EOF
(
echo "get_reachable_subset(X,Y)" &&
git rev-parse commit-3-3 \
commit-1-7 | sort
) >expect &&
test_all_modes get_reachable_subset
'
test_expect_success 'get_reachable_subset:none' '
cat >input <<-\EOF &&
X:commit-9-1
X:commit-8-3
X:commit-7-5
X:commit-1-7
Y:commit-9-3
Y:commit-7-6
Y:commit-2-8
EOF
echo "get_reachable_subset(X,Y)" >expect &&
test_all_modes get_reachable_subset
'
for-each-ref: add ahead-behind format atom The previous change implemented the ahead_behind() method, including an algorithm to compute the ahead/behind values for a number of commit tips relative to a number of commit bases. Now, integrate that algorithm as part of 'git for-each-ref' hidden behind a new format atom, ahead-behind. This naturally extends to 'git branch' and 'git tag' builtins, as well. This format allows specifying multiple bases, if so desired, and all matching references are compared against all of those bases. For this reason, failing to read a reference provided from these atoms results in an error. In order to translate the ahead_behind() method information to the format output code in ref-filter.c, we must populate arrays of ahead_behind_count structs. In struct ref_array, we store the full array that will be passed to ahead_behind(). In struct ref_array_item, we store an array of pointers that point to the relvant items within the full array. In this way, we can pull all relevant ahead/behind values directly when formatting output for a specific item. It also ensures the lifetime of the ahead_behind_count structs matches the time that the array is being used. Add specific tests of the ahead/behind counts in t6600-test-reach.sh, as it has an interesting repository shape. In particular, its merging strategy and its use of different commit-graphs would demonstrate over- counting if the ahead_behind() method did not already account for that possibility. Also add tests for the specific for-each-ref, branch, and tag builtins. In the case of 'git tag', there are intersting cases that happen when some of the selected tips are not commits. This requires careful logic around commits_nr in the second loop of filter_ahead_behind(). Also, the test in t7004 is carefully located to avoid being dependent on the GPG prereq. It also avoids using the test_commit helper, as that will add ticks to the time and disrupt the expected timestamps in later tag tests. Also add performance tests in a new p1300-graph-walks.sh script. This will be useful for more uses in the future, but for now compare the ahead-behind counting algorithm in 'git for-each-ref' to the naive implementation by running 'git rev-list --count' processes for each input. For the Git source code repository, the improvement is already obvious: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.07(0.07+0.00) 1500.3: ahead-behind counts: git branch 0.07(0.06+0.00) 1500.4: ahead-behind counts: git tag 0.07(0.06+0.00) 1500.5: ahead-behind counts: git rev-list 1.32(1.04+0.27) But the standard performance benchmark is the Linux kernel repository, which demosntrates a significant improvement: Test this tree --------------------------------------------------------------- 1500.2: ahead-behind counts: git for-each-ref 0.27(0.24+0.02) 1500.3: ahead-behind counts: git branch 0.27(0.24+0.03) 1500.4: ahead-behind counts: git tag 0.28(0.27+0.01) 1500.5: ahead-behind counts: git rev-list 4.57(4.03+0.54) The 'git rev-list' test exists in this change as a demonstration, but it will be removed in the next change to avoid wasting time on this comparison. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 12:26:54 +01:00
test_expect_success 'for-each-ref ahead-behind:linear' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-1-3
refs/heads/commit-1-5
refs/heads/commit-1-8
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1 0 8
refs/heads/commit-1-3 0 6
refs/heads/commit-1-5 0 4
refs/heads/commit-1-8 0 1
EOF
run_all_modes git for-each-ref \
--format="%(refname) %(ahead-behind:commit-1-9)" --stdin
'
test_expect_success 'for-each-ref ahead-behind:all' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-2-4
refs/heads/commit-4-2
refs/heads/commit-4-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1 0 24
refs/heads/commit-2-4 0 17
refs/heads/commit-4-2 0 17
refs/heads/commit-4-4 0 9
EOF
run_all_modes git for-each-ref \
--format="%(refname) %(ahead-behind:commit-5-5)" --stdin
'
test_expect_success 'for-each-ref ahead-behind:some' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-5-3
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1 0 53
refs/heads/commit-4-8 8 30
refs/heads/commit-5-3 0 39
refs/heads/commit-9-9 27 0
EOF
run_all_modes git for-each-ref \
--format="%(refname) %(ahead-behind:commit-9-6)" --stdin
'
test_expect_success 'for-each-ref ahead-behind:some, multibase' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-5-3
refs/heads/commit-7-8
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1 0 53 0 53
refs/heads/commit-4-8 8 30 0 22
refs/heads/commit-5-3 0 39 0 39
refs/heads/commit-7-8 14 12 8 6
refs/heads/commit-9-9 27 0 27 0
EOF
run_all_modes git for-each-ref \
--format="%(refname) %(ahead-behind:commit-9-6) %(ahead-behind:commit-6-9)" \
--stdin
'
test_expect_success 'for-each-ref ahead-behind:none' '
cat >input <<-\EOF &&
refs/heads/commit-7-5
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
cat >expect <<-\EOF &&
refs/heads/commit-4-8 16 16
refs/heads/commit-7-5 7 4
refs/heads/commit-9-9 49 0
EOF
run_all_modes git for-each-ref \
--format="%(refname) %(ahead-behind:commit-8-4)" --stdin
'
commit-reach: add tips_reachable_from_bases() Both 'git for-each-ref --merged=<X>' and 'git branch --merged=<X>' use the ref-filter machinery to select references or branches (respectively) that are reachable from a set of commits presented by one or more --merged arguments. This happens within reach_filter(), which uses the revision-walk machinery to walk history in a standard way. However, the commit-reach.c file is full of custom searches that are more efficient, especially for reachability queries that can terminate early when reachability is discovered. Add a new tips_reachable_from_bases() method to commit-reach.c and call it from within reach_filter() in ref-filter.c. This affects both 'git branch' and 'git for-each-ref' as tested in p1500-graph-walks.sh. For the Linux kernel repository, we take an already-fast algorithm and make it even faster: Test HEAD~1 HEAD ------------------------------------------------------------------- 1500.5: contains: git for-each-ref --merged 0.13 0.02 -84.6% 1500.6: contains: git branch --merged 0.14 0.02 -85.7% 1500.7: contains: git tag --merged 0.15 0.03 -80.0% (Note that we remove the iterative 'git rev-list' test from p1500 because it no longer makes sense as a comparison to 'git for-each-ref' and would just waste time running it for these comparisons.) The algorithm is implemented in commit-reach.c in the method tips_reachable_from_base(). This method takes a string_list of tips and assigns the 'util' for each item with the value 1 if the base commit can reach those tips. Like other reachability queries in commit-reach.c, the fastest way to search for "can A reach B?" is to do a depth-first search up to the generation number of B, preferring to explore first parents before later parents. While we must walk all reachable commits up to that generation number when the answer is "no", the depth-first search can answer "yes" much faster than other approaches in most cases. This search becomes trickier when there are multiple targets for the depth-first search. The commits with lower generation number are more likely to be within the history of the start commit, but we don't want to waste time searching commits of low generation number if the commit target with lowest generation number has already been found. The trick here is to take the input commits and sort them by generation number in ascending order. Track the index within this order as min_generation_index. When we find a commit, if its index in the list is equal to min_generation_index, then we can increase the generation number boundary of our search to the next-lowest value in the list. With this mechanism, the number of commits to search is minimized with respect to the depth-first search heuristic. We will walk all commits up to the minimum generation number of a commit that is _not_ reachable from the start, but we will walk only the necessary portion of the depth-first search for the reachable commits of lower generation. Add extra tests for this behavior in t6600-test-reach.sh as the interesting data shape of that repository can sometimes demonstrate corner case bugs. Signed-off-by: Derrick Stolee <derrickstolee@github.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-20 12:26:55 +01:00
test_expect_success 'for-each-ref merged:linear' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-1-3
refs/heads/commit-1-5
refs/heads/commit-1-8
refs/heads/commit-2-1
refs/heads/commit-5-1
refs/heads/commit-9-1
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-1-3
refs/heads/commit-1-5
refs/heads/commit-1-8
EOF
run_all_modes git for-each-ref --merged=commit-1-9 \
--format="%(refname)" --stdin
'
test_expect_success 'for-each-ref merged:all' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-2-4
refs/heads/commit-4-2
refs/heads/commit-4-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-2-4
refs/heads/commit-4-2
refs/heads/commit-4-4
EOF
run_all_modes git for-each-ref --merged=commit-5-5 \
--format="%(refname)" --stdin
'
test_expect_success 'for-each-ref ahead-behind:some' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-5-3
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-5-3
EOF
run_all_modes git for-each-ref --merged=commit-9-6 \
--format="%(refname)" --stdin
'
test_expect_success 'for-each-ref merged:some, multibase' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-5-3
refs/heads/commit-7-8
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-4-8
refs/heads/commit-5-3
EOF
run_all_modes git for-each-ref \
--merged=commit-5-8 \
--merged=commit-8-5 \
--format="%(refname)" \
--stdin
'
test_expect_success 'for-each-ref merged:none' '
cat >input <<-\EOF &&
refs/heads/commit-7-5
refs/heads/commit-4-8
refs/heads/commit-9-9
EOF
>expect &&
run_all_modes git for-each-ref --merged=commit-8-4 \
--format="%(refname)" --stdin
'
commit-reach: add get_branch_base_for_tip Add a new reachability algorithm that intends to discover (from a heuristic) which branch was used as the starting point for a given commit. Add focused tests using the 'test-tool reach' command. In repositories that use pull requests (or merge requests) to advance one or more "protected" branches, the history of that reference can be recovered by following the first-parent history in most cases. Most are completed using no-fast-forward merges, though squash merges are quite common. Less common is rebase-and-merge, which still validates this assumption. Finally, the case that breaks this assumption is the fast-forward update (with potential rebasing). Even in this case, the previous commit commonly appears in the first-parent history of the branch. Similar assumptions can be made for a topic branch created by a single user with the intention to merge back into another branch. Using 'git commit', 'git merge', and 'git cherry-pick' from HEAD will default to having the first-parent commit be the previous commit at HEAD. This history changes only with commands such as 'git reset' or 'git rebase', where the command names also imply that the branch is starting from a new location. With this movement of branches in mind, the following heuristic is proposed as a way to determine the base branch for a given source branch: Among a list of candidate base branches, select the candidate that minimizes the number of commits in the first-parent history of the source that are not in the first-parent history of the candidate. Prior third-party solutions to this problem have used this optimization criteria, but have relied upon extracting the first-parent history and comparing those lists as tables instead of using commit-graph walks. Given current command-line interface options, this optimization criteria is not easy to detect directly. Even using the command git rev-list --count --first-parent <base>..<source> does not measure this count, as it uses full reachability from <base> to determine which commits to remove from the range '<base>..<source>'. This may lead to one asking if we should instead be using the full reachability of the candidate and only the first-parent history of the source. This, unfortunately, does not work for repositories that use long-lived branches and automation to merge across those branches. In extremely large repositories, merging into a single trunk may not be feasible. This is usually due to the desired frequency of updates (thousands of engineers doing daily work) combined with the time required to perform a validation build. These factors combine to create significant risk of semantic merge conflicts, leading to build breaks on the trunk. In response, repository maintainers can create a single Level Zero (L0) trunk and multiple Level One (L1) branches. By partitioning the engineers by organization, these engineers may see lower risk of semantic merge conflicts as well as be protected against build breaks in other L1 branches. The key to making this system work is a semi-automated process of merging L1 branches into the L0 trunk and vice-versa. In a large enough organization, these L1 branches may further split into L2 or L3 branches, but the same principles apply for merging across deeper levels. If these automated merges use a typical merge with the second parent bringing in the "new" content, then each L0 and L1 branch can track its previous positions by following first-parent history, which appear as parallel paths (until reaching the first place where the branches diverged). If we also walk to second parents, then the histories overlap significantly and cannot be distinguished except for very-recent changes. For this reason, the first-parent condition should be symmetrical across the base and source branches. Another common case for desiring the result of this optimization method is the use of release branches. When releasing a version of a repository, a branch can be used to track that release. Any updates that are worth fixing in that release can be merged to the release branch and shipped with only the necessary fixes without any new features introduced in the trunk branch. The 'maint-2.<X>' branches represent this pattern in the Git project. The microsoft/git fork uses 'vfs-2.<X>.<Y>' branches to track the changes that are custom to that fork on top of each upstream Git release 2.<X>.<Y>. This application doesn't need the symmetrical first-parent condition, but the use of first-parent histories does not change the results for these branches. To determine the base branch from a list of candidates, create a new method in commit-reach.c that performs a single* commit-graph walk. The core concept is to walk first-parents starting at the candidate bases and the source, tracking the "best" base to reach a given commit. Use generation numbers to ensure that a commit is walked at most once and all children have been explored before visiting it. When reaching a commit that is reachable from both a base and the source, we will then have a guarantee that this is the closest intersection of first-parent histories. Track the best base to reach that commit and return it as a result. In rare cases involving multiple root commits, the first-parent history of the source may never intersect any of the candidates and thus a null result is returned. * There are up to two walks, since we require all commits to have a computed generation number in order to avoid incorrect results. This is similar to the need for computed generation numbers in ahead_behind() as implemented in fd67d149bde (commit-reach: implement ahead_behind() logic, 2023-03-20). In order to track the "best" base, use a new commit slab that stores an integer. This value defaults to zero upon initialization, so use -1 to track that the source commit can reach this commit and use 'i + 1' to track that the ith base can reach this commit. When multiple bases can reach a commit, minimize the index to break ties. This allows the caller to specify an order to the bases that determines some amount of preference when the heuristic does not result in a unique result. The trickiest part of the integer slab is what happens when reaching a collision among the histories of the bases and the history of the source. This is noticed when viewing the first parent and seeing that it has a slab value that differs in sign (negative or positive). In this case, the collision commit is stored in the method variable 'branch_point' and its slab value is set to -1. The index of the best base (so far) is stored in the method variable 'best_index'. It is possible that there are multiple commits that have the branch_point as its first parent, leading to multiple updates of best_index. The result is determined when 'branch_point' is visited in the commit walk, giving the guarantee that all commits that could reach 'branch_point' were visited. Several interesting cases of collisions and different results are tested in the t6600-test-reach.sh script. Recall that this script also tests the algorithm in three possible states involving the commit-graph file and how many commits are written in the file. This provides some coverage of the need (and lack of need) for the ensure_generations_valid() method. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 12:31:27 +02:00
# For get_branch_base_for_tip, we only care about
# first-parent history. Here is the test graph with
# second parents removed:
#
# (10,10)
# /
# (10,9) (9,10)
# / /
# (10,8) (9,9) (8,10)
# / / /
# ( continued...)
# \ / / /
# (3,1) (2,2) (1,3)
# \ / /
# (2,1) (1,2)
# \ /
# (1,1)
#
# In short, for a commit (i,j), the first-parent history
# walks all commits (i, k) with k from j to 1, then the
# commits (l, 1) with l from i to 1.
test_expect_success 'get_branch_base_for_tip: none reach' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-2-3
X:commit-1-2
X:commit-1-4
X:commit-4-4
X:commit-8-4
X:commit-10-4
EOF
echo "get_branch_base_for_tip(A,X):2" >expect &&
test_all_modes get_branch_base_for_tip
'
test_expect_success 'get_branch_base_for_tip: equal to tip' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-8-4
X:commit-1-2
X:commit-1-4
X:commit-4-4
X:commit-8-4
X:commit-10-4
EOF
echo "get_branch_base_for_tip(A,X):3" >expect &&
test_all_modes get_branch_base_for_tip
'
test_expect_success 'get_branch_base_for_tip: all reach tip' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-4-1
X:commit-4-2
X:commit-5-1
EOF
echo "get_branch_base_for_tip(A,X):0" >expect &&
test_all_modes get_branch_base_for_tip
'
for-each-ref: add 'is-base' token The previous change introduced the get_branch_base_for_tip() method in commit-reach.c. The motivation of that change was about using a heuristic to deteremine the base branch for a source commit from a list of candidate commit tips. This change makes that algorithm visible to users via a new atom in the 'git for-each-ref' format. This change is very similar to the chang in 49abcd21da6 (for-each-ref: add ahead-behind format atom, 2023-03-20). Introduce the 'is-base:<source>' atom, which will indicate that the algorithm should be computed and the result of the algorithm is reported using an indicator of the form '(<source>)'. For example, using '%(is-base:HEAD)' would result in one line having the token '(HEAD)'. Use the sorted order of refs included in the ref filter to break ties in the algorithm's heuristic. In the previous change, the motivating examples include using an L0 trunk, long-lived L1 branches, and temporary release branches. A caller could communicate the ordered preference among these categories using the input refpecs and avoiding a different sort mechanism. This sorting behavior is tested in the test scripts. It is important to include this atom as a special case to can_do_iterative_format() to match the expectations created in bd98f9774e1 (ref-filter.c: filter & format refs in the same callback, 2023-11-14). The ahead-behind atom was one of the special cases, and this similarly requires using an algorithm across all input refs before starting the format of any single ref. In the test script, the format tokens use colons or lack whitespace to avoid Git complaining about trailing whitespace errors. Signed-off-by: Derrick Stolee <stolee@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-14 12:31:29 +02:00
test_expect_success 'for-each-ref is-base: none reach' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-4-2
refs/heads/commit-4-4
refs/heads/commit-8-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1:
refs/heads/commit-4-2:(commit-2-3)
refs/heads/commit-4-4:
refs/heads/commit-8-4:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-2-3)" --stdin
'
test_expect_success 'for-each-ref is-base: all reach' '
cat >input <<-\EOF &&
refs/heads/commit-4-2
refs/heads/commit-5-1
EOF
cat >expect <<-\EOF &&
refs/heads/commit-4-2:(commit-4-1)
refs/heads/commit-5-1:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-4-1)" --stdin
'
test_expect_success 'for-each-ref is-base: equal to tip' '
cat >input <<-\EOF &&
refs/heads/commit-4-2
refs/heads/commit-5-1
EOF
cat >expect <<-\EOF &&
refs/heads/commit-4-2:(commit-4-2)
refs/heads/commit-5-1:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-4-2)" --stdin
'
test_expect_success 'for-each-ref is-base:multiple' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-4-2
refs/heads/commit-4-4
refs/heads/commit-8-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1[-]
refs/heads/commit-4-2[(commit-2-3)-]
refs/heads/commit-4-4[-]
refs/heads/commit-8-4[-(commit-6-5)]
EOF
run_all_modes git for-each-ref \
--format="%(refname)[%(is-base:commit-2-3)-%(is-base:commit-6-5)]" --stdin
'
test_done