1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-11-01 06:47:52 +01:00
git/t/t7701-repack-unpack-unreachable.sh
Jeff King 905f27b86a repack: add --keep-unreachable option
The usual way to do a full repack (and what is done by
git-gc) is to run "repack -Ad --unpack-unreachable=<when>",
which will loosen any unreachable objects newer than
"<when>", and drop any older ones.

This is a safer alternative to "repack -ad", because
"<when>" becomes a grace period during which we will not
drop any new objects that are about to be referenced.
However, it isn't perfectly safe. It's always possible that
a process is about to reference an old object. Even if that
process were to take care to update the timestamp on the
object, there is no atomicity with a simultaneously running
"repack" process.

So while unlikely, there is a small race wherein we may drop
an object that is in the process of being referenced. If you
do automated repacking on a large number of active
repositories, you may hit it eventually, and the result is a
corrupted repository.

It would be nice to fix that race in the long run, but it's
complicated.  In the meantime, there is a much simpler
strategy for automated repository maintenance: do not drop
objects at all. We already have a "--keep-unreachable"
option in pack-objects; we just need to plumb it through
from git-repack.

Note that this _isn't_ plumbed through from git-gc, so at
this point it's strictly a tool for people doing their own
advanced repository maintenance strategy.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-14 13:57:42 -07:00

140 lines
4 KiB
Bash
Executable file

#!/bin/sh
test_description='git repack works correctly'
. ./test-lib.sh
fsha1=
csha1=
tsha1=
test_expect_success '-A with -d option leaves unreachable objects unpacked' '
echo content > file1 &&
git add . &&
test_tick &&
git commit -m initial_commit &&
# create a transient branch with unique content
git checkout -b transient_branch &&
echo more content >> file1 &&
# record the objects created in the database for file, commit, tree
fsha1=$(git hash-object file1) &&
test_tick &&
git commit -a -m more_content &&
csha1=$(git rev-parse HEAD^{commit}) &&
tsha1=$(git rev-parse HEAD^{tree}) &&
git checkout master &&
echo even more content >> file1 &&
test_tick &&
git commit -a -m even_more_content &&
# delete the transient branch
git branch -D transient_branch &&
# pack the repo
git repack -A -d -l &&
# verify objects are packed in repository
test 3 = $(git verify-pack -v -- .git/objects/pack/*.idx |
egrep "^($fsha1|$csha1|$tsha1) " |
sort | uniq | wc -l) &&
git show $fsha1 &&
git show $csha1 &&
git show $tsha1 &&
# now expire the reflog, while keeping reachable ones but expiring
# unreachables immediately
test_tick &&
sometimeago=$(( $test_tick - 10000 )) &&
git reflog expire --expire=$sometimeago --expire-unreachable=$test_tick --all &&
# and repack
git repack -A -d -l &&
# verify objects are retained unpacked
test 0 = $(git verify-pack -v -- .git/objects/pack/*.idx |
egrep "^($fsha1|$csha1|$tsha1) " |
sort | uniq | wc -l) &&
git show $fsha1 &&
git show $csha1 &&
git show $tsha1
'
compare_mtimes ()
{
read tref rest &&
while read t rest; do
test "$tref" = "$t" || return 1
done
}
test_expect_success '-A without -d option leaves unreachable objects packed' '
fsha1path=$(echo "$fsha1" | sed -e "s|\(..\)|\1/|") &&
fsha1path=".git/objects/$fsha1path" &&
csha1path=$(echo "$csha1" | sed -e "s|\(..\)|\1/|") &&
csha1path=".git/objects/$csha1path" &&
tsha1path=$(echo "$tsha1" | sed -e "s|\(..\)|\1/|") &&
tsha1path=".git/objects/$tsha1path" &&
git branch transient_branch $csha1 &&
git repack -a -d -l &&
test ! -f "$fsha1path" &&
test ! -f "$csha1path" &&
test ! -f "$tsha1path" &&
test 1 = $(ls -1 .git/objects/pack/pack-*.pack | wc -l) &&
packfile=$(ls .git/objects/pack/pack-*.pack) &&
git branch -D transient_branch &&
test_tick &&
git repack -A -l &&
test ! -f "$fsha1path" &&
test ! -f "$csha1path" &&
test ! -f "$tsha1path" &&
git show $fsha1 &&
git show $csha1 &&
git show $tsha1
'
test_expect_success 'unpacked objects receive timestamp of pack file' '
tmppack=".git/objects/pack/tmp_pack" &&
ln "$packfile" "$tmppack" &&
git repack -A -l -d &&
test-chmtime -v +0 "$tmppack" "$fsha1path" "$csha1path" "$tsha1path" \
> mtimes &&
compare_mtimes < mtimes
'
test_expect_success 'do not bother loosening old objects' '
obj1=$(echo one | git hash-object -w --stdin) &&
obj2=$(echo two | git hash-object -w --stdin) &&
pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
git prune-packed &&
git cat-file -p $obj1 &&
git cat-file -p $obj2 &&
test-chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
git repack -A -d --unpack-unreachable=1.hour.ago &&
git cat-file -p $obj1 &&
test_must_fail git cat-file -p $obj2
'
test_expect_success 'keep packed objects found only in index' '
echo my-unique-content >file &&
git add file &&
git commit -m "make it reachable" &&
git gc &&
git reset HEAD^ &&
git reflog expire --expire=now --all &&
git add file &&
test-chmtime =-86400 .git/objects/pack/* &&
git gc --prune=1.hour.ago &&
git cat-file blob :file
'
test_expect_success 'repack -k keeps unreachable packed objects' '
# create packed-but-unreachable object
sha1=$(echo unreachable-packed | git hash-object -w --stdin) &&
pack=$(echo $sha1 | git pack-objects .git/objects/pack/pack) &&
git prune-packed &&
# -k should keep it
git repack -adk &&
git cat-file -p $sha1 &&
# and double check that without -k it would have been removed
git repack -ad &&
test_must_fail git cat-file -p $sha1
'
test_done