
Merge branch 'ps/reftable-stack-compaction'

The code paths to compact multiple reftable files have been updated
to correctly deal with multiple compactions triggering at the same
time.

* ps/reftable-stack-compaction:
  reftable/stack: handle locked tables during auto-compaction
  reftable/stack: fix corruption on concurrent compaction
  reftable/stack: use lock_file when adding table to "tables.list"
  reftable/stack: do not die when fsyncing lock file files
  reftable/stack: simplify tracking of table locks
  reftable/stack: update stats on failed full compaction
  reftable/stack: test compaction with already-locked tables
  reftable/stack: extract function to setup stack with N tables
  reftable/stack: refactor function to gather table sizes
Junio C Hamano 2024-08-15 13:22:13 -07:00
commit 69b737999c
3 changed files with 313 additions and 87 deletions
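
The heart of the series is the new best-effort locking strategy in stack_compact_range(): tables are locked from the newest to the oldest, and when one of them is already locked, the range shrinks to the suffix of tables that could be locked. Below is a minimal standalone sketch of that idea, not the actual implementation; try_lock_table() is a hypothetical stand-in for the real hold_lock_file_for_update() machinery.

#include <errno.h>
#include <stddef.h>

/*
 * Hypothetical stand-in for taking a table's "<name>.lock" file:
 * returns 0 on success, -1 with errno == EEXIST when another process
 * already holds the lock.
 */
extern int try_lock_table(size_t index);

/*
 * Lock the tables in [*first, last] in reverse order, newest first. On a
 * lock conflict, shrink the range to the already-locked suffix as long as
 * at least two tables remain in it; otherwise give up. Returns the number
 * of tables locked, or 0 if nothing can be compacted.
 */
static size_t lock_tables_best_effort(size_t *first, size_t last)
{
	size_t nlocks = 0;

	for (size_t i = last + 1; i > *first; i--) {
		if (try_lock_table(i - 1) < 0) {
			if (errno == EEXIST && last - (i - 1) >= 2) {
				/* Tables i..last are locked; compact only those. */
				*first = i;
				return nlocks;
			}
			return 0; /* hard failure, or nothing left to compact */
		}
		nlocks++;
	}

	return nlocks;
}

In the diff below, reftable_stack_auto_compact() opts into this behavior via the new STACK_COMPACT_RANGE_BEST_EFFORT flag, while reftable_stack_compact_all() keeps the all-or-nothing semantics and reports REFTABLE_LOCK_ERROR on conflict.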

reftable/stack.c

@@ -567,7 +567,7 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
 }
 
 struct reftable_addition {
-	struct tempfile *lock_file;
+	struct lock_file tables_list_lock;
 	struct reftable_stack *stack;
 
 	char **new_tables;
@@ -581,13 +581,13 @@ static int reftable_stack_init_addition(struct reftable_addition *add,
 					struct reftable_stack *st)
 {
 	struct strbuf lock_file_name = STRBUF_INIT;
-	int err = 0;
+	int err;
 
 	add->stack = st;
 
-	strbuf_addf(&lock_file_name, "%s.lock", st->list_file);
-	add->lock_file = create_tempfile(lock_file_name.buf);
-	if (!add->lock_file) {
+	err = hold_lock_file_for_update(&add->tables_list_lock, st->list_file,
+					LOCK_NO_DEREF);
+	if (err < 0) {
 		if (errno == EEXIST) {
 			err = REFTABLE_LOCK_ERROR;
 		} else {
@@ -596,7 +596,8 @@ static int reftable_stack_init_addition(struct reftable_addition *add,
 		goto done;
 	}
 	if (st->opts.default_permissions) {
-		if (chmod(add->lock_file->filename.buf, st->opts.default_permissions) < 0) {
+		if (chmod(get_lock_file_path(&add->tables_list_lock),
+			  st->opts.default_permissions) < 0) {
 			err = REFTABLE_IO_ERROR;
 			goto done;
 		}
@@ -635,7 +636,7 @@ static void reftable_addition_close(struct reftable_addition *add)
 	add->new_tables_len = 0;
 	add->new_tables_cap = 0;
 
-	delete_tempfile(&add->lock_file);
+	rollback_lock_file(&add->tables_list_lock);
 	strbuf_release(&nm);
 }
 
@@ -651,7 +652,7 @@ void reftable_addition_destroy(struct reftable_addition *add)
 int reftable_addition_commit(struct reftable_addition *add)
 {
 	struct strbuf table_list = STRBUF_INIT;
-	int lock_file_fd = get_tempfile_fd(add->lock_file);
+	int lock_file_fd = get_lock_file_fd(&add->tables_list_lock);
 	int err = 0;
 	size_t i;
 
@@ -674,10 +675,13 @@ int reftable_addition_commit(struct reftable_addition *add)
 		goto done;
 	}
 
-	fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd,
-			       get_tempfile_path(add->lock_file));
+	err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd);
+	if (err < 0) {
+		err = REFTABLE_IO_ERROR;
+		goto done;
+	}
 
-	err = rename_tempfile(&add->lock_file, add->stack->list_file);
+	err = commit_lock_file(&add->tables_list_lock);
 	if (err < 0) {
 		err = REFTABLE_IO_ERROR;
 		goto done;
@@ -995,6 +999,15 @@ static int stack_write_compact(struct reftable_stack *st,
 	return err;
 }
 
+enum stack_compact_range_flags {
+	/*
+	 * Perform a best-effort compaction. That is, even if we cannot lock
+	 * all tables in the specified range, we will try to compact the
+	 * remaining slice.
+	 */
+	STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
+};
+
 /*
  * Compact all tables in the range `[first, last)` into a single new table.
  *
@@ -1006,7 +1019,8 @@ static int stack_write_compact(struct reftable_stack *st,
  */
 static int stack_compact_range(struct reftable_stack *st,
 			       size_t first, size_t last,
-			       struct reftable_log_expiry_config *expiry)
+			       struct reftable_log_expiry_config *expiry,
+			       unsigned int flags)
 {
 	struct strbuf tables_list_buf = STRBUF_INIT;
 	struct strbuf new_table_name = STRBUF_INIT;
@@ -1016,7 +1030,9 @@ static int stack_compact_range(struct reftable_stack *st,
 	struct lock_file *table_locks = NULL;
 	struct tempfile *new_table = NULL;
 	int is_empty_table = 0, err = 0;
-	size_t i;
+	size_t first_to_replace, last_to_replace;
+	size_t i, nlocks = 0;
+	char **names = NULL;
 
 	if (first > last || (!expiry && first == last)) {
 		err = 0;
@@ -1046,27 +1062,55 @@ static int stack_compact_range(struct reftable_stack *st,
 	/*
 	 * Lock all tables in the user-provided range. This is the slice of our
 	 * stack which we'll compact.
+	 *
+	 * Note that we lock tables in reverse order from last to first. The
+	 * intent behind this is to allow a newer process to perform best
+	 * effort compaction of tables that it has added in the case where an
+	 * older process is still busy compacting tables which are preexisting
+	 * from the point of view of the newer process.
	 */
 	REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
-	for (i = first; i <= last; i++) {
-		stack_filename(&table_name, st, reader_name(st->readers[i]));
+	for (i = last + 1; i > first; i--) {
+		stack_filename(&table_name, st, reader_name(st->readers[i - 1]));
 
-		err = hold_lock_file_for_update(&table_locks[i - first],
+		err = hold_lock_file_for_update(&table_locks[nlocks],
 						table_name.buf, LOCK_NO_DEREF);
 		if (err < 0) {
-			if (errno == EEXIST)
+			/*
+			 * When the table is locked already we may do a
+			 * best-effort compaction and compact only the tables
+			 * that we have managed to lock so far. This of course
+			 * requires that we have been able to lock at least two
+			 * tables, otherwise there would be nothing to compact.
+			 * In that case, we return a lock error to our caller.
+			 */
+			if (errno == EEXIST && last - (i - 1) >= 2 &&
+			    flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
+				err = 0;
+				/*
+				 * The subtraction is to offset the index, the
+				 * addition is to only compact up to the table
+				 * of the preceding iteration. They obviously
+				 * cancel each other out, but that may be
+				 * non-obvious when it was omitted.
+				 */
+				first = (i - 1) + 1;
+				break;
+			} else if (errno == EEXIST) {
 				err = REFTABLE_LOCK_ERROR;
-			else
+				goto done;
+			} else {
 				err = REFTABLE_IO_ERROR;
-			goto done;
+				goto done;
+			}
 		}
 
 		/*
 		 * We need to close the lockfiles as we might otherwise easily
 		 * run into file descriptor exhaustion when we compress a lot
 		 * of tables.
 		 */
-		err = close_lock_file_gently(&table_locks[i - first]);
+		err = close_lock_file_gently(&table_locks[nlocks++]);
 		if (err < 0) {
 			err = REFTABLE_IO_ERROR;
 			goto done;
@@ -1119,6 +1163,100 @@ static int stack_compact_range(struct reftable_stack *st,
 		}
 	}
 
+	/*
+	 * As we have unlocked the stack while compacting our slice of tables
+	 * it may have happened that a concurrently running process has updated
+	 * the stack while we were compacting. In that case, we need to check
+	 * whether the tables that we have just compacted still exist in the
+	 * stack in the exact same order as we have compacted them.
+	 *
+	 * If they do exist, then it is fine to continue and replace those
+	 * tables with our compacted version. If they don't, then we need to
+	 * abort.
+	 */
+	err = stack_uptodate(st);
+	if (err < 0)
+		goto done;
+	if (err > 0) {
+		ssize_t new_offset = -1;
+		int fd;
+
+		fd = open(st->list_file, O_RDONLY);
+		if (fd < 0) {
+			err = REFTABLE_IO_ERROR;
+			goto done;
+		}
+
+		err = fd_read_lines(fd, &names);
+		close(fd);
+		if (err < 0)
+			goto done;
+
+		/*
+		 * Search for the offset of the first table that we have
+		 * compacted in the updated "tables.list" file.
+		 */
+		for (size_t i = 0; names[i]; i++) {
+			if (strcmp(names[i], st->readers[first]->name))
+				continue;
+
+			/*
+			 * We have found the first entry. Verify that all the
+			 * subsequent tables we have compacted still exist in
+			 * the modified stack in the exact same order as we
+			 * have compacted them.
+			 */
+			for (size_t j = 1; j < last - first + 1; j++) {
+				const char *old = first + j < st->merged->stack_len ?
+					st->readers[first + j]->name : NULL;
+				const char *new = names[i + j];
+
+				/*
+				 * If some entries are missing or in case the tables
+				 * have changed then we need to bail out. Again, this
+				 * shouldn't ever happen because we have locked the
+				 * tables we are compacting.
+				 */
+				if (!old || !new || strcmp(old, new)) {
+					err = REFTABLE_OUTDATED_ERROR;
+					goto done;
+				}
+			}
+
+			new_offset = i;
+			break;
+		}
+
+		/*
+		 * In case we didn't find our compacted tables in the stack we
+		 * need to bail out. In theory, this should have never happened
+		 * because we locked the tables we are compacting.
+		 */
+		if (new_offset < 0) {
+			err = REFTABLE_OUTDATED_ERROR;
+			goto done;
+		}
+
+		/*
+		 * We have found the new range that we want to replace, so
+		 * let's update the range of tables that we want to replace.
+		 */
+		first_to_replace = new_offset;
+		last_to_replace = last + (new_offset - first);
+	} else {
+		/*
+		 * `fd_read_lines()` uses a `NULL` sentinel to indicate that
+		 * the array is at its end. As we use `free_names()` to free
+		 * the array, we need to include this sentinel value here and
+		 * thus have to allocate `stack_len + 1` many entries.
+		 */
+		REFTABLE_CALLOC_ARRAY(names, st->merged->stack_len + 1);
+		for (size_t i = 0; i < st->merged->stack_len; i++)
+			names[i] = xstrdup(st->readers[i]->name);
+		first_to_replace = first;
+		last_to_replace = last;
+	}
+
 	/*
 	 * If the resulting compacted table is not empty, then we need to move
 	 * it into place now.
@@ -1141,12 +1279,12 @@ static int stack_compact_range(struct reftable_stack *st,
 	 * have just written. In case the compacted table became empty we
 	 * simply skip writing it.
 	 */
-	for (i = 0; i < first; i++)
-		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
+	for (i = 0; i < first_to_replace; i++)
+		strbuf_addf(&tables_list_buf, "%s\n", names[i]);
 	if (!is_empty_table)
 		strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
-	for (i = last + 1; i < st->merged->stack_len; i++)
-		strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
+	for (i = last_to_replace + 1; names[i]; i++)
+		strbuf_addf(&tables_list_buf, "%s\n", names[i]);
 
 	err = write_in_full(get_lock_file_fd(&tables_list_lock),
 			    tables_list_buf.buf, tables_list_buf.len);
@@ -1183,8 +1321,8 @@ static int stack_compact_range(struct reftable_stack *st,
 	 * Delete the old tables. They may still be in use by concurrent
 	 * readers, so it is expected that unlinking tables may fail.
 	 */
-	for (i = first; i <= last; i++) {
-		struct lock_file *table_lock = &table_locks[i - first];
+	for (i = 0; i < nlocks; i++) {
+		struct lock_file *table_lock = &table_locks[i];
 		char *table_path = get_locked_file_path(table_lock);
 		unlink(table_path);
 		free(table_path);
@@ -1192,34 +1330,36 @@ static int stack_compact_range(struct reftable_stack *st,
 
 done:
 	rollback_lock_file(&tables_list_lock);
-	for (i = first; table_locks && i <= last; i++)
-		rollback_lock_file(&table_locks[i - first]);
+	for (i = 0; table_locks && i < nlocks; i++)
+		rollback_lock_file(&table_locks[i]);
 	reftable_free(table_locks);
 
 	delete_tempfile(&new_table);
 	strbuf_release(&new_table_name);
 	strbuf_release(&new_table_path);
 	strbuf_release(&tables_list_buf);
 	strbuf_release(&table_name);
+	free_names(names);
 	return err;
 }
 
+static int stack_compact_range_stats(struct reftable_stack *st,
+				     size_t first, size_t last,
+				     struct reftable_log_expiry_config *config,
+				     unsigned int flags)
+{
+	int err = stack_compact_range(st, first, last, config, flags);
+	if (err == REFTABLE_LOCK_ERROR)
+		st->stats.failures++;
+	return err;
+}
+
 int reftable_stack_compact_all(struct reftable_stack *st,
 			       struct reftable_log_expiry_config *config)
 {
-	return stack_compact_range(st, 0, st->merged->stack_len ?
-				   st->merged->stack_len - 1 : 0, config);
-}
-
-static int stack_compact_range_stats(struct reftable_stack *st,
-				     size_t first, size_t last,
-				     struct reftable_log_expiry_config *config)
-{
-	int err = stack_compact_range(st, first, last, config);
-	if (err == REFTABLE_LOCK_ERROR)
-		st->stats.failures++;
-	return err;
+	size_t last = st->merged->stack_len ? st->merged->stack_len - 1 : 0;
+	return stack_compact_range_stats(st, 0, last, config, 0);
 }
 
 static int segment_size(struct segment *s)
@@ -1305,14 +1445,15 @@ struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
 static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
 {
-	uint64_t *sizes =
-		reftable_calloc(st->merged->stack_len, sizeof(*sizes));
 	int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
 	int overhead = header_size(version) - 1;
-	int i = 0;
-	for (i = 0; i < st->merged->stack_len; i++) {
+	uint64_t *sizes;
+
+	REFTABLE_CALLOC_ARRAY(sizes, st->merged->stack_len);
+	for (size_t i = 0; i < st->merged->stack_len; i++)
 		sizes[i] = st->readers[i]->size - overhead;
-	}
+
 	return sizes;
 }
 
@@ -1325,7 +1466,7 @@ int reftable_stack_auto_compact(struct reftable_stack *st)
 	reftable_free(sizes);
 	if (segment_size(&seg) > 0)
 		return stack_compact_range_stats(st, seg.start, seg.end - 1,
-						 NULL);
+						 NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
 	return 0;
 }
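
The other substantial change guards against a concurrently modified stack: before rewriting "tables.list", the compacting process re-reads it and verifies that the tables it compacted still appear there, contiguously and in the same order, shifting the replacement range by the offset at which it finds them. Here is a simplified sketch of that check, not the code above; disk_names (the re-read, NULL-terminated file contents) and stack_names (our in-memory snapshot, with valid entries for indices first through last) are hypothetical inputs.

#include <stddef.h>
#include <string.h>
#include <sys/types.h>

/*
 * Return the offset of our compacted slice inside the re-read
 * "tables.list", or -1 if the slice no longer exists there in the same
 * order, in which case the compaction must be aborted as outdated.
 */
static ssize_t find_compacted_slice(char **disk_names, char **stack_names,
				    size_t first, size_t last)
{
	for (size_t i = 0; disk_names[i]; i++) {
		if (strcmp(disk_names[i], stack_names[first]))
			continue;

		/* Candidate start; every following table must match, too. */
		for (size_t j = 1; j < last - first + 1; j++)
			if (!disk_names[i + j] ||
			    strcmp(disk_names[i + j], stack_names[first + j]))
				return -1;

		return (ssize_t) i;
	}

	return -1;
}

When the slice is found, the diff above replaces names[offset..offset + (last - first)] with the compacted table; when it is not, stack_compact_range() now returns REFTABLE_OUTDATED_ERROR rather than clobbering tables it does not own, which is the corruption the series fixes.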

reftable/stack_test.c

@@ -109,6 +109,34 @@ static int write_test_ref(struct reftable_writer *wr, void *arg)
 	return reftable_writer_add_ref(wr, ref);
 }
 
+static void write_n_ref_tables(struct reftable_stack *st,
+			       size_t n)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int disable_auto_compact;
+	int err;
+
+	disable_auto_compact = st->opts.disable_auto_compact;
+	st->opts.disable_auto_compact = 1;
+
+	for (size_t i = 0; i < n; i++) {
+		struct reftable_ref_record ref = {
+			.update_index = reftable_stack_next_update_index(st),
+			.value_type = REFTABLE_REF_VAL1,
+		};
+
+		strbuf_addf(&buf, "refs/heads/branch-%04u", (unsigned) i);
+		ref.refname = buf.buf;
+		set_test_hash(ref.value.val1, i);
+
+		err = reftable_stack_add(st, &write_test_ref, &ref);
+		EXPECT_ERR(err);
+	}
+
+	st->opts.disable_auto_compact = disable_auto_compact;
+	strbuf_release(&buf);
+}
+
 struct write_log_arg {
 	struct reftable_log_record *log;
 	uint64_t update_index;
@@ -863,6 +891,47 @@ static void test_reftable_stack_auto_compaction(void)
 	clear_dir(dir);
 }
 
+static void test_reftable_stack_auto_compaction_with_locked_tables(void)
+{
+	struct reftable_write_options opts = {
+		.disable_auto_compact = 1,
+	};
+	struct reftable_stack *st = NULL;
+	struct strbuf buf = STRBUF_INIT;
+	char *dir = get_tmp_dir(__LINE__);
+	int err;
+
+	err = reftable_new_stack(&st, dir, &opts);
+	EXPECT_ERR(err);
+
+	write_n_ref_tables(st, 5);
+	EXPECT(st->merged->stack_len == 5);
+
+	/*
+	 * Given that all tables we have written should be roughly the same
+	 * size, we expect that auto-compaction will want to compact all of the
+	 * tables. Locking any of the tables will keep it from doing so.
+	 */
+	strbuf_reset(&buf);
+	strbuf_addf(&buf, "%s/%s.lock", dir, st->readers[2]->name);
+	write_file_buf(buf.buf, "", 0);
+
+	/*
+	 * When parts of the stack are locked, then auto-compaction does a best
+	 * effort compaction of those tables which aren't locked. So while this
+	 * would in theory compact all tables, due to the preexisting lock we
+	 * only compact the newest two tables.
+	 */
+	err = reftable_stack_auto_compact(st);
+	EXPECT_ERR(err);
+	EXPECT(st->stats.failures == 0);
+	EXPECT(st->merged->stack_len == 4);
+
+	reftable_stack_destroy(st);
+	strbuf_release(&buf);
+	clear_dir(dir);
+}
+
 static void test_reftable_stack_add_performs_auto_compaction(void)
 {
 	struct reftable_write_options opts = { 0 };
@@ -911,30 +980,51 @@ static void test_reftable_stack_add_performs_auto_compaction(void)
 	clear_dir(dir);
 }
 
+static void test_reftable_stack_compaction_with_locked_tables(void)
+{
+	struct reftable_write_options opts = {
+		.disable_auto_compact = 1,
+	};
+	struct reftable_stack *st = NULL;
+	struct strbuf buf = STRBUF_INIT;
+	char *dir = get_tmp_dir(__LINE__);
+	int err;
+
+	err = reftable_new_stack(&st, dir, &opts);
+	EXPECT_ERR(err);
+
+	write_n_ref_tables(st, 3);
+	EXPECT(st->merged->stack_len == 3);
+
+	/* Lock one of the tables that we're about to compact. */
+	strbuf_reset(&buf);
+	strbuf_addf(&buf, "%s/%s.lock", dir, st->readers[1]->name);
+	write_file_buf(buf.buf, "", 0);
+
+	/*
+	 * Compaction is expected to fail given that we were not able to
+	 * compact all tables.
+	 */
+	err = reftable_stack_compact_all(st, NULL);
+	EXPECT(err == REFTABLE_LOCK_ERROR);
+	EXPECT(st->stats.failures == 1);
+	EXPECT(st->merged->stack_len == 3);
+
+	reftable_stack_destroy(st);
+	strbuf_release(&buf);
+	clear_dir(dir);
+}
+
 static void test_reftable_stack_compaction_concurrent(void)
 {
 	struct reftable_write_options opts = { 0 };
 	struct reftable_stack *st1 = NULL, *st2 = NULL;
 	char *dir = get_tmp_dir(__LINE__);
-	int err, i;
-	int N = 3;
+	int err;
 
 	err = reftable_new_stack(&st1, dir, &opts);
 	EXPECT_ERR(err);
 
-	for (i = 0; i < N; i++) {
-		char name[100];
-		struct reftable_ref_record ref = {
-			.refname = name,
-			.update_index = reftable_stack_next_update_index(st1),
-			.value_type = REFTABLE_REF_SYMREF,
-			.value.symref = (char *) "master",
-		};
-
-		snprintf(name, sizeof(name), "branch%04d", i);
-		err = reftable_stack_add(st1, &write_test_ref, &ref);
-		EXPECT_ERR(err);
-	}
+	write_n_ref_tables(st1, 3);
 
 	err = reftable_new_stack(&st2, dir, &opts);
 	EXPECT_ERR(err);
@@ -965,25 +1055,11 @@ static void test_reftable_stack_compaction_concurrent_clean(void)
 	struct reftable_write_options opts = { 0 };
 	struct reftable_stack *st1 = NULL, *st2 = NULL, *st3 = NULL;
 	char *dir = get_tmp_dir(__LINE__);
-	int err, i;
-	int N = 3;
+	int err;
 
 	err = reftable_new_stack(&st1, dir, &opts);
 	EXPECT_ERR(err);
 
-	for (i = 0; i < N; i++) {
-		char name[100];
-		struct reftable_ref_record ref = {
-			.refname = name,
-			.update_index = reftable_stack_next_update_index(st1),
-			.value_type = REFTABLE_REF_SYMREF,
-			.value.symref = (char *) "master",
-		};
-
-		snprintf(name, sizeof(name), "branch%04d", i);
-		err = reftable_stack_add(st1, &write_test_ref, &ref);
-		EXPECT_ERR(err);
-	}
+	write_n_ref_tables(st1, 3);
 
 	err = reftable_new_stack(&st2, dir, &opts);
 	EXPECT_ERR(err);
@@ -1016,9 +1092,11 @@ int stack_test_main(int argc, const char *argv[])
 	RUN_TEST(test_reftable_stack_add);
 	RUN_TEST(test_reftable_stack_add_one);
 	RUN_TEST(test_reftable_stack_auto_compaction);
+	RUN_TEST(test_reftable_stack_auto_compaction_with_locked_tables);
 	RUN_TEST(test_reftable_stack_add_performs_auto_compaction);
 	RUN_TEST(test_reftable_stack_compaction_concurrent);
 	RUN_TEST(test_reftable_stack_compaction_concurrent_clean);
+	RUN_TEST(test_reftable_stack_compaction_with_locked_tables);
 	RUN_TEST(test_reftable_stack_hash_id);
 	RUN_TEST(test_reftable_stack_lock_failure);
 	RUN_TEST(test_reftable_stack_log_normalize);
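
Both new tests simulate a concurrent writer simply by creating an empty "<table>.lock" file next to a table, since that is the very lock file hold_lock_file_for_update() would try to take. A hypothetical helper distilled from the test code above, as a rough sketch rather than an actual test utility:

#include <stdio.h>

/*
 * Pretend another process holds the lock on the given table by creating
 * an empty "<dir>/<table_name>.lock" file, mirroring the
 * write_file_buf(buf.buf, "", 0) calls in the tests. Returns 0 on
 * success, -1 on error.
 */
static int fake_concurrent_table_lock(const char *dir, const char *table_name)
{
	char path[4096];
	FILE *f;

	if (snprintf(path, sizeof(path), "%s/%s.lock",
		     dir, table_name) >= (int) sizeof(path))
		return -1; /* path would be truncated */

	f = fopen(path, "w");
	if (!f)
		return -1;
	fclose(f);
	return 0;
}

With such a lock in place, auto-compaction succeeds with stats.failures == 0 and merges only the unlocked newest tables, whereas full compaction fails with REFTABLE_LOCK_ERROR and bumps the failure counter, which is exactly what the two tests assert.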

t/t0610-reftable-basics.sh

@@ -478,19 +478,26 @@ test_expect_success "$command: auto compaction" '
 		test_oid blob17_2 | git hash-object -w --stdin &&
 
-		# Lock all tables write some refs. Auto-compaction will be
-		# unable to compact tables and thus fails gracefully, leaving
-		# the stack in a sub-optimal state.
-		ls .git/reftable/*.ref |
+		# Lock all tables, write some refs. Auto-compaction will be
+		# unable to compact tables and thus fails gracefully,
+		# compacting only those tables which are not locked.
+		ls .git/reftable/*.ref | sort |
 		while read table
 		do
-			touch "$table.lock" || exit 1
+			touch "$table.lock" &&
+			basename "$table" >>tables.expect || exit 1
 		done &&
+		test_line_count = 2 .git/reftable/tables.list &&
 		git branch B &&
 		git branch C &&
-		rm .git/reftable/*.lock &&
-		test_line_count = 4 .git/reftable/tables.list &&
+
+		# The new tables are auto-compacted, but the locked tables are
+		# left intact.
+		test_line_count = 3 .git/reftable/tables.list &&
+		head -n 2 .git/reftable/tables.list >tables.head &&
+		test_cmp tables.expect tables.head &&
+
+		rm .git/reftable/*.lock &&
 		git $command --auto &&
 		test_line_count = 1 .git/reftable/tables.list
 	)