mirror of
https://github.com/git/git.git
synced 2024-11-01 14:57:52 +01:00
2db1a43f41
When pack-objects is computing the reachability bitmap to serve a fetch request, it can erroneously die() if some of the UNINTERESTING objects are not present. Upload-pack throws away HAVE lines from the client for objects we do not have, but we may have a tip object without all of its ancestors (e.g., if the tip is no longer reachable and was new enough to survive a `git prune`, but some of its reachable objects did get pruned). In the non-bitmap case, we do a revision walk with the HAVE objects marked as UNINTERESTING. The revision walker explicitly ignores errors in accessing UNINTERESTING commits to handle this case (and we do not bother looking at UNINTERESTING trees or blobs at all). When we have bitmaps, however, the process is quite different. The bitmap index for a pack-objects run is calculated in two separate steps: First, we perform an extensive walk from all the HAVEs to find the full set of objects reachable from them. This walk is usually optimized away because we are expected to hit an object with a bitmap during the traversal, which allows us to terminate early. Secondly, we perform an extensive walk from all the WANTs, which usually also terminates early because we hit a commit with an existing bitmap. Once we have the resulting bitmaps from the two walks, we AND-NOT them together to obtain the resulting set of objects we need to pack. When we are walking the HAVE objects, the revision walker does not know that we are walking it only to mark the results as uninteresting. We strip out the UNINTERESTING flag, because those objects _are_ interesting to us during the first walk. We want to keep going to get a complete set of reachable objects if we can. We need some way to tell the revision walker that it's OK to silently truncate the HAVE walk, just like it does for the UNINTERESTING case. This patch introduces a new `ignore_missing_links` flag to the `rev_info` struct, which we set only for the HAVE walk. 
It also adds tests to cover UNINTERESTING objects missing from several positions: a missing blob, a missing tree, and a missing parent commit. The missing blob already worked (as we do not care about its contents at all), but the other two cases caused us to die(). Note that there are a few cases we do not need to test: 1. We do not need to test a missing tree, with the blob still present. Without the tree that refers to it, we would not know that the blob is relevant to our walk. 2. We do not need to test a tip commit that is missing. Upload-pack omits these for us (and in fact, we complain even in the non-bitmap case if it fails to do so). Reported-by: Siddharth Agarwal <sid0@fb.com> Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
236 lines
6 KiB
C
236 lines
6 KiB
C
#include "cache.h"
|
|
#include "tag.h"
|
|
#include "commit.h"
|
|
#include "tree.h"
|
|
#include "blob.h"
|
|
#include "diff.h"
|
|
#include "tree-walk.h"
|
|
#include "revision.h"
|
|
#include "list-objects.h"
|
|
|
|
static void process_blob(struct rev_info *revs,
|
|
struct blob *blob,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
struct object *obj = &blob->object;
|
|
|
|
if (!revs->blob_objects)
|
|
return;
|
|
if (!obj)
|
|
die("bad blob object");
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
return;
|
|
obj->flags |= SEEN;
|
|
show(obj, path, name, cb_data);
|
|
}
|
|
|
|
/*
|
|
* Processing a gitlink entry currently does nothing, since
|
|
* we do not recurse into the subproject.
|
|
*
|
|
* We *could* eventually add a flag that actually does that,
|
|
* which would involve:
|
|
* - is the subproject actually checked out?
|
|
* - if so, see if the subproject has already been added
|
|
* to the alternates list, and add it if not.
|
|
* - process the commit (or tag) the gitlink points to
|
|
* recursively.
|
|
*
|
|
* However, it's unclear whether there is really ever any
|
|
* reason to see superprojects and subprojects as such a
|
|
* "unified" object pool (potentially resulting in a totally
|
|
* humongous pack - avoiding which was the whole point of
|
|
* having gitlinks in the first place!).
|
|
*
|
|
* So for now, there is just a note that we *could* follow
|
|
* the link, and how to do it. Whether it necessarily makes
|
|
* any sense what-so-ever to ever do that is another issue.
|
|
*/
|
|
static void process_gitlink(struct rev_info *revs,
|
|
const unsigned char *sha1,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
/* Nothing to do */
|
|
}
|
|
|
|
static void process_tree(struct rev_info *revs,
|
|
struct tree *tree,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
struct strbuf *base,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
struct object *obj = &tree->object;
|
|
struct tree_desc desc;
|
|
struct name_entry entry;
|
|
struct name_path me;
|
|
enum interesting match = revs->diffopt.pathspec.nr == 0 ?
|
|
all_entries_interesting: entry_not_interesting;
|
|
int baselen = base->len;
|
|
|
|
if (!revs->tree_objects)
|
|
return;
|
|
if (!obj)
|
|
die("bad tree object");
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
return;
|
|
if (parse_tree(tree) < 0) {
|
|
if (revs->ignore_missing_links)
|
|
return;
|
|
die("bad tree object %s", sha1_to_hex(obj->sha1));
|
|
}
|
|
obj->flags |= SEEN;
|
|
show(obj, path, name, cb_data);
|
|
me.up = path;
|
|
me.elem = name;
|
|
me.elem_len = strlen(name);
|
|
|
|
if (!match) {
|
|
strbuf_addstr(base, name);
|
|
if (base->len)
|
|
strbuf_addch(base, '/');
|
|
}
|
|
|
|
init_tree_desc(&desc, tree->buffer, tree->size);
|
|
|
|
while (tree_entry(&desc, &entry)) {
|
|
if (match != all_entries_interesting) {
|
|
match = tree_entry_interesting(&entry, base, 0,
|
|
&revs->diffopt.pathspec);
|
|
if (match == all_entries_not_interesting)
|
|
break;
|
|
if (match == entry_not_interesting)
|
|
continue;
|
|
}
|
|
|
|
if (S_ISDIR(entry.mode))
|
|
process_tree(revs,
|
|
lookup_tree(entry.sha1),
|
|
show, &me, base, entry.path,
|
|
cb_data);
|
|
else if (S_ISGITLINK(entry.mode))
|
|
process_gitlink(revs, entry.sha1,
|
|
show, &me, entry.path,
|
|
cb_data);
|
|
else
|
|
process_blob(revs,
|
|
lookup_blob(entry.sha1),
|
|
show, &me, entry.path,
|
|
cb_data);
|
|
}
|
|
strbuf_setlen(base, baselen);
|
|
free_tree_buffer(tree);
|
|
}
|
|
|
|
static void mark_edge_parents_uninteresting(struct commit *commit,
|
|
struct rev_info *revs,
|
|
show_edge_fn show_edge)
|
|
{
|
|
struct commit_list *parents;
|
|
|
|
for (parents = commit->parents; parents; parents = parents->next) {
|
|
struct commit *parent = parents->item;
|
|
if (!(parent->object.flags & UNINTERESTING))
|
|
continue;
|
|
mark_tree_uninteresting(parent->tree);
|
|
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
|
|
parent->object.flags |= SHOWN;
|
|
show_edge(parent);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Mark the tree of an UNINTERESTING commit as uninteresting and, when
 * revs->edge_hint is set and the commit is not yet flagged SHOWN, flag
 * it SHOWN and pass it to show_edge.
 */
static void flag_uninteresting_edge(struct commit *edge,
				    struct rev_info *revs,
				    show_edge_fn show_edge)
{
	mark_tree_uninteresting(edge->tree);
	if (!revs->edge_hint || (edge->object.flags & SHOWN))
		return;
	edge->object.flags |= SHOWN;
	show_edge(edge);
}

/*
 * Walk revs->commits and then the commits named in revs->cmdline,
 * marking the trees of UNINTERESTING commits as uninteresting and
 * reporting them as edges when revs->edge_hint is set. Commits in
 * revs->commits that are not UNINTERESTING have their parents examined
 * instead.
 */
void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
{
	struct commit_list *node;
	int i;

	for (node = revs->commits; node; node = node->next) {
		struct commit *c = node->item;

		if (c->object.flags & UNINTERESTING)
			flag_uninteresting_edge(c, revs, show_edge);
		else
			mark_edge_parents_uninteresting(c, revs, show_edge);
	}

	for (i = 0; i < revs->cmdline.nr; i++) {
		struct object *obj = revs->cmdline.rev[i].item;

		/* Only uninteresting commits from the command line matter here. */
		if (obj->type == OBJ_COMMIT && (obj->flags & UNINTERESTING))
			flag_uninteresting_edge((struct commit *)obj,
						revs, show_edge);
	}
}
|
|
|
|
static void add_pending_tree(struct rev_info *revs, struct tree *tree)
|
|
{
|
|
add_pending_object(revs, &tree->object, "");
|
|
}
|
|
|
|
void traverse_commit_list(struct rev_info *revs,
|
|
show_commit_fn show_commit,
|
|
show_object_fn show_object,
|
|
void *data)
|
|
{
|
|
int i;
|
|
struct commit *commit;
|
|
struct strbuf base;
|
|
|
|
strbuf_init(&base, PATH_MAX);
|
|
while ((commit = get_revision(revs)) != NULL) {
|
|
/*
|
|
* an uninteresting boundary commit may not have its tree
|
|
* parsed yet, but we are not going to show them anyway
|
|
*/
|
|
if (commit->tree)
|
|
add_pending_tree(revs, commit->tree);
|
|
show_commit(commit, data);
|
|
}
|
|
for (i = 0; i < revs->pending.nr; i++) {
|
|
struct object_array_entry *pending = revs->pending.objects + i;
|
|
struct object *obj = pending->item;
|
|
const char *name = pending->name;
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
continue;
|
|
if (obj->type == OBJ_TAG) {
|
|
obj->flags |= SEEN;
|
|
show_object(obj, NULL, name, data);
|
|
continue;
|
|
}
|
|
if (obj->type == OBJ_TREE) {
|
|
process_tree(revs, (struct tree *)obj, show_object,
|
|
NULL, &base, name, data);
|
|
continue;
|
|
}
|
|
if (obj->type == OBJ_BLOB) {
|
|
process_blob(revs, (struct blob *)obj, show_object,
|
|
NULL, name, data);
|
|
continue;
|
|
}
|
|
die("unknown pending object %s (%s)",
|
|
sha1_to_hex(obj->sha1), name);
|
|
}
|
|
if (revs->pending.nr) {
|
|
free(revs->pending.objects);
|
|
revs->pending.nr = 0;
|
|
revs->pending.alloc = 0;
|
|
revs->pending.objects = NULL;
|
|
}
|
|
strbuf_release(&base);
|
|
}
|