mirror of
https://github.com/git/git.git
synced 2024-11-15 05:33:04 +01:00
fbd4a7036d
The purpose of edge commits is to let pack-objects know what objects it can use as base, but does not need to include in the thin pack because the other side is supposed to already have them. So far we mark uninteresting parents of interesting commits as edges. But even an unrelated uninteresting commit (that the other side has) may become a good base for pack-objects and help produce more efficient packs. This is especially true for shallow clone, when the client issues a fetch with a depth smaller or equal to the number of commits the server is ahead of the client. For example, in this commit history the client has up to "A" and the server has up to "B": -------A---B have--^ ^ / want--+ If depth 1 is requested, the commit list to send to the client includes only B. The way m_e_u is working, it checks if parent commits of B are uninteresting, if so mark them as edges. Due to shallow effect, commit B is grafted to have no parents and the revision walker never sees A as the parent of B. In fact it marks no edges at all in this simple case and sends everything B has to the client even if it could have excluded what A and also the client already have. In a slightly different case where A is not a direct parent of B (iow there are commits in between A and B), marking A as an edge can still save some because B may still have stuff from the far ancestor A. There is another case from the earlier patch, when we deepen a ref from C->E to A->E: ---A---B C---D---E want--^ ^ ^ shallow-+ / have-------+ In this case we need to send A and B to the client, and C (i.e. the current shallow point that the client informs the server) is a very good base because it's closet to A and B. Normal m_e_u won't recognize C as an edge because it only looks back to parents (i.e. A<-B) not the opposite way B->C even if C is already marked as uninteresting commit by the previous patch. This patch includes all uninteresting commits from command line as edges and lets pack-objects decide what's best to do. The upside is we have better chance of producing better packs in certain cases. The downside is we may need to process some extra objects on the server side. For the shallow case on git.git, when the client is 5 commits behind and does "fetch --depth=3", the result pack is 99.26 KiB instead of 4.92 MiB. Reported-and-analyzed-by: Matthijs Kooijman <matthijs@stdin.nl> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
234 lines
6 KiB
C
234 lines
6 KiB
C
#include "cache.h"
|
|
#include "tag.h"
|
|
#include "commit.h"
|
|
#include "tree.h"
|
|
#include "blob.h"
|
|
#include "diff.h"
|
|
#include "tree-walk.h"
|
|
#include "revision.h"
|
|
#include "list-objects.h"
|
|
|
|
static void process_blob(struct rev_info *revs,
|
|
struct blob *blob,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
struct object *obj = &blob->object;
|
|
|
|
if (!revs->blob_objects)
|
|
return;
|
|
if (!obj)
|
|
die("bad blob object");
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
return;
|
|
obj->flags |= SEEN;
|
|
show(obj, path, name, cb_data);
|
|
}
|
|
|
|
/*
|
|
* Processing a gitlink entry currently does nothing, since
|
|
* we do not recurse into the subproject.
|
|
*
|
|
* We *could* eventually add a flag that actually does that,
|
|
* which would involve:
|
|
* - is the subproject actually checked out?
|
|
* - if so, see if the subproject has already been added
|
|
* to the alternates list, and add it if not.
|
|
* - process the commit (or tag) the gitlink points to
|
|
* recursively.
|
|
*
|
|
* However, it's unclear whether there is really ever any
|
|
* reason to see superprojects and subprojects as such a
|
|
* "unified" object pool (potentially resulting in a totally
|
|
* humongous pack - avoiding which was the whole point of
|
|
* having gitlinks in the first place!).
|
|
*
|
|
* So for now, there is just a note that we *could* follow
|
|
* the link, and how to do it. Whether it necessarily makes
|
|
* any sense what-so-ever to ever do that is another issue.
|
|
*/
|
|
static void process_gitlink(struct rev_info *revs,
|
|
const unsigned char *sha1,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
/* Nothing to do */
|
|
}
|
|
|
|
static void process_tree(struct rev_info *revs,
|
|
struct tree *tree,
|
|
show_object_fn show,
|
|
struct name_path *path,
|
|
struct strbuf *base,
|
|
const char *name,
|
|
void *cb_data)
|
|
{
|
|
struct object *obj = &tree->object;
|
|
struct tree_desc desc;
|
|
struct name_entry entry;
|
|
struct name_path me;
|
|
enum interesting match = revs->diffopt.pathspec.nr == 0 ?
|
|
all_entries_interesting: entry_not_interesting;
|
|
int baselen = base->len;
|
|
|
|
if (!revs->tree_objects)
|
|
return;
|
|
if (!obj)
|
|
die("bad tree object");
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
return;
|
|
if (parse_tree(tree) < 0)
|
|
die("bad tree object %s", sha1_to_hex(obj->sha1));
|
|
obj->flags |= SEEN;
|
|
show(obj, path, name, cb_data);
|
|
me.up = path;
|
|
me.elem = name;
|
|
me.elem_len = strlen(name);
|
|
|
|
if (!match) {
|
|
strbuf_addstr(base, name);
|
|
if (base->len)
|
|
strbuf_addch(base, '/');
|
|
}
|
|
|
|
init_tree_desc(&desc, tree->buffer, tree->size);
|
|
|
|
while (tree_entry(&desc, &entry)) {
|
|
if (match != all_entries_interesting) {
|
|
match = tree_entry_interesting(&entry, base, 0,
|
|
&revs->diffopt.pathspec);
|
|
if (match == all_entries_not_interesting)
|
|
break;
|
|
if (match == entry_not_interesting)
|
|
continue;
|
|
}
|
|
|
|
if (S_ISDIR(entry.mode))
|
|
process_tree(revs,
|
|
lookup_tree(entry.sha1),
|
|
show, &me, base, entry.path,
|
|
cb_data);
|
|
else if (S_ISGITLINK(entry.mode))
|
|
process_gitlink(revs, entry.sha1,
|
|
show, &me, entry.path,
|
|
cb_data);
|
|
else
|
|
process_blob(revs,
|
|
lookup_blob(entry.sha1),
|
|
show, &me, entry.path,
|
|
cb_data);
|
|
}
|
|
strbuf_setlen(base, baselen);
|
|
free(tree->buffer);
|
|
tree->buffer = NULL;
|
|
}
|
|
|
|
static void mark_edge_parents_uninteresting(struct commit *commit,
|
|
struct rev_info *revs,
|
|
show_edge_fn show_edge)
|
|
{
|
|
struct commit_list *parents;
|
|
|
|
for (parents = commit->parents; parents; parents = parents->next) {
|
|
struct commit *parent = parents->item;
|
|
if (!(parent->object.flags & UNINTERESTING))
|
|
continue;
|
|
mark_tree_uninteresting(parent->tree);
|
|
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
|
|
parent->object.flags |= SHOWN;
|
|
show_edge(parent);
|
|
}
|
|
}
|
|
}
|
|
|
|
void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
|
|
{
|
|
struct commit_list *list;
|
|
int i;
|
|
|
|
for (list = revs->commits; list; list = list->next) {
|
|
struct commit *commit = list->item;
|
|
|
|
if (commit->object.flags & UNINTERESTING) {
|
|
mark_tree_uninteresting(commit->tree);
|
|
if (revs->edge_hint && !(commit->object.flags & SHOWN)) {
|
|
commit->object.flags |= SHOWN;
|
|
show_edge(commit);
|
|
}
|
|
continue;
|
|
}
|
|
mark_edge_parents_uninteresting(commit, revs, show_edge);
|
|
}
|
|
for (i = 0; i < revs->cmdline.nr; i++) {
|
|
struct object *obj = revs->cmdline.rev[i].item;
|
|
struct commit *commit = (struct commit *)obj;
|
|
if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING))
|
|
continue;
|
|
mark_tree_uninteresting(commit->tree);
|
|
if (revs->edge_hint && !(obj->flags & SHOWN)) {
|
|
obj->flags |= SHOWN;
|
|
show_edge(commit);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void add_pending_tree(struct rev_info *revs, struct tree *tree)
|
|
{
|
|
add_pending_object(revs, &tree->object, "");
|
|
}
|
|
|
|
void traverse_commit_list(struct rev_info *revs,
|
|
show_commit_fn show_commit,
|
|
show_object_fn show_object,
|
|
void *data)
|
|
{
|
|
int i;
|
|
struct commit *commit;
|
|
struct strbuf base;
|
|
|
|
strbuf_init(&base, PATH_MAX);
|
|
while ((commit = get_revision(revs)) != NULL) {
|
|
/*
|
|
* an uninteresting boundary commit may not have its tree
|
|
* parsed yet, but we are not going to show them anyway
|
|
*/
|
|
if (commit->tree)
|
|
add_pending_tree(revs, commit->tree);
|
|
show_commit(commit, data);
|
|
}
|
|
for (i = 0; i < revs->pending.nr; i++) {
|
|
struct object_array_entry *pending = revs->pending.objects + i;
|
|
struct object *obj = pending->item;
|
|
const char *name = pending->name;
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
continue;
|
|
if (obj->type == OBJ_TAG) {
|
|
obj->flags |= SEEN;
|
|
show_object(obj, NULL, name, data);
|
|
continue;
|
|
}
|
|
if (obj->type == OBJ_TREE) {
|
|
process_tree(revs, (struct tree *)obj, show_object,
|
|
NULL, &base, name, data);
|
|
continue;
|
|
}
|
|
if (obj->type == OBJ_BLOB) {
|
|
process_blob(revs, (struct blob *)obj, show_object,
|
|
NULL, name, data);
|
|
continue;
|
|
}
|
|
die("unknown pending object %s (%s)",
|
|
sha1_to_hex(obj->sha1), name);
|
|
}
|
|
if (revs->pending.nr) {
|
|
free(revs->pending.objects);
|
|
revs->pending.nr = 0;
|
|
revs->pending.alloc = 0;
|
|
revs->pending.objects = NULL;
|
|
}
|
|
strbuf_release(&base);
|
|
}
|