1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-11-16 06:03:44 +01:00
git/tree-diff.c

443 lines
11 KiB
C
Raw Normal View History

/*
* Helper functions for tree diff generation
*/
#include "cache.h"
#include "diff.h"
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
#include "diffcore.h"
#include "tree.h"
static char *malloc_base(const char *base, int baselen, const char *path, int pathlen)
{
char *newbase = xmalloc(baselen + pathlen + 2);
memcpy(newbase, base, baselen);
memcpy(newbase + baselen, path, pathlen);
memcpy(newbase + baselen + pathlen, "/", 2);
return newbase;
}
static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc,
const char *base, int baselen);
static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const char *base, int baselen, struct diff_options *opt)
{
unsigned mode1, mode2;
const char *path1, *path2;
const unsigned char *sha1, *sha2;
int cmp, pathlen1, pathlen2;
sha1 = tree_entry_extract(t1, &path1, &mode1);
sha2 = tree_entry_extract(t2, &path2, &mode2);
pathlen1 = tree_entry_len(path1, sha1);
pathlen2 = tree_entry_len(path2, sha2);
cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2);
if (cmp < 0) {
show_entry(opt, "-", t1, base, baselen);
return -1;
}
if (cmp > 0) {
show_entry(opt, "+", t2, base, baselen);
return 1;
}
if (!opt->find_copies_harder && !hashcmp(sha1, sha2) && mode1 == mode2)
return 0;
/*
* If the filemode has changed to/from a directory from/to a regular
* file, we need to consider it a remove and an add.
*/
if (S_ISDIR(mode1) != S_ISDIR(mode2)) {
show_entry(opt, "-", t1, base, baselen);
show_entry(opt, "+", t2, base, baselen);
return 0;
}
if (opt->recursive && S_ISDIR(mode1)) {
int retval;
char *newbase = malloc_base(base, baselen, path1, pathlen1);
if (opt->tree_in_recursive)
opt->change(opt, mode1, mode2,
sha1, sha2, base, path1);
retval = diff_tree_sha1(sha1, sha2, newbase, opt);
free(newbase);
return retval;
}
opt->change(opt, mode1, mode2, sha1, sha2, base, path1);
return 0;
}
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
/*
* Is a tree entry interesting given the pathspec we have?
*
* Return:
* - 2 for "yes, and all subsequent entries will be"
* - 1 for yes
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
* - zero for no
* - negative for "no, and no subsequent entries will be either"
*/
static int tree_entry_interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
{
const char *path;
const unsigned char *sha1;
unsigned mode;
int i;
int pathlen;
int never_interesting = -1;
if (!opt->nr_paths)
return 1;
sha1 = tree_entry_extract(desc, &path, &mode);
pathlen = tree_entry_len(path, sha1);
for (i = 0; i < opt->nr_paths; i++) {
const char *match = opt->paths[i];
int matchlen = opt->pathlens[i];
int m = -1; /* signals that we haven't called strncmp() */
if (baselen >= matchlen) {
/* If it doesn't match, move along... */
if (strncmp(base, match, matchlen))
continue;
/*
* The base is a subdirectory of a path which
* was specified, so all of them are interesting.
*/
return 2;
}
/* Does the base match? */
if (strncmp(base, match, baselen))
continue;
match += baselen;
matchlen -= baselen;
if (never_interesting) {
/*
* We have not seen any match that sorts later
* than the current path.
*/
/*
* Does match sort strictly earlier than path
* with their common parts?
*/
m = strncmp(match, path,
(matchlen < pathlen) ? matchlen : pathlen);
if (m < 0)
continue;
/*
* If we come here even once, that means there is at
* least one pathspec that would sort equal to or
* later than the path we are currently looking at.
* In other words, if we have never reached this point
* after iterating all pathspecs, it means all
* pathspecs are either outside of base, or inside the
* base but sorts strictly earlier than the current
* one. In either case, they will never match the
* subsequent entries. In such a case, we initialized
* the variable to -1 and that is what will be
* returned, allowing the caller to terminate early.
*/
never_interesting = 0;
}
if (pathlen > matchlen)
continue;
if (matchlen > pathlen) {
if (match[pathlen] != '/')
continue;
if (!S_ISDIR(mode))
continue;
}
if (m == -1)
/*
* we cheated and did not do strncmp(), so we do
* that here.
*/
m = strncmp(match, path, pathlen);
/*
* If common part matched earlier then it is a hit,
* because we rejected the case where path is not a
* leading directory and is shorter than match.
*/
if (!m)
return 1;
}
return never_interesting; /* No matches */
}
/* A whole sub-tree went away or appeared */
static void show_tree(struct diff_options *opt, const char *prefix, struct tree_desc *desc, const char *base, int baselen)
{
int all_interesting = 0;
while (desc->size) {
int show;
if (all_interesting)
show = 1;
else {
show = tree_entry_interesting(desc, base, baselen,
opt);
if (show == 2)
all_interesting = 1;
}
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
if (show < 0)
break;
if (show)
show_entry(opt, prefix, desc, base, baselen);
update_tree_entry(desc);
}
}
/* A file entry went away or appeared */
static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc,
const char *base, int baselen)
{
unsigned mode;
const char *path;
const unsigned char *sha1 = tree_entry_extract(desc, &path, &mode);
if (opt->recursive && S_ISDIR(mode)) {
enum object_type type;
int pathlen = tree_entry_len(path, sha1);
char *newbase = malloc_base(base, baselen, path, pathlen);
struct tree_desc inner;
void *tree;
unsigned long size;
tree = read_sha1_file(sha1, &type, &size);
if (!tree || type != OBJ_TREE)
die("corrupt tree sha %s", sha1_to_hex(sha1));
init_tree_desc(&inner, tree, size);
show_tree(opt, prefix, &inner, newbase, baselen + 1 + pathlen);
free(tree);
free(newbase);
} else {
opt->add_remove(opt, prefix[0], mode, sha1, base, path);
}
}
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
static void skip_uninteresting(struct tree_desc *t, const char *base, int baselen, struct diff_options *opt)
{
int all_interesting = 0;
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
while (t->size) {
int show;
if (all_interesting)
show = 1;
else {
show = tree_entry_interesting(t, base, baselen, opt);
if (show == 2)
all_interesting = 1;
}
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
if (!show) {
update_tree_entry(t);
continue;
}
/* Skip it all? */
if (show < 0)
t->size = 0;
return;
}
}
int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt)
{
int baselen = strlen(base);
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
for (;;) {
if (opt->quiet && opt->has_changes)
break;
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
if (opt->nr_paths) {
skip_uninteresting(t1, base, baselen, opt);
skip_uninteresting(t2, base, baselen, opt);
}
if (!t1->size) {
Set up for better tree diff optimizations This is mainly just a cleanup patch, and sets up for later changes where the tree-diff.c "interesting()" function can return more than just a yes/no value. In particular, it should be quite possible to say "no subsequent entries in this tree can possibly be interesting any more", and thus allow the callers to short-circuit the tree entirely. In fact, changing the callers to do so is trivial, and is really all this patch really does, because changing "interesting()" itself to say that nothing further is going to be interesting is definitely more complicated, considering that we may have arbitrary pathspecs. But in cleaning up the callers, this actually fixes a potential small performance issue in diff_tree(): if the second tree has a lot of uninterestign crud in it, we would keep on doing the "is it interesting?" check on the first tree for each uninteresting entry in the second one. The answer is obviously not going to change, so that was just not helping. The new code is clearer and simpler and avoids this issue entirely. I also renamed "interesting()" to "tree_entry_interesting()", because I got frustrated by the fact that - we actually had *another* function called "interesting()" in another file, and I couldn't tell from the profiles which one was the one that mattered more. - when rewriting it to return a ternary value, you can't just do if (interesting(...)) ... any more, but want to assign the return value to a local variable. The name of choice for that variable would normally be "interesting", so I just wanted to make the function name be more specific, and avoid that whole issue (even though I then didn't choose that name for either of the users, just to avoid confusion in the patch itself ;) In other words, this doesn't really change anything, but I think it's a good thing to do, and if somebody comes along and writes the logic for "yeah, none of the pathspecs you have are interesting", we now support that trivially. It could easily be a meaningful optimization for things like "blame", where there's just one pathspec, and stopping when you've seen it would allow you to avoid about 50% of the tree traversals on average. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-18 23:18:30 +01:00
if (!t2->size)
break;
show_entry(opt, "+", t2, base, baselen);
update_tree_entry(t2);
continue;
}
if (!t2->size) {
show_entry(opt, "-", t1, base, baselen);
update_tree_entry(t1);
continue;
}
switch (compare_tree_entry(t1, t2, base, baselen, opt)) {
case -1:
update_tree_entry(t1);
continue;
case 0:
update_tree_entry(t1);
/* Fallthrough */
case 1:
update_tree_entry(t2);
continue;
}
die("git-diff-tree: internal error");
}
return 0;
}
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
/*
* Does it look like the resulting diff might be due to a rename?
* - single entry
* - not a valid previous file
*/
static inline int diff_might_be_rename(void)
{
return diff_queued_diff.nr == 1 &&
!DIFF_FILE_VALID(diff_queued_diff.queue[0]->one);
}
static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt)
{
struct diff_options diff_opts;
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_filepair *choice;
const char *paths[1];
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
int i;
/* Remove the file creation entry from the diff queue, and remember it */
choice = q->queue[0];
q->nr = 0;
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
diff_setup(&diff_opts);
diff_opts.recursive = 1;
diff_opts.detect_rename = DIFF_DETECT_RENAME;
diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
diff_opts.single_follow = opt->paths[0];
paths[0] = NULL;
diff_tree_setup_paths(paths, &diff_opts);
if (diff_setup_done(&diff_opts) < 0)
die("unable to set up diff options to follow renames");
diff_tree(t1, t2, base, &diff_opts);
diffcore_std(&diff_opts);
/* Go through the new set of filepairing, and see if we find a more interesting one */
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
/*
* Found a source? Not only do we use that for the new
* diff_queued_diff, we will also use that as the path in
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
* the future!
*/
if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, opt->paths[0])) {
/* Switch the file-pairs around */
q->queue[i] = choice;
choice = p;
/* Update the path we use from now on.. */
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
opt->paths[0] = xstrdup(p->one->path);
diff_tree_setup_paths(opt->paths, opt);
break;
}
}
/*
* Then, discard all the non-relevane file pairs...
*/
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
diff_free_filepair(p);
}
/*
* .. and re-instate the one we want (which might be either the
* original one, or the rename/copy we found)
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
*/
q->queue[0] = choice;
q->nr = 1;
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
}
int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt)
{
void *tree1, *tree2;
struct tree_desc t1, t2;
unsigned long size1, size2;
int retval;
tree1 = read_object_with_reference(old, tree_type, &size1, NULL);
if (!tree1)
die("unable to read source tree (%s)", sha1_to_hex(old));
tree2 = read_object_with_reference(new, tree_type, &size2, NULL);
if (!tree2)
die("unable to read destination tree (%s)", sha1_to_hex(new));
init_tree_desc(&t1, tree1, size1);
init_tree_desc(&t2, tree2, size2);
retval = diff_tree(&t1, &t2, base, opt);
Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really mtter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
if (opt->follow_renames && diff_might_be_rename()) {
init_tree_desc(&t1, tree1, size1);
init_tree_desc(&t2, tree2, size2);
try_to_follow_renames(&t1, &t2, base, opt);
}
free(tree1);
free(tree2);
return retval;
}
int diff_root_tree_sha1(const unsigned char *new, const char *base, struct diff_options *opt)
{
int retval;
void *tree;
unsigned long size;
struct tree_desc empty, real;
tree = read_object_with_reference(new, tree_type, &size, NULL);
if (!tree)
die("unable to read root tree (%s)", sha1_to_hex(new));
init_tree_desc(&real, tree, size);
init_tree_desc(&empty, "", 0);
retval = diff_tree(&empty, &real, base, opt);
free(tree);
return retval;
}
static int count_paths(const char **paths)
{
int i = 0;
while (*paths++)
i++;
return i;
}
void diff_tree_release_paths(struct diff_options *opt)
{
free(opt->pathlens);
}
void diff_tree_setup_paths(const char **p, struct diff_options *opt)
{
opt->nr_paths = 0;
opt->pathlens = NULL;
opt->paths = NULL;
if (p) {
int i;
opt->paths = p;
opt->nr_paths = count_paths(p);
if (opt->nr_paths == 0) {
opt->pathlens = NULL;
return;
}
opt->pathlens = xmalloc(opt->nr_paths * sizeof(int));
for (i=0; i < opt->nr_paths; i++)
opt->pathlens[i] = strlen(p[i]);
}
}