2005-05-21 11:39:09 +02:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2005 Junio C Hamano
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
#include "diff.h"
|
|
|
|
#include "diffcore.h"
|
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
/*
 * Table of rename/copy destinations.
 *
 * Entries are kept sorted by two->path (strcmp order) so that a
 * destination can be found with a binary search.
 */
struct diff_rename_dst {
	struct diff_filespec *two;	/* the destination file */
	struct diff_filepair *pair;	/* matched rename/copy, NULL while unmatched */
};

static struct diff_rename_dst *rename_dst;	/* the table itself */
static int rename_dst_nr;			/* number of entries in use */
static int rename_dst_alloc;			/* number of entries allocated */
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two,
|
|
|
|
int insert_ok)
|
2005-05-21 11:39:09 +02:00
|
|
|
{
|
2005-05-24 10:10:48 +02:00
|
|
|
int first, last;
|
|
|
|
|
|
|
|
first = 0;
|
|
|
|
last = rename_dst_nr;
|
|
|
|
while (last > first) {
|
|
|
|
int next = (last + first) >> 1;
|
|
|
|
struct diff_rename_dst *dst = &(rename_dst[next]);
|
|
|
|
int cmp = strcmp(two->path, dst->two->path);
|
|
|
|
if (!cmp)
|
|
|
|
return dst;
|
|
|
|
if (cmp < 0) {
|
|
|
|
last = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
first = next+1;
|
|
|
|
}
|
|
|
|
/* not found */
|
|
|
|
if (!insert_ok)
|
|
|
|
return NULL;
|
|
|
|
/* insert to make it at "first" */
|
|
|
|
if (rename_dst_alloc <= rename_dst_nr) {
|
|
|
|
rename_dst_alloc = alloc_nr(rename_dst_alloc);
|
|
|
|
rename_dst = xrealloc(rename_dst,
|
|
|
|
rename_dst_alloc * sizeof(*rename_dst));
|
|
|
|
}
|
|
|
|
rename_dst_nr++;
|
|
|
|
if (first < rename_dst_nr)
|
|
|
|
memmove(rename_dst + first + 1, rename_dst + first,
|
|
|
|
(rename_dst_nr - first - 1) * sizeof(*rename_dst));
|
2005-09-16 01:13:43 +02:00
|
|
|
rename_dst[first].two = alloc_filespec(two->path);
|
|
|
|
fill_filespec(rename_dst[first].two, two->sha1, two->mode);
|
2005-05-24 10:10:48 +02:00
|
|
|
rename_dst[first].pair = NULL;
|
|
|
|
return &(rename_dst[first]);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
|
|
|
|
2005-05-28 00:55:55 +02:00
|
|
|
/*
 * Table of rename/copy source files.
 *
 * Entries are kept sorted by one->path (strcmp order).
 */
struct diff_rename_src {
	struct diff_filespec *one;	/* the source file, as passed by the caller */
	unsigned short score;		/* to remember the break score */
	unsigned src_path_left : 1;	/* becomes source_stays of a recorded pair */
};

static struct diff_rename_src *rename_src;	/* the table itself */
static int rename_src_nr;			/* number of entries in use */
static int rename_src_alloc;			/* number of entries allocated */
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-28 00:55:55 +02:00
|
|
|
static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
|
2006-04-09 05:17:46 +02:00
|
|
|
int src_path_left,
|
|
|
|
unsigned short score)
|
2005-05-24 10:10:48 +02:00
|
|
|
{
|
|
|
|
int first, last;
|
|
|
|
|
|
|
|
first = 0;
|
|
|
|
last = rename_src_nr;
|
|
|
|
while (last > first) {
|
|
|
|
int next = (last + first) >> 1;
|
|
|
|
struct diff_rename_src *src = &(rename_src[next]);
|
|
|
|
int cmp = strcmp(one->path, src->one->path);
|
|
|
|
if (!cmp)
|
|
|
|
return src;
|
|
|
|
if (cmp < 0) {
|
|
|
|
last = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
first = next+1;
|
|
|
|
}
|
2005-05-28 00:55:55 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
/* insert to make it at "first" */
|
|
|
|
if (rename_src_alloc <= rename_src_nr) {
|
|
|
|
rename_src_alloc = alloc_nr(rename_src_alloc);
|
|
|
|
rename_src = xrealloc(rename_src,
|
|
|
|
rename_src_alloc * sizeof(*rename_src));
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
2005-05-24 10:10:48 +02:00
|
|
|
rename_src_nr++;
|
|
|
|
if (first < rename_src_nr)
|
|
|
|
memmove(rename_src + first + 1, rename_src + first,
|
|
|
|
(rename_src_nr - first - 1) * sizeof(*rename_src));
|
|
|
|
rename_src[first].one = one;
|
2006-04-09 05:17:46 +02:00
|
|
|
rename_src[first].score = score;
|
2005-09-10 21:42:32 +02:00
|
|
|
rename_src[first].src_path_left = src_path_left;
|
2005-05-24 10:10:48 +02:00
|
|
|
return &(rename_src[first]);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
|
|
|
|
2006-07-06 09:35:47 +02:00
|
|
|
static int is_exact_match(struct diff_filespec *src,
|
|
|
|
struct diff_filespec *dst,
|
|
|
|
int contents_too)
|
2005-05-21 11:39:09 +02:00
|
|
|
{
|
|
|
|
if (src->sha1_valid && dst->sha1_valid &&
|
2006-08-17 20:54:57 +02:00
|
|
|
!hashcmp(src->sha1, dst->sha1))
|
2005-05-21 11:39:09 +02:00
|
|
|
return 1;
|
2006-07-06 09:35:47 +02:00
|
|
|
if (!contents_too)
|
|
|
|
return 0;
|
2005-05-28 00:56:38 +02:00
|
|
|
if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
|
|
|
|
return 0;
|
|
|
|
if (src->size != dst->size)
|
|
|
|
return 0;
|
2006-12-14 11:07:46 +01:00
|
|
|
if (src->sha1_valid && dst->sha1_valid)
|
|
|
|
return !hashcmp(src->sha1, dst->sha1);
|
2005-05-28 00:56:38 +02:00
|
|
|
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
|
2005-05-21 11:39:09 +02:00
|
|
|
return 0;
|
|
|
|
if (src->size == dst->size &&
|
|
|
|
!memcmp(src->data, dst->data, src->size))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-06-21 13:52:11 +02:00
|
|
|
static int basename_same(struct diff_filespec *src, struct diff_filespec *dst)
|
|
|
|
{
|
|
|
|
int src_len = strlen(src->path), dst_len = strlen(dst->path);
|
|
|
|
while (src_len && dst_len) {
|
|
|
|
char c1 = src->path[--src_len];
|
|
|
|
char c2 = dst->path[--dst_len];
|
|
|
|
if (c1 != c2)
|
|
|
|
return 0;
|
|
|
|
if (c1 == '/')
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return (!src_len || src->path[src_len - 1] == '/') &&
|
|
|
|
(!dst_len || dst->path[dst_len - 1] == '/');
|
|
|
|
}
|
|
|
|
|
2005-05-21 11:39:09 +02:00
|
|
|
/* One cell of the rename similarity matrix: a (source, destination) candidate. */
struct diff_score {
	int src;	/* index in rename_src */
	int dst;	/* index in rename_dst */
	int score;	/* similarity of the two files */
	int name_score;	/* tie-breaker used when two scores are equal */
};
|
|
|
|
|
|
|
|
static int estimate_similarity(struct diff_filespec *src,
|
|
|
|
struct diff_filespec *dst,
|
|
|
|
int minimum_score)
|
|
|
|
{
|
|
|
|
/* src points at a file that existed in the original tree (or
|
|
|
|
* optionally a file in the destination tree) and dst points
|
|
|
|
* at a newly created file. They may be quite similar, in which
|
|
|
|
* case we want to say src is renamed to dst or src is copied into
|
|
|
|
* dst, and then some edit has been applied to dst.
|
|
|
|
*
|
|
|
|
* Compare them and return how similar they are, representing
|
2005-05-28 00:56:38 +02:00
|
|
|
* the score as an integer between 0 and MAX_SCORE.
|
|
|
|
*
|
|
|
|
* When there is an exact match, it is considered a better
|
|
|
|
* match than anything else; the destination does not even
|
|
|
|
* call into this function in that case.
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
2006-03-13 07:26:34 +01:00
|
|
|
unsigned long max_size, delta_size, base_size, src_copied, literal_added;
|
2005-06-29 01:58:27 +02:00
|
|
|
unsigned long delta_limit;
|
2005-05-21 11:39:09 +02:00
|
|
|
int score;
|
|
|
|
|
2005-05-23 06:24:49 +02:00
|
|
|
/* We deal only with regular files. Symlink renames are handled
|
|
|
|
* only when they are exact matches --- in other words, no edits
|
|
|
|
* after renaming.
|
|
|
|
*/
|
|
|
|
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
|
|
|
|
return 0;
|
|
|
|
|
2006-03-13 07:26:34 +01:00
|
|
|
max_size = ((src->size > dst->size) ? src->size : dst->size);
|
2005-05-22 00:55:18 +02:00
|
|
|
base_size = ((src->size < dst->size) ? src->size : dst->size);
|
2006-03-13 07:26:34 +01:00
|
|
|
delta_size = max_size - base_size;
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-22 00:55:18 +02:00
|
|
|
/* We would not consider edits that change the file size so
|
|
|
|
* drastically. delta_size must be smaller than
|
2005-05-22 10:31:28 +02:00
|
|
|
* (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
|
2005-05-28 00:56:38 +02:00
|
|
|
*
|
2005-05-22 00:55:18 +02:00
|
|
|
* Note that base_size == 0 case is handled here already
|
|
|
|
* and the final score computation below would not have a
|
|
|
|
* divide-by-zero issue.
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
2005-05-22 10:31:28 +02:00
|
|
|
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
|
2005-05-21 11:39:09 +02:00
|
|
|
return 0;
|
|
|
|
|
2007-09-25 21:29:42 +02:00
|
|
|
if ((!src->cnt_data && diff_populate_filespec(src, 0))
|
|
|
|
|| (!dst->cnt_data && diff_populate_filespec(dst, 0)))
|
2005-05-28 00:56:38 +02:00
|
|
|
return 0; /* error but caught downstream */
|
|
|
|
|
2005-05-24 21:09:32 +02:00
|
|
|
|
2007-03-07 02:44:37 +01:00
|
|
|
delta_limit = (unsigned long)
|
|
|
|
(base_size * (MAX_SCORE-minimum_score) / MAX_SCORE);
|
2007-06-29 07:54:37 +02:00
|
|
|
if (diffcore_count_changes(src, dst,
|
2006-03-12 12:22:10 +01:00
|
|
|
&src->cnt_data, &dst->cnt_data,
|
2006-03-01 01:01:36 +01:00
|
|
|
delta_limit,
|
|
|
|
&src_copied, &literal_added))
|
2005-05-24 21:09:32 +02:00
|
|
|
return 0;
|
2005-06-03 10:36:03 +02:00
|
|
|
|
2006-03-03 07:11:25 +01:00
|
|
|
/* How similar are they?
|
|
|
|
* what percentage of material in dst are from source?
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
2006-03-13 07:26:34 +01:00
|
|
|
if (!dst->size)
|
2006-03-03 07:11:25 +01:00
|
|
|
score = 0; /* should not happen */
|
2007-06-25 00:23:28 +02:00
|
|
|
else
|
2007-03-07 02:44:37 +01:00
|
|
|
score = (int)(src_copied * MAX_SCORE / max_size);
|
2005-05-21 11:39:09 +02:00
|
|
|
return score;
|
|
|
|
}
|
|
|
|
|
2005-09-16 01:13:43 +02:00
|
|
|
static void record_rename_pair(int dst_index, int src_index, int score)
|
2005-05-21 11:39:09 +02:00
|
|
|
{
|
2005-05-24 10:10:48 +02:00
|
|
|
struct diff_filespec *one, *two, *src, *dst;
|
|
|
|
struct diff_filepair *dp;
|
[PATCH] Rename/copy detection fix.
The rename/copy detection logic in earlier round was only good
enough to show patch output and discussion on the mailing list
about the diff-raw format updates revealed many problems with
it. This patch fixes all the ones known to me, without making
things I want to do later impossible, mostly related to patch
reordering.
(1) Earlier rename/copy detector determined which one is rename
and which one is copy too early, which made it impossible
to later introduce diffcore transformers to reorder
patches. This patch fixes it by moving that logic to the
very end of the processing.
(2) Earlier output routine diff_flush() was pruning all the
"no-change" entries indiscriminatingly. This was done due
to my false assumption that one of the requirements in the
diff-raw output was not to show such an entry (which
resulted in my incorrect comment about "diff-helper never
being able to be equivalent to built-in diff driver"). My
special thanks go to Linus for correcting me about this.
When we produce diff-raw output, for the downstream to be
able to tell renames from copies, sometimes it _is_
necessary to output "no-change" entries, and this patch
adds diffcore_prune() function for doing it.
(3) Earlier diff_filepair structure was trying to be not too
specific about rename/copy operations, but the purpose of
the structure was to record one or two paths, which _was_
indeed about rename/copy. This patch discards xfrm_msg
field which was trying to be generic for this wrong reason,
and introduces a couple of fields (rename_score and
rename_rank) that are explicitly specific to rename/copy
logic. One thing to note is that the information in a
single diff_filepair structure _still_ does not distinguish
renames from copies, and it is deliberately so. This is to
allow patches to be reordered in later stages.
(4) This patch also adds some tests about diff-raw format
output and makes sure that necessary "no-change" entries
appear on the output.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-23 06:26:09 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
if (rename_dst[dst_index].pair)
|
|
|
|
die("internal error: dst already matched.");
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
src = rename_src[src_index].one;
|
|
|
|
one = alloc_filespec(src->path);
|
|
|
|
fill_filespec(one, src->sha1, src->mode);
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
dst = rename_dst[dst_index].two;
|
|
|
|
two = alloc_filespec(dst->path);
|
|
|
|
fill_filespec(two, dst->sha1, dst->mode);
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-09-16 01:13:43 +02:00
|
|
|
dp = diff_queue(NULL, one, two);
|
2006-08-03 21:01:01 +02:00
|
|
|
dp->renamed_pair = 1;
|
2006-04-09 05:17:46 +02:00
|
|
|
if (!strcmp(src->path, dst->path))
|
|
|
|
dp->score = rename_src[src_index].score;
|
|
|
|
else
|
|
|
|
dp->score = score;
|
2005-09-10 21:42:32 +02:00
|
|
|
dp->source_stays = rename_src[src_index].src_path_left;
|
2005-05-24 10:10:48 +02:00
|
|
|
rename_dst[dst_index].pair = dp;
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We sort the rename similarity matrix with the score, in descending
|
2005-05-28 00:55:55 +02:00
|
|
|
* order (the most similar first).
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
|
|
|
static int score_compare(const void *a_, const void *b_)
|
|
|
|
{
|
|
|
|
const struct diff_score *a = a_, *b = b_;
|
2007-06-25 00:23:28 +02:00
|
|
|
|
|
|
|
if (a->score == b->score)
|
|
|
|
return b->name_score - a->name_score;
|
|
|
|
|
2005-05-21 11:39:09 +02:00
|
|
|
return b->score - a->score;
|
|
|
|
}
|
|
|
|
|
2005-09-10 21:42:32 +02:00
|
|
|
static int compute_stays(struct diff_queue_struct *q,
|
|
|
|
struct diff_filespec *one)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
if (strcmp(one->path, p->two->path))
|
|
|
|
continue;
|
|
|
|
if (DIFF_PAIR_RENAME(p)) {
|
|
|
|
return 0; /* something else is renamed into this */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2005-09-21 09:18:27 +02:00
|
|
|
void diffcore_rename(struct diff_options *options)
|
2005-05-21 11:39:09 +02:00
|
|
|
{
|
2005-09-21 09:18:27 +02:00
|
|
|
int detect_rename = options->detect_rename;
|
|
|
|
int minimum_score = options->rename_score;
|
|
|
|
int rename_limit = options->rename_limit;
|
2005-05-22 04:40:36 +02:00
|
|
|
struct diff_queue_struct *q = &diff_queued_diff;
|
2005-09-16 01:13:43 +02:00
|
|
|
struct diff_queue_struct outq;
|
2005-05-21 11:39:09 +02:00
|
|
|
struct diff_score *mx;
|
2006-07-06 09:35:47 +02:00
|
|
|
int i, j, rename_count, contents_too;
|
2005-05-24 10:10:48 +02:00
|
|
|
int num_create, num_src, dst_cnt;
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-22 08:33:32 +02:00
|
|
|
if (!minimum_score)
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-30 09:08:37 +02:00
|
|
|
minimum_score = DEFAULT_RENAME_SCORE;
|
2005-09-16 01:13:43 +02:00
|
|
|
rename_count = 0;
|
2005-05-21 11:39:09 +02:00
|
|
|
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
2005-05-21 11:40:01 +02:00
|
|
|
struct diff_filepair *p = q->queue[i];
|
2006-11-02 09:02:11 +01:00
|
|
|
if (!DIFF_FILE_VALID(p->one)) {
|
2005-05-22 04:42:18 +02:00
|
|
|
if (!DIFF_FILE_VALID(p->two))
|
2005-05-23 06:24:49 +02:00
|
|
|
continue; /* unmerged */
|
2006-11-02 09:02:11 +01:00
|
|
|
else if (options->single_follow &&
|
|
|
|
strcmp(options->single_follow, p->two->path))
|
|
|
|
continue; /* not interested */
|
2005-05-21 11:39:09 +02:00
|
|
|
else
|
2005-05-24 10:10:48 +02:00
|
|
|
locate_rename_dst(p->two, 1);
|
2006-11-02 09:02:11 +01:00
|
|
|
}
|
2005-06-12 05:55:20 +02:00
|
|
|
else if (!DIFF_FILE_VALID(p->two)) {
|
|
|
|
/* If the source is a broken "delete", and
|
|
|
|
* they did not really want to get broken,
|
|
|
|
* that means the source actually stays.
|
|
|
|
*/
|
|
|
|
int stays = (p->broken_pair && !p->score);
|
2006-04-09 05:17:46 +02:00
|
|
|
register_rename_src(p->one, stays, p->score);
|
2005-06-12 05:55:20 +02:00
|
|
|
}
|
2005-05-28 00:55:55 +02:00
|
|
|
else if (detect_rename == DIFF_DETECT_COPY)
|
2006-04-09 05:17:46 +02:00
|
|
|
register_rename_src(p->one, 1, p->score);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
Fix the rename detection limit checking
This adds more proper rename detection limits. Instead of just checking
the limit against the number of potential rename destinations, we verify
that the rename matrix (which is what really matters) doesn't grow
ridiculously large, and we also make sure that we don't overflow when
doing the matrix size calculation.
This also changes the default limits from unlimited, to a rename matrix
that is limited to 100 entries on a side. You can raise it with the config
entry, or by using the "-l<n>" command line flag, but at least the default
is now a sane number that avoids spending lots of time (and memory) in
situations that likely don't merit it.
The choice of default value is of course very debatable. Limiting the
rename matrix to a 100x100 size will mean that even if you have just one
obvious rename, but you also create (or delete) 10,000 files, the rename
matrix will be so big that we disable the heuristics. Sounds reasonable to
me, but let's see if people hit this (and, perhaps more importantly,
actually *care*) in real life.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-14 19:39:48 +02:00
|
|
|
if (rename_dst_nr == 0 || rename_src_nr == 0)
|
2005-05-21 11:39:09 +02:00
|
|
|
goto cleanup; /* nothing to do */
|
|
|
|
|
Fix the rename detection limit checking
This adds more proper rename detection limits. Instead of just checking
the limit against the number of potential rename destinations, we verify
that the rename matrix (which is what really matters) doesn't grow
ridiculously large, and we also make sure that we don't overflow when
doing the matrix size calculation.
This also changes the default limits from unlimited, to a rename matrix
that is limited to 100 entries on a side. You can raise it with the config
entry, or by using the "-l<n>" command line flag, but at least the default
is now a sane number that avoids spending lots of time (and memory) in
situations that likely don't merit it.
The choice of default value is of course very debatable. Limiting the
rename matrix to a 100x100 size will mean that even if you have just one
obvious rename, but you also create (or delete) 10,000 files, the rename
matrix will be so big that we disable the heuristics. Sounds reasonable to
me, but let's see if people hit this (and, perhaps more importantly,
actually *care*) in real life.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-09-14 19:39:48 +02:00
|
|
|
/*
|
|
|
|
* This basically does a test for the rename matrix not
|
|
|
|
* growing larger than a "rename_limit" square matrix, ie:
|
|
|
|
*
|
|
|
|
* rename_dst_nr * rename_src_nr > rename_limit * rename_limit
|
|
|
|
*
|
|
|
|
* but handles the potential overflow case specially (and we
|
|
|
|
* assume at least 32-bit integers)
|
|
|
|
*/
|
|
|
|
if (rename_limit <= 0 || rename_limit > 32767)
|
|
|
|
rename_limit = 32767;
|
|
|
|
if (rename_dst_nr > rename_limit && rename_src_nr > rename_limit)
|
|
|
|
goto cleanup;
|
|
|
|
if (rename_dst_nr * rename_src_nr > rename_limit * rename_limit)
|
|
|
|
goto cleanup;
|
|
|
|
|
2005-05-21 11:39:09 +02:00
|
|
|
/* We really want to cull the candidates list early
|
|
|
|
* with cheap tests in order to avoid doing deltas.
|
2006-07-06 09:35:47 +02:00
|
|
|
* The first round matches up the up-to-date entries,
|
|
|
|
* and then during the second round we try to match
|
|
|
|
* cache-dirty entries as well.
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
2006-07-06 09:35:47 +02:00
|
|
|
for (contents_too = 0; contents_too < 2; contents_too++) {
|
|
|
|
for (i = 0; i < rename_dst_nr; i++) {
|
|
|
|
struct diff_filespec *two = rename_dst[i].two;
|
|
|
|
if (rename_dst[i].pair)
|
|
|
|
continue; /* dealt with an earlier round */
|
|
|
|
for (j = 0; j < rename_src_nr; j++) {
|
2007-06-21 13:52:11 +02:00
|
|
|
int k;
|
2006-07-06 09:35:47 +02:00
|
|
|
struct diff_filespec *one = rename_src[j].one;
|
|
|
|
if (!is_exact_match(one, two, contents_too))
|
|
|
|
continue;
|
2007-06-21 13:52:11 +02:00
|
|
|
|
|
|
|
/* see if there is a basename match, too */
|
|
|
|
for (k = j; k < rename_src_nr; k++) {
|
|
|
|
one = rename_src[k].one;
|
|
|
|
if (basename_same(one, two) &&
|
|
|
|
is_exact_match(one, two,
|
|
|
|
contents_too)) {
|
|
|
|
j = k;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-07 02:44:37 +01:00
|
|
|
record_rename_pair(i, j, (int)MAX_SCORE);
|
2006-07-06 09:35:47 +02:00
|
|
|
rename_count++;
|
|
|
|
break; /* we are done with this entry */
|
|
|
|
}
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Have we run out the created file pool? If so we can avoid
|
|
|
|
* doing the delta matrix altogether.
|
|
|
|
*/
|
2005-09-16 01:13:43 +02:00
|
|
|
if (rename_count == rename_dst_nr)
|
2005-05-28 00:55:55 +02:00
|
|
|
goto cleanup;
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-11-21 21:18:23 +01:00
|
|
|
if (minimum_score == MAX_SCORE)
|
|
|
|
goto cleanup;
|
|
|
|
|
2005-09-16 01:13:43 +02:00
|
|
|
num_create = (rename_dst_nr - rename_count);
|
2005-05-24 10:10:48 +02:00
|
|
|
num_src = rename_src_nr;
|
2005-05-21 11:39:09 +02:00
|
|
|
mx = xmalloc(sizeof(*mx) * num_create * num_src);
|
2005-05-24 10:10:48 +02:00
|
|
|
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
|
2005-05-21 11:39:09 +02:00
|
|
|
int base = dst_cnt * num_src;
|
2005-05-24 10:10:48 +02:00
|
|
|
struct diff_filespec *two = rename_dst[i].two;
|
|
|
|
if (rename_dst[i].pair)
|
2005-05-21 11:39:09 +02:00
|
|
|
continue; /* dealt with exact match already. */
|
2005-05-24 10:10:48 +02:00
|
|
|
for (j = 0; j < rename_src_nr; j++) {
|
|
|
|
struct diff_filespec *one = rename_src[j].one;
|
|
|
|
struct diff_score *m = &mx[base+j];
|
|
|
|
m->src = j;
|
|
|
|
m->dst = i;
|
|
|
|
m->score = estimate_similarity(one, two,
|
|
|
|
minimum_score);
|
2007-06-25 00:23:28 +02:00
|
|
|
m->name_score = basename_same(one, two);
|
2007-10-03 06:01:03 +02:00
|
|
|
diff_free_filespec_blob(one);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
2006-03-13 01:39:51 +01:00
|
|
|
/* We do not need the text anymore */
|
2007-10-03 06:01:03 +02:00
|
|
|
diff_free_filespec_blob(two);
|
2005-05-21 11:39:09 +02:00
|
|
|
dst_cnt++;
|
|
|
|
}
|
|
|
|
/* cost matrix sorted by most to least similar pair */
|
|
|
|
qsort(mx, num_create * num_src, sizeof(*mx), score_compare);
|
|
|
|
for (i = 0; i < num_create * num_src; i++) {
|
2005-05-24 10:10:48 +02:00
|
|
|
struct diff_rename_dst *dst = &rename_dst[mx[i].dst];
|
|
|
|
if (dst->pair)
|
|
|
|
continue; /* already done, either exact or fuzzy. */
|
2005-05-21 11:39:09 +02:00
|
|
|
if (mx[i].score < minimum_score)
|
2005-05-28 00:55:55 +02:00
|
|
|
break; /* there is no more usable pair. */
|
2005-09-16 01:13:43 +02:00
|
|
|
record_rename_pair(mx[i].dst, mx[i].src, mx[i].score);
|
|
|
|
rename_count++;
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
|
|
|
free(mx);
|
|
|
|
|
2005-05-28 00:55:55 +02:00
|
|
|
cleanup:
|
2005-05-21 11:39:09 +02:00
|
|
|
/* At this point, we have found some renames and copies and they
|
2005-09-16 01:13:43 +02:00
|
|
|
* are recorded in rename_dst. The original list is still in *q.
|
2005-05-21 11:39:09 +02:00
|
|
|
*/
|
2005-05-24 10:10:48 +02:00
|
|
|
outq.queue = NULL;
|
|
|
|
outq.nr = outq.alloc = 0;
|
2005-05-21 11:39:09 +02:00
|
|
|
for (i = 0; i < q->nr; i++) {
|
2005-05-24 10:10:48 +02:00
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
struct diff_filepair *pair_to_free = NULL;
|
|
|
|
|
2005-05-30 09:08:07 +02:00
|
|
|
if (!DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
|
|
|
|
/*
|
|
|
|
* Creation
|
|
|
|
*
|
|
|
|
* We would output this create record if it has
|
|
|
|
* not been turned into a rename/copy already.
|
|
|
|
*/
|
|
|
|
struct diff_rename_dst *dst =
|
|
|
|
locate_rename_dst(p->two, 0);
|
|
|
|
if (dst && dst->pair) {
|
2005-05-24 10:10:48 +02:00
|
|
|
diff_q(&outq, dst->pair);
|
|
|
|
pair_to_free = p;
|
|
|
|
}
|
|
|
|
else
|
2005-05-30 09:08:07 +02:00
|
|
|
/* no matching rename/copy source, so
|
|
|
|
* record this as a creation.
|
2005-05-24 10:10:48 +02:00
|
|
|
*/
|
|
|
|
diff_q(&outq, p);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
2005-05-30 09:08:07 +02:00
|
|
|
else if (DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two)) {
|
|
|
|
/*
|
|
|
|
* Deletion
|
|
|
|
*
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-30 09:08:37 +02:00
|
|
|
* We would output this delete record if:
|
|
|
|
*
|
|
|
|
* (1) this is a broken delete and the counterpart
|
|
|
|
* broken create remains in the output; or
|
2005-09-16 01:13:43 +02:00
|
|
|
* (2) this is not a broken delete, and rename_dst
|
|
|
|
* does not have a rename/copy to move p->one->path
|
|
|
|
* out of existence.
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-30 09:08:37 +02:00
|
|
|
*
|
|
|
|
* Otherwise, the counterpart broken create
|
|
|
|
* has been turned into a rename-edit; or
|
|
|
|
* delete did not have a matching create to
|
|
|
|
* begin with.
|
2005-05-30 09:08:07 +02:00
|
|
|
*/
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-30 09:08:37 +02:00
|
|
|
if (DIFF_PAIR_BROKEN(p)) {
|
|
|
|
/* broken delete */
|
|
|
|
struct diff_rename_dst *dst =
|
|
|
|
locate_rename_dst(p->one, 0);
|
|
|
|
if (dst && dst->pair)
|
|
|
|
/* counterpart is now rename/copy */
|
|
|
|
pair_to_free = p;
|
|
|
|
}
|
|
|
|
else {
|
2005-09-16 01:13:43 +02:00
|
|
|
for (j = 0; j < rename_dst_nr; j++) {
|
|
|
|
if (!rename_dst[j].pair)
|
|
|
|
continue;
|
|
|
|
if (strcmp(rename_dst[j].pair->
|
|
|
|
one->path,
|
|
|
|
p->one->path))
|
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < rename_dst_nr)
|
[PATCH] Add -B flag to diff-* brothers.
A new diffcore transformation, diffcore-break.c, is introduced.
When the -B flag is given, a patch that represents a complete
rewrite is broken into a deletion followed by a creation. This
makes it easier to review such a complete rewrite patch.
The -B flag takes the same syntax as the -M and -C flags to
specify the minimum amount of non-source material the resulting
file needs to have to be considered a complete rewrite, and
defaults to 99% if not specified.
As the new test t4008-diff-break-rewrite.sh demonstrates, if a
file is a complete rewrite, it is broken into a delete/create
pair, which can further be subjected to the usual rename
detection if -M or -C is used. For example, if file0 gets
completely rewritten to make it as if it were rather based on
file1 which itself disappeared, the following happens:
The original change looks like this:
file0 --> file0' (quite different from file0)
file1 --> /dev/null
After diffcore-break runs, it would become this:
file0 --> /dev/null
/dev/null --> file0'
file1 --> /dev/null
Then diffcore-rename matches them up:
file1 --> file0'
The internal score values are finer grained now. Earlier
maximum of 10000 has been raised to 60000; there is no user
visible changes but there is no reason to waste available bits.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-30 09:08:37 +02:00
|
|
|
/* this path remains */
|
|
|
|
pair_to_free = p;
|
|
|
|
}
|
2005-05-30 09:08:07 +02:00
|
|
|
|
|
|
|
if (pair_to_free)
|
|
|
|
;
|
|
|
|
else
|
|
|
|
diff_q(&outq, p);
|
|
|
|
}
|
2005-05-24 10:10:48 +02:00
|
|
|
else if (!diff_unmodified_pair(p))
|
2005-05-28 00:55:55 +02:00
|
|
|
/* all the usual ones need to be kept */
|
2005-05-24 10:10:48 +02:00
|
|
|
diff_q(&outq, p);
|
2005-05-28 00:55:55 +02:00
|
|
|
else
|
|
|
|
/* no need to keep unmodified pairs */
|
|
|
|
pair_to_free = p;
|
|
|
|
|
2005-05-28 00:50:30 +02:00
|
|
|
if (pair_to_free)
|
|
|
|
diff_free_filepair(pair_to_free);
|
2005-05-21 11:39:09 +02:00
|
|
|
}
|
2005-05-24 10:10:48 +02:00
|
|
|
diff_debug_queue("done copying original", &outq);
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
free(q->queue);
|
|
|
|
*q = outq;
|
|
|
|
diff_debug_queue("done collapsing", q);
|
2005-05-21 11:39:09 +02:00
|
|
|
|
2005-09-10 21:42:32 +02:00
|
|
|
/* We need to see which rename source really stays here;
|
|
|
|
* earlier we only checked if the path is left in the result,
|
|
|
|
* but even if a path remains in the result, if that is coming
|
|
|
|
* from copying something else on top of it, then the original
|
|
|
|
* source is lost and does not stay.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < q->nr; i++) {
|
|
|
|
struct diff_filepair *p = q->queue[i];
|
|
|
|
if (DIFF_PAIR_RENAME(p) && p->source_stays) {
|
|
|
|
/* If one appears as the target of a rename-copy,
|
|
|
|
* then mark p->source_stays = 0; otherwise
|
|
|
|
* leave it as is.
|
|
|
|
*/
|
|
|
|
p->source_stays = compute_stays(q, p->one);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-09-16 01:13:43 +02:00
|
|
|
for (i = 0; i < rename_dst_nr; i++) {
|
|
|
|
diff_free_filespec_data(rename_dst[i].two);
|
|
|
|
free(rename_dst[i].two);
|
|
|
|
}
|
|
|
|
|
2005-05-24 10:10:48 +02:00
|
|
|
free(rename_dst);
|
|
|
|
rename_dst = NULL;
|
|
|
|
rename_dst_nr = rename_dst_alloc = 0;
|
|
|
|
free(rename_src);
|
|
|
|
rename_src = NULL;
|
|
|
|
rename_src_nr = rename_src_alloc = 0;
|
2005-05-21 11:39:09 +02:00
|
|
|
return;
|
|
|
|
}
|