mirror of
https://github.com/git/git.git
synced 2024-11-01 14:57:52 +01:00
64c4e8bccd
This fixes a memory issue when recursing a lot, which can be reproduced as: `seq 1 100000 >one; seq 1 4 100000 >two; git diff --no-index --histogram one two`. Before this patch, histogram_diff would call itself recursively before calling free_index, which meant that a lot of memory was allocated during the recursion and only freed afterwards. By moving the memory allocation (and its free call) into find_lcs, the memory is freed before we recurse, so that memory is reused in the next step of the recursion instead of new memory being allocated. This addresses only the memory pressure, not the run-time complexity, which is also awful for the corner case outlined above. Helpful in understanding the code (in addition to the sparse history of this file) was https://stackoverflow.com/a/32367597, which reproduces most of the code comments of the JGit implementation. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
378 lines
9.2 KiB
C
378 lines
9.2 KiB
C
/*
|
|
* Copyright (C) 2010, Google Inc.
|
|
* and other copyright owners as documented in JGit's IP log.
|
|
*
|
|
* This program and the accompanying materials are made available
|
|
* under the terms of the Eclipse Distribution License v1.0 which
|
|
* accompanies this distribution, is reproduced below, and is
|
|
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or
|
|
* without modification, are permitted provided that the following
|
|
* conditions are met:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* - Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following
|
|
* disclaimer in the documentation and/or other materials provided
|
|
* with the distribution.
|
|
*
|
|
* - Neither the name of the Eclipse Foundation, Inc. nor the
|
|
* names of its contributors may be used to endorse or promote
|
|
* products derived from this software without specific prior
|
|
* written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
|
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "xinclude.h"
|
|
#include "xtypes.h"
|
|
#include "xdiff.h"
|
|
|
|
/*
 * Upper bounds for the unsigned int "ptr" and "cnt" members of
 * struct record.
 */
#define MAX_PTR	UINT_MAX
#define MAX_CNT	UINT_MAX

/*
 * Number of the last line in side n's range.
 *
 * LINE_END(n) reads the variables line<n> and count<n> from the scope
 * in which it is expanded; LINE_END_PTR(n) does the same through
 * pointers carrying those names.
 */
#define LINE_END(n)	((line##n) + (count##n) - 1)
#define LINE_END_PTR(n)	((*line##n) + (*count##n) - 1)
|
|
|
|
struct histindex {
|
|
struct record {
|
|
unsigned int ptr, cnt;
|
|
struct record *next;
|
|
} **records, /* an occurrence */
|
|
**line_map; /* map of line to record chain */
|
|
chastore_t rcha;
|
|
unsigned int *next_ptrs;
|
|
unsigned int table_bits,
|
|
records_size,
|
|
line_map_size;
|
|
|
|
unsigned int max_chain_length,
|
|
key_shift,
|
|
ptr_shift;
|
|
|
|
unsigned int cnt,
|
|
has_common;
|
|
|
|
xdfenv_t *env;
|
|
xpparam_t const *xpp;
|
|
};
|
|
|
|
/*
 * A pair of equally long line ranges, one per file. try_lcs() stores
 * the first and last matching line numbers here, so both ends are
 * inclusive; an all-zero region means "nothing recorded".
 */
struct region {
	unsigned int begin1;	/* first line of the range in file 1 */
	unsigned int end1;	/* last line of the range in file 1 */
	unsigned int begin2;	/* first line of the range in file 2 */
	unsigned int end2;	/* last line of the range in file 2 */
};
|
|
|
|
/*
 * Accessors for the per-line arrays of a struct histindex and for the
 * line records of an xdfenv_t. Every macro parameter is parenthesized
 * so that arguments such as "&idx" or "cond ? a : b" expand correctly.
 */

/* Record that line "a" of file 1 belongs to. */
#define LINE_MAP(i, a) ((i)->line_map[(a) - (i)->ptr_shift])

/* Slot holding the next occurrence of file-1 line "ptr" (0 if none). */
#define NEXT_PTR(index, ptr) \
	((index)->next_ptrs[(ptr) - (index)->ptr_shift])

/* Occurrence count of the record that file-1 line "ptr" belongs to. */
#define CNT(index, ptr) \
	((LINE_MAP(index, ptr))->cnt)

/* Record of 1-based line "l" on side "s" (1 or 2) of "env". */
#define REC(env, s, l) \
	((env)->xdf##s.recs[(l) - 1])
|
|
|
|
/*
 * Compare two line records.
 *
 * Returns 1 when both records carry the same hash and xdl_recmatch()
 * reports their contents as matching under xpp->flags, 0 otherwise.
 * xdl_recmatch() is only consulted when the hashes are equal.
 */
static int cmp_recs(xpparam_t const *xpp,
	xrecord_t *r1, xrecord_t *r2)
{
	if (r1->ha != r2->ha)
		return 0;

	if (xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, xpp->flags))
		return 1;

	return 0;
}
|
|
|
|
/*
 * Line comparison and hashing helpers built on cmp_recs() and REC().
 * Macro parameters that are dereferenced are parenthesized so that any
 * pointer expression can be passed.
 */

/* Does line l1 of side s1 match line l2 of side s2 in "env"? */
#define CMP_ENV(xpp, env, s1, l1, s2, l2) \
	(cmp_recs((xpp), REC(env, s1, l1), REC(env, s2, l2)))

/* Same comparison, taking xpp and env from the histindex "i". */
#define CMP(i, s1, l1, s2, l2) \
	(cmp_recs((i)->xpp, REC((i)->env, s1, l1), REC((i)->env, s2, l2)))

/* Hash-table slot for the given line of the given side. */
#define TABLE_HASH(index, side, line) \
	(XDL_HASHLONG((REC((index)->env, side, line))->ha, (index)->table_bits))
|
|
|
|
static int scanA(struct histindex *index, int line1, int count1)
|
|
{
|
|
unsigned int ptr, tbl_idx;
|
|
unsigned int chain_len;
|
|
struct record **rec_chain, *rec;
|
|
|
|
for (ptr = LINE_END(1); line1 <= ptr; ptr--) {
|
|
tbl_idx = TABLE_HASH(index, 1, ptr);
|
|
rec_chain = index->records + tbl_idx;
|
|
rec = *rec_chain;
|
|
|
|
chain_len = 0;
|
|
while (rec) {
|
|
if (CMP(index, 1, rec->ptr, 1, ptr)) {
|
|
/*
|
|
* ptr is identical to another element. Insert
|
|
* it onto the front of the existing element
|
|
* chain.
|
|
*/
|
|
NEXT_PTR(index, ptr) = rec->ptr;
|
|
rec->ptr = ptr;
|
|
/* cap rec->cnt at MAX_CNT */
|
|
rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1);
|
|
LINE_MAP(index, ptr) = rec;
|
|
goto continue_scan;
|
|
}
|
|
|
|
rec = rec->next;
|
|
chain_len++;
|
|
}
|
|
|
|
if (chain_len == index->max_chain_length)
|
|
return -1;
|
|
|
|
/*
|
|
* This is the first time we have ever seen this particular
|
|
* element in the sequence. Construct a new chain for it.
|
|
*/
|
|
if (!(rec = xdl_cha_alloc(&index->rcha)))
|
|
return -1;
|
|
rec->ptr = ptr;
|
|
rec->cnt = 1;
|
|
rec->next = *rec_chain;
|
|
*rec_chain = rec;
|
|
LINE_MAP(index, ptr) = rec;
|
|
|
|
continue_scan:
|
|
; /* no op */
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr,
|
|
int line1, int count1, int line2, int count2)
|
|
{
|
|
unsigned int b_next = b_ptr + 1;
|
|
struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)];
|
|
unsigned int as, ae, bs, be, np, rc;
|
|
int should_break;
|
|
|
|
for (; rec; rec = rec->next) {
|
|
if (rec->cnt > index->cnt) {
|
|
if (!index->has_common)
|
|
index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr);
|
|
continue;
|
|
}
|
|
|
|
as = rec->ptr;
|
|
if (!CMP(index, 1, as, 2, b_ptr))
|
|
continue;
|
|
|
|
index->has_common = 1;
|
|
for (;;) {
|
|
should_break = 0;
|
|
np = NEXT_PTR(index, as);
|
|
bs = b_ptr;
|
|
ae = as;
|
|
be = bs;
|
|
rc = rec->cnt;
|
|
|
|
while (line1 < as && line2 < bs
|
|
&& CMP(index, 1, as - 1, 2, bs - 1)) {
|
|
as--;
|
|
bs--;
|
|
if (1 < rc)
|
|
rc = XDL_MIN(rc, CNT(index, as));
|
|
}
|
|
while (ae < LINE_END(1) && be < LINE_END(2)
|
|
&& CMP(index, 1, ae + 1, 2, be + 1)) {
|
|
ae++;
|
|
be++;
|
|
if (1 < rc)
|
|
rc = XDL_MIN(rc, CNT(index, ae));
|
|
}
|
|
|
|
if (b_next <= be)
|
|
b_next = be + 1;
|
|
if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) {
|
|
lcs->begin1 = as;
|
|
lcs->begin2 = bs;
|
|
lcs->end1 = ae;
|
|
lcs->end2 = be;
|
|
index->cnt = rc;
|
|
}
|
|
|
|
if (np == 0)
|
|
break;
|
|
|
|
while (np <= ae) {
|
|
np = NEXT_PTR(index, np);
|
|
if (np == 0) {
|
|
should_break = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (should_break)
|
|
break;
|
|
|
|
as = np;
|
|
}
|
|
}
|
|
return b_next;
|
|
}
|
|
|
|
/*
 * Diff the given line ranges with the default algorithm instead.
 *
 * A private copy of the parameters is handed to xdl_fall_back_diff():
 * its flags are xpp->flags with the XDF_DIFF_ALGORITHM_MASK bits
 * cleared. The copy is zeroed first so that any member other than
 * "flags" is passed on with a defined value rather than stack garbage.
 *
 * Returns whatever xdl_fall_back_diff() returns.
 */
static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	xpparam_t xpparam;

	memset(&xpparam, 0, sizeof(xpparam));
	xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;

	return xdl_fall_back_diff(env, &xpparam,
				  line1, count1, line2, count2);
}
|
|
|
|
static inline void free_index(struct histindex *index)
|
|
{
|
|
xdl_free(index->records);
|
|
xdl_free(index->line_map);
|
|
xdl_free(index->next_ptrs);
|
|
xdl_cha_free(&index->rcha);
|
|
}
|
|
|
|
/*
 * Allocate a zero-filled array of nr elements of elem_size bytes each.
 * The byte count is computed in size_t and checked for overflow.
 * Returns NULL on overflow or when xdl_malloc() fails.
 */
static void *hist_alloc_zeroed(size_t nr, size_t elem_size)
{
	size_t bytes;
	void *p;

	if (elem_size && nr > ((size_t)-1) / elem_size)
		return NULL;
	bytes = nr * elem_size;

	p = xdl_malloc(bytes);
	if (!p)
		return NULL;
	memset(p, 0, bytes);
	return p;
}

/*
 * Build an index of file 1's range and search file 2's range for the
 * best common region, which is stored in *lcs.
 *
 * The index lives only for the duration of this call, so it is freed
 * before the caller recurses.
 *
 * Returns:
 *   0  the search completed and *lcs can be used by the caller (it is
 *      left untouched when try_lcs() recorded nothing);
 *   1  a common line exists but index.cnt was never lowered to
 *      max_chain_length or below, i.e. no region was recorded; the
 *      caller falls back to the classic diff;
 *  -1  an allocation failed or scanA() returned non-zero (which
 *      includes a hash bucket chain reaching max_chain_length).
 */
static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
	struct region *lcs,
	int line1, int count1, int line2, int count2)
{
	int b_ptr;
	int ret = -1;
	struct histindex index;

	memset(&index, 0, sizeof(index));

	index.env = env;
	index.xpp = xpp;

	/* keep free_index() safe on every early exit below */
	index.records = NULL;
	index.line_map = NULL;
	index.next_ptrs = NULL;
	/* in case of early xdl_cha_free() */
	index.rcha.head = NULL;

	index.table_bits = xdl_hashbits(count1);
	index.records_size = 1u << index.table_bits;
	index.records = hist_alloc_zeroed(index.records_size,
					  sizeof(*index.records));
	if (!index.records)
		goto cleanup;

	index.line_map_size = count1;
	index.line_map = hist_alloc_zeroed(index.line_map_size,
					   sizeof(*index.line_map));
	if (!index.line_map)
		goto cleanup;

	index.next_ptrs = hist_alloc_zeroed(index.line_map_size,
					    sizeof(*index.next_ptrs));
	if (!index.next_ptrs)
		goto cleanup;

	/* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */
	if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0)
		goto cleanup;

	index.ptr_shift = line1;
	index.max_chain_length = 64;

	if (scanA(&index, line1, count1))
		goto cleanup;

	/* start above any real count so the first match is always taken */
	index.cnt = index.max_chain_length + 1;

	for (b_ptr = line2; b_ptr <= LINE_END(2); )
		b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);

	if (index.has_common && index.max_chain_length < index.cnt)
		ret = 1;
	else
		ret = 0;

cleanup:
	free_index(&index);
	return ret;
}
|
|
|
|
/*
 * Histogram diff of lines line1 .. LINE_END(1) of file 1 against
 * lines line2 .. LINE_END(2) of file 2. Changed lines are flagged in
 * env->xdf1.rchg / env->xdf2.rchg.
 *
 * The range is split around the region found by find_lcs(). The part
 * before the region is handled by a recursive call; the part after it
 * is handled by looping, so that a long sequence of regions does not
 * add one stack frame per region.
 *
 * Returns 0 on success, -1 if the range of file 1 cannot be addressed
 * with an unsigned int or find_lcs() fails, and otherwise the result
 * of the classic-diff fallback or of the failing recursive call.
 */
static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
	int line1, int count1, int line2, int count2)
{
	struct region lcs;
	int lcs_found;
	int result;
	int tail_line1, tail_count1, tail_line2, tail_count2;

	for (;;) {
		if (count1 <= 0 && count2 <= 0)
			return 0;

		if (LINE_END(1) >= MAX_PTR)
			return -1;

		/* one side is empty: everything on the other side changed */
		if (!count1) {
			while (count2--)
				env->xdf2.rchg[line2++ - 1] = 1;
			return 0;
		}
		if (!count2) {
			while (count1--)
				env->xdf1.rchg[line1++ - 1] = 1;
			return 0;
		}

		memset(&lcs, 0, sizeof(lcs));
		lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
		if (lcs_found < 0)
			return -1;
		if (lcs_found)
			return fall_back_to_classic_diff(xpp, env,
							 line1, count1,
							 line2, count2);

		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
			/* no common region at all: both ranges changed */
			while (count1--)
				env->xdf1.rchg[line1++ - 1] = 1;
			while (count2--)
				env->xdf2.rchg[line2++ - 1] = 1;
			return 0;
		}

		/* lines in front of the common region */
		result = histogram_diff(xpp, env,
					line1, lcs.begin1 - line1,
					line2, lcs.begin2 - line2);
		if (result)
			return result;

		/*
		 * Lines behind the common region: continue with them in
		 * the next iteration instead of a tail call. The new
		 * values are computed first because LINE_END() reads the
		 * current line/count pair.
		 */
		tail_line1 = lcs.end1 + 1;
		tail_count1 = LINE_END(1) - lcs.end1;
		tail_line2 = lcs.end2 + 1;
		tail_count2 = LINE_END(2) - lcs.end2;

		line1 = tail_line1;
		count1 = tail_count1;
		line2 = tail_line2;
		count2 = tail_count2;
	}
}
|
|
|
|
int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2,
|
|
xpparam_t const *xpp, xdfenv_t *env)
|
|
{
|
|
if (xdl_prepare_env(file1, file2, xpp, env) < 0)
|
|
return -1;
|
|
|
|
return histogram_diff(xpp, env,
|
|
env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1,
|
|
env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1);
|
|
}
|