2006-01-07 10:33:54 +01:00
|
|
|
#include "cache.h"
|
2005-04-18 20:39:48 +02:00
|
|
|
#include "object.h"
|
2005-04-28 16:46:33 +02:00
|
|
|
#include "blob.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tag.h"
|
2005-04-18 20:39:48 +02:00
|
|
|
|
|
|
|
/*
 * Global object hash table.
 *
 * objs       - array of obj_allocs slots; an empty slot holds NULL.
 *              Slots are chosen by hashtable_index() and collisions are
 *              resolved by stepping to the next slot (see find_object()).
 * nr_objs    - number of objects stored so far (bumped by created_object()).
 * obj_allocs - number of slots currently allocated in objs[].
 */
struct object **objs;
static int nr_objs;
int obj_allocs;
|
2005-04-18 20:39:48 +02:00
|
|
|
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
/*
 * Printable names for the small-integer object type codes, indexed by
 * the type value.
 *
 * NOTE(review): there is no "tag" entry here even though this file
 * handles tag objects (tag_type, lookup_tag).  The TYPE_xxx values are
 * defined outside this file; confirm that this table lines up with them.
 */
const char *type_names[] = {
	"none",
	"blob",
	"tree",
	"commit",
	"bad"
};
|
|
|
|
|
2006-02-12 02:57:57 +01:00
|
|
|
static int hashtable_index(const unsigned char *sha1)
|
|
|
|
{
|
2006-02-12 03:51:19 +01:00
|
|
|
unsigned int i;
|
|
|
|
memcpy(&i, sha1, sizeof(unsigned int));
|
2006-02-12 02:57:57 +01:00
|
|
|
return (int)(i % obj_allocs);
|
|
|
|
}
|
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
static int find_object(const unsigned char *sha1)
|
2005-04-18 20:39:48 +02:00
|
|
|
{
|
2006-02-12 03:51:19 +01:00
|
|
|
int i;
|
2005-04-18 20:39:48 +02:00
|
|
|
|
2006-02-12 02:57:57 +01:00
|
|
|
if (!objs)
|
|
|
|
return -1;
|
2005-04-18 20:39:48 +02:00
|
|
|
|
2006-02-12 03:51:19 +01:00
|
|
|
i = hashtable_index(sha1);
|
2006-02-12 02:57:57 +01:00
|
|
|
while (objs[i]) {
|
|
|
|
if (memcmp(sha1, objs[i]->sha1, 20) == 0)
|
|
|
|
return i;
|
|
|
|
i++;
|
|
|
|
if (i == obj_allocs)
|
|
|
|
i = 0;
|
|
|
|
}
|
|
|
|
return -1 - i;
|
2005-04-18 20:39:48 +02:00
|
|
|
}
|
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
struct object *lookup_object(const unsigned char *sha1)
|
2005-04-18 20:39:48 +02:00
|
|
|
{
|
|
|
|
int pos = find_object(sha1);
|
|
|
|
if (pos >= 0)
|
|
|
|
return objs[pos];
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
void created_object(const unsigned char *sha1, struct object *obj)
|
2005-04-18 20:39:48 +02:00
|
|
|
{
|
2006-02-12 02:57:57 +01:00
|
|
|
int pos;
|
2005-04-18 20:39:48 +02:00
|
|
|
|
|
|
|
obj->parsed = 0;
|
|
|
|
memcpy(obj->sha1, sha1, 20);
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
obj->type = TYPE_NONE;
|
2005-04-18 20:39:48 +02:00
|
|
|
obj->used = 0;
|
|
|
|
|
2006-02-12 02:57:57 +01:00
|
|
|
if (obj_allocs - 1 <= nr_objs * 2) {
|
|
|
|
int i, count = obj_allocs;
|
|
|
|
obj_allocs = (obj_allocs < 32 ? 32 : 2 * obj_allocs);
|
2005-04-26 21:00:58 +02:00
|
|
|
objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
|
2006-02-12 02:57:57 +01:00
|
|
|
memset(objs + count, 0, (obj_allocs - count)
|
|
|
|
* sizeof(struct object *));
|
2006-02-12 20:24:50 +01:00
|
|
|
for (i = 0; i < obj_allocs; i++)
|
2006-02-12 02:57:57 +01:00
|
|
|
if (objs[i]) {
|
|
|
|
int j = find_object(objs[i]->sha1);
|
|
|
|
if (j != i) {
|
|
|
|
j = -1 - j;
|
|
|
|
objs[j] = objs[i];
|
|
|
|
objs[i] = NULL;
|
|
|
|
}
|
|
|
|
}
|
2005-04-18 20:39:48 +02:00
|
|
|
}
|
|
|
|
|
2006-02-12 02:57:57 +01:00
|
|
|
pos = find_object(sha1);
|
|
|
|
if (pos >= 0)
|
|
|
|
die("Inserting %s twice\n", sha1_to_hex(sha1));
|
|
|
|
pos = -pos-1;
|
2005-04-18 20:39:48 +02:00
|
|
|
|
|
|
|
objs[pos] = obj;
|
|
|
|
nr_objs++;
|
|
|
|
}
|
|
|
|
|
2005-06-22 02:35:10 +02:00
|
|
|
struct object *lookup_object_type(const unsigned char *sha1, const char *type)
|
|
|
|
{
|
2005-08-03 01:45:48 +02:00
|
|
|
if (!type) {
|
|
|
|
return lookup_unknown_object(sha1);
|
|
|
|
} else if (!strcmp(type, blob_type)) {
|
2005-06-22 02:35:10 +02:00
|
|
|
return &lookup_blob(sha1)->object;
|
|
|
|
} else if (!strcmp(type, tree_type)) {
|
|
|
|
return &lookup_tree(sha1)->object;
|
|
|
|
} else if (!strcmp(type, commit_type)) {
|
|
|
|
return &lookup_commit(sha1)->object;
|
|
|
|
} else if (!strcmp(type, tag_type)) {
|
|
|
|
return &lookup_tag(sha1)->object;
|
|
|
|
} else {
|
|
|
|
error("Unknown type %s", type);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-08-03 01:45:48 +02:00
|
|
|
union any_object {
|
|
|
|
struct object object;
|
|
|
|
struct commit commit;
|
|
|
|
struct tree tree;
|
|
|
|
struct blob blob;
|
|
|
|
struct tag tag;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct object *lookup_unknown_object(const unsigned char *sha1)
|
|
|
|
{
|
|
|
|
struct object *obj = lookup_object(sha1);
|
|
|
|
if (!obj) {
|
2006-04-03 20:30:46 +02:00
|
|
|
union any_object *ret = xcalloc(1, sizeof(*ret));
|
2005-08-03 01:45:48 +02:00
|
|
|
created_object(sha1, &ret->object);
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 01:45:13 +02:00
|
|
|
ret->object.type = TYPE_NONE;
|
2005-08-03 01:45:48 +02:00
|
|
|
return &ret->object;
|
|
|
|
}
|
|
|
|
return obj;
|
|
|
|
}
|
|
|
|
|
2005-06-03 17:05:39 +02:00
|
|
|
struct object *parse_object(const unsigned char *sha1)
|
2005-04-28 16:46:33 +02:00
|
|
|
{
|
2005-06-27 12:33:33 +02:00
|
|
|
unsigned long size;
|
|
|
|
char type[20];
|
|
|
|
void *buffer = read_sha1_file(sha1, type, &size);
|
|
|
|
if (buffer) {
|
2005-05-06 19:48:34 +02:00
|
|
|
struct object *obj;
|
2005-06-27 12:33:33 +02:00
|
|
|
if (check_sha1_signature(sha1, buffer, size, type) < 0)
|
2005-04-28 16:46:33 +02:00
|
|
|
printf("sha1 mismatch %s\n", sha1_to_hex(sha1));
|
2006-04-02 14:44:09 +02:00
|
|
|
if (!strcmp(type, blob_type)) {
|
2005-05-06 19:48:34 +02:00
|
|
|
struct blob *blob = lookup_blob(sha1);
|
|
|
|
parse_blob_buffer(blob, buffer, size);
|
|
|
|
obj = &blob->object;
|
2006-04-02 14:44:09 +02:00
|
|
|
} else if (!strcmp(type, tree_type)) {
|
2005-05-06 19:48:34 +02:00
|
|
|
struct tree *tree = lookup_tree(sha1);
|
|
|
|
obj = &tree->object;
|
2006-05-29 21:16:12 +02:00
|
|
|
if (!tree->object.parsed) {
|
|
|
|
parse_tree_buffer(tree, buffer, size);
|
|
|
|
buffer = NULL;
|
|
|
|
}
|
2006-04-02 14:44:09 +02:00
|
|
|
} else if (!strcmp(type, commit_type)) {
|
2005-05-06 19:48:34 +02:00
|
|
|
struct commit *commit = lookup_commit(sha1);
|
|
|
|
parse_commit_buffer(commit, buffer, size);
|
2005-05-26 04:26:28 +02:00
|
|
|
if (!commit->buffer) {
|
|
|
|
commit->buffer = buffer;
|
|
|
|
buffer = NULL;
|
|
|
|
}
|
2005-05-06 19:48:34 +02:00
|
|
|
obj = &commit->object;
|
2006-04-02 14:44:09 +02:00
|
|
|
} else if (!strcmp(type, tag_type)) {
|
2005-05-06 19:48:34 +02:00
|
|
|
struct tag *tag = lookup_tag(sha1);
|
|
|
|
parse_tag_buffer(tag, buffer, size);
|
|
|
|
obj = &tag->object;
|
2005-04-28 16:46:33 +02:00
|
|
|
} else {
|
2005-05-06 19:48:34 +02:00
|
|
|
obj = NULL;
|
2005-04-28 16:46:33 +02:00
|
|
|
}
|
2005-05-06 19:48:34 +02:00
|
|
|
free(buffer);
|
|
|
|
return obj;
|
2005-04-28 16:46:33 +02:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-08-03 01:45:48 +02:00
|
|
|
|
|
|
|
struct object_list *object_list_insert(struct object *item,
|
|
|
|
struct object_list **list_p)
|
|
|
|
{
|
|
|
|
struct object_list *new_list = xmalloc(sizeof(struct object_list));
|
|
|
|
new_list->item = item;
|
|
|
|
new_list->next = *list_p;
|
|
|
|
*list_p = new_list;
|
|
|
|
return new_list;
|
|
|
|
}
|
|
|
|
|
2005-09-05 08:04:18 +02:00
|
|
|
void object_list_append(struct object *item,
|
|
|
|
struct object_list **list_p)
|
|
|
|
{
|
|
|
|
while (*list_p) {
|
|
|
|
list_p = &((*list_p)->next);
|
|
|
|
}
|
|
|
|
*list_p = xmalloc(sizeof(struct object_list));
|
|
|
|
(*list_p)->next = NULL;
|
|
|
|
(*list_p)->item = item;
|
|
|
|
}
|
|
|
|
|
2005-08-03 01:45:48 +02:00
|
|
|
unsigned object_list_length(struct object_list *list)
|
|
|
|
{
|
|
|
|
unsigned ret = 0;
|
|
|
|
while (list) {
|
|
|
|
list = list->next;
|
|
|
|
ret++;
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int object_list_contains(struct object_list *list, struct object *obj)
|
|
|
|
{
|
|
|
|
while (list) {
|
|
|
|
if (list->item == obj)
|
|
|
|
return 1;
|
|
|
|
list = list->next;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
|
|
|
|
void add_object_array(struct object *obj, const char *name, struct object_array *array)
|
|
|
|
{
|
|
|
|
unsigned nr = array->nr;
|
|
|
|
unsigned alloc = array->alloc;
|
|
|
|
struct object_array_entry *objects = array->objects;
|
|
|
|
|
|
|
|
if (nr >= alloc) {
|
|
|
|
alloc = (alloc + 32) * 2;
|
|
|
|
objects = xrealloc(objects, alloc * sizeof(*objects));
|
|
|
|
array->alloc = alloc;
|
|
|
|
array->objects = objects;
|
|
|
|
}
|
|
|
|
objects[nr].item = obj;
|
|
|
|
objects[nr].name = name;
|
|
|
|
array->nr = ++nr;
|
|
|
|
}
|
2006-07-03 12:05:20 +02:00
|
|
|
|
|
|
|
void clear_object_marks(unsigned mark)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < obj_allocs; i++)
|
|
|
|
if (objs[i])
|
|
|
|
objs[i]->flags &= ~mark;
|
|
|
|
}
|