diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index 71ce557276..9fa4847d56 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -8,7 +8,8 @@ git-index-pack - Build pack index file for an existing packed archive SYNOPSIS -------- -'git-index-pack' [-o <index-file>] <pack-file> +'git-index-pack' [-v] [-o <index-file>] <pack-file> +'git-index-pack' --stdin [--fix-thin] [-v] [-o <index-file>] [<pack-file>] DESCRIPTION @@ -21,6 +22,9 @@ objects/pack/ directory of a git repository. OPTIONS ------- +-v:: + Be verbose about what is going on, including progress status. + -o <index-file>:: Write the generated pack index into the specified file. Without this option the name of pack index @@ -29,6 +33,23 @@ OPTIONS fails if the name of packed archive does not end with .pack). +--stdin:: + When this flag is provided, the pack is read from stdin + instead and a copy is then written to <pack-file>. If + <pack-file> is not specified, the pack is written to + objects/pack/ directory of the current git repository with + a default name determined from the pack content. + +--fix-thin:: + It is possible for gitlink:git-pack-objects[1] to build a + "thin" pack, which records objects in deltified form based on + objects not included in the pack to reduce network traffic. + Those objects are expected to be present on the receiving end + and they must be included in the pack for that pack to be self + contained and indexable. Without this option any attempt to + index a thin pack will fail. This option only makes sense in + conjunction with --stdin. 
+ Author ------ diff --git a/fetch-clone.c b/fetch-clone.c index 76b99afcdb..96cdab43c9 100644 --- a/fetch-clone.c +++ b/fetch-clone.c @@ -3,97 +3,6 @@ #include "pkt-line.h" #include "sideband.h" #include -#include - -static int finish_pack(const char *pack_tmp_name, const char *me) -{ - int pipe_fd[2]; - pid_t pid; - char idx[PATH_MAX]; - char final[PATH_MAX]; - char hash[41]; - unsigned char sha1[20]; - char *cp; - int err = 0; - - if (pipe(pipe_fd) < 0) - die("%s: unable to set up pipe", me); - - strcpy(idx, pack_tmp_name); /* ".git/objects/pack-XXXXXX" */ - cp = strrchr(idx, '/'); - memcpy(cp, "/pidx", 5); - - pid = fork(); - if (pid < 0) - die("%s: unable to fork off git-index-pack", me); - if (!pid) { - close(0); - dup2(pipe_fd[1], 1); - close(pipe_fd[0]); - close(pipe_fd[1]); - execl_git_cmd("index-pack", "-o", idx, pack_tmp_name, NULL); - error("cannot exec git-index-pack <%s> <%s>", - idx, pack_tmp_name); - exit(1); - } - close(pipe_fd[1]); - if (read(pipe_fd[0], hash, 40) != 40) { - error("%s: unable to read from git-index-pack", me); - err = 1; - } - close(pipe_fd[0]); - - for (;;) { - int status, code; - - if (waitpid(pid, &status, 0) < 0) { - if (errno == EINTR) - continue; - error("waitpid failed (%s)", strerror(errno)); - goto error_die; - } - if (WIFSIGNALED(status)) { - int sig = WTERMSIG(status); - error("git-index-pack died of signal %d", sig); - goto error_die; - } - if (!WIFEXITED(status)) { - error("git-index-pack died of unnatural causes %d", - status); - goto error_die; - } - code = WEXITSTATUS(status); - if (code) { - error("git-index-pack died with error code %d", code); - goto error_die; - } - if (err) - goto error_die; - break; - } - hash[40] = 0; - if (get_sha1_hex(hash, sha1)) { - error("git-index-pack reported nonsense '%s'", hash); - goto error_die; - } - /* Now we have pack in pack_tmp_name[], and - * idx in idx[]; rename them to their final names. 
- */ - snprintf(final, sizeof(final), - "%s/pack/pack-%s.pack", get_object_directory(), hash); - move_temp_to_file(pack_tmp_name, final); - chmod(final, 0444); - snprintf(final, sizeof(final), - "%s/pack/pack-%s.idx", get_object_directory(), hash); - move_temp_to_file(idx, final); - chmod(final, 0444); - return 0; - - error_die: - unlink(idx); - unlink(pack_tmp_name); - exit(1); -} static pid_t setup_sideband(int sideband, const char *me, int fd[2], int xd[2]) { @@ -128,7 +37,7 @@ static pid_t setup_sideband(int sideband, const char *me, int fd[2], int xd[2]) return side_pid; } -int receive_unpack_pack(int xd[2], const char *me, int quiet, int sideband) +static int get_pack(int xd[2], const char *me, int sideband, const char **argv) { int status; pid_t pid, side_pid; @@ -142,135 +51,37 @@ int receive_unpack_pack(int xd[2], const char *me, int quiet, int sideband) dup2(fd[0], 0); close(fd[0]); close(fd[1]); - execl_git_cmd("unpack-objects", quiet ? "-q" : NULL, NULL); - die("git-unpack-objects exec failed"); + execv_git_cmd(argv); + die("%s exec failed", argv[0]); } close(fd[0]); close(fd[1]); while (waitpid(pid, &status, 0) < 0) { if (errno != EINTR) - die("waiting for git-unpack-objects: %s", - strerror(errno)); + die("waiting for %s: %s", argv[0], strerror(errno)); } if (WIFEXITED(status)) { int code = WEXITSTATUS(status); if (code) - die("git-unpack-objects died with error code %d", - code); + die("%s died with error code %d", argv[0], code); return 0; } if (WIFSIGNALED(status)) { int sig = WTERMSIG(status); - die("git-unpack-objects died of signal %d", sig); + die("%s died of signal %d", argv[0], sig); } - die("git-unpack-objects died of unnatural causes %d", status); + die("%s died of unnatural causes %d", argv[0], status); } -/* - * We average out the download speed over this many "events", where - * an event is a minimum of about half a second. That way, we get - * a reasonably stable number. 
- */ -#define NR_AVERAGE (4) - -/* - * A "binary msec" is a power-of-two-msec, aka 1/1024th of a second. - * Keeping the time in that format means that "bytes / msecs" means - * the same as kB/s (modulo rounding). - * - * 1000512 is a magic number (usecs in a second, rounded up by half - * of 1024, to make "rounding" come out right ;) - */ -#define usec_to_binarymsec(x) ((int)(x) / (1000512 >> 10)) +int receive_unpack_pack(int xd[2], const char *me, int quiet, int sideband) +{ + const char *argv[3] = { "unpack-objects", quiet ? "-q" : NULL, NULL }; + return get_pack(xd, me, sideband, argv); +} int receive_keep_pack(int xd[2], const char *me, int quiet, int sideband) { - char tmpfile[PATH_MAX]; - int ofd, ifd, fd[2]; - unsigned long total; - static struct timeval prev_tv; - struct average { - unsigned long bytes; - unsigned long time; - } download[NR_AVERAGE] = { {0, 0}, }; - unsigned long avg_bytes, avg_time; - int idx = 0; - - setup_sideband(sideband, me, fd, xd); - - ifd = fd[0]; - snprintf(tmpfile, sizeof(tmpfile), - "%s/pack/tmp-XXXXXX", get_object_directory()); - ofd = mkstemp(tmpfile); - if (ofd < 0) - return error("unable to create temporary file %s", tmpfile); - - gettimeofday(&prev_tv, NULL); - total = 0; - avg_bytes = 0; - avg_time = 0; - while (1) { - char buf[8192]; - ssize_t sz, wsz, pos; - sz = read(ifd, buf, sizeof(buf)); - if (sz == 0) - break; - if (sz < 0) { - if (errno != EINTR && errno != EAGAIN) { - error("error reading pack (%s)", strerror(errno)); - close(ofd); - unlink(tmpfile); - return -1; - } - sz = 0; - } - pos = 0; - while (pos < sz) { - wsz = write(ofd, buf + pos, sz - pos); - if (wsz < 0) { - error("error writing pack (%s)", - strerror(errno)); - close(ofd); - unlink(tmpfile); - return -1; - } - pos += wsz; - } - total += sz; - if (!quiet) { - static unsigned long last; - struct timeval tv; - unsigned long diff = total - last; - /* not really "msecs", but a power-of-two millisec (1/1024th of a sec) */ - unsigned long msecs; - - 
gettimeofday(&tv, NULL); - msecs = tv.tv_sec - prev_tv.tv_sec; - msecs <<= 10; - msecs += usec_to_binarymsec(tv.tv_usec - prev_tv.tv_usec); - - if (msecs > 500) { - prev_tv = tv; - last = total; - - /* Update averages ..*/ - avg_bytes += diff; - avg_time += msecs; - avg_bytes -= download[idx].bytes; - avg_time -= download[idx].time; - download[idx].bytes = diff; - download[idx].time = msecs; - idx++; - if (idx >= NR_AVERAGE) - idx = 0; - - fprintf(stderr, "%4lu.%03luMB (%lu kB/s) \r", - total >> 20, - 1000*((total >> 10) & 1023)>>10, - avg_bytes / avg_time ); - } - } - } - close(ofd); - return finish_pack(tmpfile, me); + const char *argv[5] = { "index-pack", "--stdin", "--fix-thin", + quiet ? NULL : "-v", NULL }; + return get_pack(xd, me, sideband, argv); } diff --git a/fetch-pack.c b/fetch-pack.c index 474d54520e..8720ed42e9 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -518,8 +518,6 @@ int main(int argc, char **argv) } if (!dest) usage(fetch_pack_usage); - if (keep_pack) - use_thin_pack = 0; pid = git_connect(fd, dest, exec); if (pid < 0) return 1; diff --git a/index-pack.c b/index-pack.c index e33f60524f..866a054056 100644 --- a/index-pack.c +++ b/index-pack.c @@ -6,9 +6,11 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include +#include static const char index_pack_usage[] = -"git-index-pack [-o index-file] pack-file"; +"git-index-pack [-v] [-o ] { | --stdin [--fix-thin] [] }"; struct object_entry { @@ -33,37 +35,83 @@ union delta_base { struct delta_entry { - struct object_entry *obj; union delta_base base; + int obj_no; }; -static const char *pack_name; static struct object_entry *objects; static struct delta_entry *deltas; static int nr_objects; static int nr_deltas; +static int nr_resolved_deltas; + +static int from_stdin; +static int verbose; + +static volatile sig_atomic_t progress_update; + +static void progress_interval(int signum) +{ + progress_update = 1; +} + +static void setup_progress_signal(void) +{ + struct sigaction sa; + struct 
itimerval v; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = progress_interval; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART; + sigaction(SIGALRM, &sa, NULL); + + v.it_interval.tv_sec = 1; + v.it_interval.tv_usec = 0; + v.it_value = v.it_interval; + setitimer(ITIMER_REAL, &v, NULL); + +} + +static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc) +{ + unsigned percent = n * 100 / total; + if (percent != last_pc || progress_update) { + fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total); + progress_update = 0; + } + return percent; +} /* We always read in 4kB chunks. */ static unsigned char input_buffer[4096]; static unsigned long input_offset, input_len, consumed_bytes; static SHA_CTX input_ctx; -static int input_fd; +static int input_fd, output_fd, mmap_fd; + +/* Discard current buffer used content. */ +static void flush() +{ + if (input_offset) { + if (output_fd >= 0) + write_or_die(output_fd, input_buffer, input_offset); + SHA1_Update(&input_ctx, input_buffer, input_offset); + memcpy(input_buffer, input_buffer + input_offset, input_len); + input_offset = 0; + } +} /* * Make sure at least "min" bytes are available in the buffer, and * return the pointer to the buffer. 
*/ -static void * fill(int min) +static void *fill(int min) { if (min <= input_len) return input_buffer + input_offset; if (min > sizeof(input_buffer)) die("cannot fill %d bytes", min); - if (input_offset) { - SHA1_Update(&input_ctx, input_buffer, input_offset); - memcpy(input_buffer, input_buffer + input_offset, input_len); - input_offset = 0; - } + flush(); do { int ret = xread(input_fd, input_buffer + input_len, sizeof(input_buffer) - input_len); @@ -86,13 +134,31 @@ static void use(int bytes) consumed_bytes += bytes; } -static void open_pack_file(void) +static const char *open_pack_file(const char *pack_name) { - input_fd = open(pack_name, O_RDONLY); - if (input_fd < 0) - die("cannot open packfile '%s': %s", pack_name, - strerror(errno)); + if (from_stdin) { + input_fd = 0; + if (!pack_name) { + static char tmpfile[PATH_MAX]; + snprintf(tmpfile, sizeof(tmpfile), + "%s/pack_XXXXXX", get_object_directory()); + output_fd = mkstemp(tmpfile); + pack_name = xstrdup(tmpfile); + } else + output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600); + if (output_fd < 0) + die("unable to create %s: %s\n", pack_name, strerror(errno)); + mmap_fd = output_fd; + } else { + input_fd = open(pack_name, O_RDONLY); + if (input_fd < 0) + die("cannot open packfile '%s': %s", + pack_name, strerror(errno)); + output_fd = -1; + mmap_fd = input_fd; + } SHA1_Init(&input_ctx); + return pack_name; } static void parse_pack_header(void) @@ -101,14 +167,12 @@ static void parse_pack_header(void) /* Header consistency check */ if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) - die("packfile '%s' signature mismatch", pack_name); + die("pack signature mismatch"); if (!pack_version_ok(hdr->hdr_version)) - die("packfile '%s' version %d unsupported", - pack_name, ntohl(hdr->hdr_version)); + die("pack version %d unsupported", ntohl(hdr->hdr_version)); nr_objects = ntohl(hdr->hdr_entries); use(sizeof(struct pack_header)); - /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/ } static void 
bad_object(unsigned long offset, const char *format, @@ -122,8 +186,7 @@ static void bad_object(unsigned long offset, const char *format, ...) va_start(params, format); vsnprintf(buf, sizeof(buf), format, params); va_end(params); - die("packfile '%s': bad object at offset %lu: %s", - pack_name, offset, buf); + die("pack has bad object at offset %lu: %s", offset, buf); } static void *unpack_entry_data(unsigned long offset, unsigned long size) @@ -212,7 +275,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_ return unpack_entry_data(obj->offset, obj->size); } -static void * get_data_from_pack(struct object_entry *obj) +static void *get_data_from_pack(struct object_entry *obj) { unsigned long from = obj[0].offset + obj[0].hdr_size; unsigned long len = obj[1].offset - from; @@ -222,9 +285,9 @@ static void * get_data_from_pack(struct object_entry *obj) int st; map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE, - input_fd, from - pg_offset); + mmap_fd, from - pg_offset); if (map == MAP_FAILED) - die("cannot mmap packfile '%s': %s", pack_name, strerror(errno)); + die("cannot mmap pack file: %s", strerror(errno)); data = xmalloc(obj->size); memset(&stream, 0, sizeof(stream)); stream.next_out = data; @@ -261,8 +324,8 @@ static int find_delta(const union delta_base *base) return -first-1; } -static int find_delta_childs(const union delta_base *base, - int *first_index, int *last_index) +static int find_delta_children(const union delta_base *base, + int *first_index, int *last_index) { int first = find_delta(base); int last = first; @@ -304,10 +367,9 @@ static void sha1_object(const void *data, unsigned long size, SHA1_Final(sha1, &ctx); } -static void resolve_delta(struct delta_entry *delta, void *base_data, +static void resolve_delta(struct object_entry *delta_obj, void *base_data, unsigned long base_size, enum object_type type) { - struct object_entry *obj = delta->obj; void *delta_data; unsigned long delta_size; void *result; @@ 
-315,29 +377,34 @@ static void resolve_delta(struct delta_entry *delta, void *base_data, union delta_base delta_base; int j, first, last; - obj->real_type = type; - delta_data = get_data_from_pack(obj); - delta_size = obj->size; + delta_obj->real_type = type; + delta_data = get_data_from_pack(delta_obj); + delta_size = delta_obj->size; result = patch_delta(base_data, base_size, delta_data, delta_size, &result_size); free(delta_data); if (!result) - bad_object(obj->offset, "failed to apply delta"); - sha1_object(result, result_size, type, obj->sha1); + bad_object(delta_obj->offset, "failed to apply delta"); + sha1_object(result, result_size, type, delta_obj->sha1); + nr_resolved_deltas++; - hashcpy(delta_base.sha1, obj->sha1); - if (!find_delta_childs(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) - if (deltas[j].obj->type == OBJ_REF_DELTA) - resolve_delta(&deltas[j], result, result_size, type); + hashcpy(delta_base.sha1, delta_obj->sha1); + if (!find_delta_children(&delta_base, &first, &last)) { + for (j = first; j <= last; j++) { + struct object_entry *child = objects + deltas[j].obj_no; + if (child->real_type == OBJ_REF_DELTA) + resolve_delta(child, result, result_size, type); + } } memset(&delta_base, 0, sizeof(delta_base)); - delta_base.offset = obj->offset; - if (!find_delta_childs(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) - if (deltas[j].obj->type == OBJ_OFS_DELTA) - resolve_delta(&deltas[j], result, result_size, type); + delta_base.offset = delta_obj->offset; + if (!find_delta_children(&delta_base, &first, &last)) { + for (j = first; j <= last; j++) { + struct object_entry *child = objects + deltas[j].obj_no; + if (child->real_type == OBJ_OFS_DELTA) + resolve_delta(child, result, result_size, type); + } } free(result); @@ -353,7 +420,7 @@ static int compare_delta_entry(const void *a, const void *b) /* Parse all objects and return the pack content SHA1 hash */ static void parse_pack_objects(unsigned char *sha1) { - int 
i; + int i, percent = -1; struct delta_entry *delta = deltas; void *data; struct stat st; @@ -362,34 +429,43 @@ static void parse_pack_objects(unsigned char *sha1) * First pass: * - find locations of all objects; * - calculate SHA1 of all non-delta objects; - * - remember base SHA1 for all deltas. + * - remember base (SHA1 or offset) for all deltas. */ + if (verbose) + fprintf(stderr, "Indexing %d objects.\n", nr_objects); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; data = unpack_raw_entry(obj, &delta->base); obj->real_type = obj->type; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) { nr_deltas++; - delta->obj = obj; + delta->obj_no = i; delta++; } else sha1_object(data, obj->size, obj->type, obj->sha1); free(data); + if (verbose) + percent = display_progress(i+1, nr_objects, percent); } objects[i].offset = consumed_bytes; + if (verbose) + fputc('\n', stderr); /* Check pack integrity */ - SHA1_Update(&input_ctx, input_buffer, input_offset); + flush(); SHA1_Final(sha1, &input_ctx); if (hashcmp(fill(20), sha1)) - die("packfile '%s' SHA1 mismatch", pack_name); + die("pack is corrupted (SHA1 mismatch)"); use(20); /* If input_fd is a file, we should have reached its end now. */ if (fstat(input_fd, &st)) - die("cannot fstat packfile '%s': %s", pack_name, strerror(errno)); + die("cannot fstat packfile: %s", strerror(errno)); if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes) - die("packfile '%s' has junk at the end", pack_name); + die("pack has junk at the end"); + + if (!nr_deltas) + return; /* Sort deltas by base SHA1/offset for fast searching */ qsort(deltas, nr_deltas, sizeof(struct delta_entry), @@ -403,6 +479,8 @@ static void parse_pack_objects(unsigned char *sha1) * recursively checking if the resulting object is used as a base * for some more deltas. 
*/ + if (verbose) + fprintf(stderr, "Resolving %d deltas.\n", nr_deltas); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; union delta_base base; @@ -411,32 +489,179 @@ static void parse_pack_objects(unsigned char *sha1) if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; hashcpy(base.sha1, obj->sha1); - ref = !find_delta_childs(&base, &ref_first, &ref_last); + ref = !find_delta_children(&base, &ref_first, &ref_last); memset(&base, 0, sizeof(base)); base.offset = obj->offset; - ofs = !find_delta_childs(&base, &ofs_first, &ofs_last); + ofs = !find_delta_children(&base, &ofs_first, &ofs_last); if (!ref && !ofs) continue; data = get_data_from_pack(obj); if (ref) - for (j = ref_first; j <= ref_last; j++) - if (deltas[j].obj->type == OBJ_REF_DELTA) - resolve_delta(&deltas[j], data, + for (j = ref_first; j <= ref_last; j++) { + struct object_entry *child = objects + deltas[j].obj_no; + if (child->real_type == OBJ_REF_DELTA) + resolve_delta(child, data, obj->size, obj->type); + } if (ofs) - for (j = ofs_first; j <= ofs_last; j++) - if (deltas[j].obj->type == OBJ_OFS_DELTA) - resolve_delta(&deltas[j], data, + for (j = ofs_first; j <= ofs_last; j++) { + struct object_entry *child = objects + deltas[j].obj_no; + if (child->real_type == OBJ_OFS_DELTA) + resolve_delta(child, data, obj->size, obj->type); + } free(data); + if (verbose) + percent = display_progress(nr_resolved_deltas, + nr_deltas, percent); } + if (verbose && nr_resolved_deltas == nr_deltas) + fputc('\n', stderr); +} - /* Check for unresolved deltas */ - for (i = 0; i < nr_deltas; i++) { - if (deltas[i].obj->real_type == OBJ_REF_DELTA || - deltas[i].obj->real_type == OBJ_OFS_DELTA) - die("packfile '%s' has unresolved deltas", pack_name); +static int write_compressed(int fd, void *in, unsigned int size) +{ + z_stream stream; + unsigned long maxsize; + void *out; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, zlib_compression_level); + maxsize = 
deflateBound(&stream, size); + out = xmalloc(maxsize); + + /* Compress it */ + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK); + deflateEnd(&stream); + + size = stream.total_out; + write_or_die(fd, out, size); + free(out); + return size; +} + +static void append_obj_to_pack(void *buf, + unsigned long size, enum object_type type) +{ + struct object_entry *obj = &objects[nr_objects++]; + unsigned char header[10]; + unsigned long s = size; + int n = 0; + unsigned char c = (type << 4) | (s & 15); + s >>= 4; + while (s) { + header[n++] = c | 0x80; + c = s & 0x7f; + s >>= 7; } + header[n++] = c; + write_or_die(output_fd, header, n); + obj[1].offset = obj[0].offset + n; + obj[1].offset += write_compressed(output_fd, buf, size); + sha1_object(buf, size, type, obj->sha1); +} + +static int delta_pos_compare(const void *_a, const void *_b) +{ + struct delta_entry *a = *(struct delta_entry **)_a; + struct delta_entry *b = *(struct delta_entry **)_b; + return a->obj_no - b->obj_no; +} + +static void fix_unresolved_deltas(int nr_unresolved) +{ + struct delta_entry **sorted_by_pos; + int i, n = 0, percent = -1; + + /* + * Since many unresolved deltas may well be themselves base objects + * for more unresolved deltas, we really want to include the + * smallest number of base objects that would cover as much delta + * as possible by picking the + * trunk deltas first, allowing for other deltas to resolve without + * additional base objects. Since most base objects are to be found + * before deltas depending on them, a good heuristic is to start + * resolving deltas in the same order as their position in the pack. 
+ */ + sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos)); + for (i = 0; i < nr_deltas; i++) { + if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA) + continue; + sorted_by_pos[n++] = &deltas[i]; + } + qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare); + + for (i = 0; i < n; i++) { + struct delta_entry *d = sorted_by_pos[i]; + void *data; + unsigned long size; + char type[10]; + enum object_type obj_type; + int j, first, last; + + if (objects[d->obj_no].real_type != OBJ_REF_DELTA) + continue; + data = read_sha1_file(d->base.sha1, type, &size); + if (!data) + continue; + if (!strcmp(type, blob_type)) obj_type = OBJ_BLOB; + else if (!strcmp(type, tree_type)) obj_type = OBJ_TREE; + else if (!strcmp(type, commit_type)) obj_type = OBJ_COMMIT; + else if (!strcmp(type, tag_type)) obj_type = OBJ_TAG; + else die("base object %s is of type '%s'", + sha1_to_hex(d->base.sha1), type); + + find_delta_children(&d->base, &first, &last); + for (j = first; j <= last; j++) { + struct object_entry *child = objects + deltas[j].obj_no; + if (child->real_type == OBJ_REF_DELTA) + resolve_delta(child, data, size, obj_type); + } + + append_obj_to_pack(data, size, obj_type); + free(data); + if (verbose) + percent = display_progress(nr_resolved_deltas, + nr_deltas, percent); + } + free(sorted_by_pos); + if (verbose) + fputc('\n', stderr); +} + +static void readjust_pack_header_and_sha1(unsigned char *sha1) +{ + struct pack_header hdr; + SHA_CTX ctx; + int size; + + /* Rewrite pack header with updated object number */ + if (lseek(output_fd, 0, SEEK_SET) != 0) + die("cannot seek back: %s", strerror(errno)); + if (xread(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + die("cannot read pack header back: %s", strerror(errno)); + hdr.hdr_entries = htonl(nr_objects); + if (lseek(output_fd, 0, SEEK_SET) != 0) + die("cannot seek back: %s", strerror(errno)); + write_or_die(output_fd, &hdr, sizeof(hdr)); + if (lseek(output_fd, 0, SEEK_SET) != 0) + die("cannot seek 
back: %s", strerror(errno)); + + /* Recompute and store the new pack's SHA1 */ + SHA1_Init(&ctx); + do { + unsigned char *buf[4096]; + size = xread(output_fd, buf, sizeof(buf)); + if (size < 0) + die("cannot read pack data back: %s", strerror(errno)); + SHA1_Update(&ctx, buf, size); + } while (size > 0); + SHA1_Final(sha1, &ctx); + write_or_die(output_fd, sha1, 20); } static int sha1_compare(const void *_a, const void *_b) @@ -450,12 +675,12 @@ static int sha1_compare(const void *_a, const void *_b) * On entry *sha1 contains the pack content SHA1 hash, on exit it is * the SHA1 hash of sorted object names. */ -static void write_index_file(const char *index_name, unsigned char *sha1) +static const char *write_index_file(const char *index_name, unsigned char *sha1) { struct sha1file *f; struct object_entry **sorted_by_sha, **list, **last; unsigned int array[256]; - int i; + int i, fd; SHA_CTX ctx; if (nr_objects) { @@ -472,8 +697,19 @@ static void write_index_file(const char *index_name, unsigned char *sha1) else sorted_by_sha = list = last = NULL; - unlink(index_name); - f = sha1create("%s", index_name); + if (!index_name) { + static char tmpfile[PATH_MAX]; + snprintf(tmpfile, sizeof(tmpfile), + "%s/index_XXXXXX", get_object_directory()); + fd = mkstemp(tmpfile); + index_name = xstrdup(tmpfile); + } else { + unlink(index_name); + fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600); + } + if (fd < 0) + die("unable to create %s: %s", index_name, strerror(errno)); + f = sha1fd(fd, index_name); /* * Write the first-level table (the list is sorted, @@ -513,12 +749,64 @@ static void write_index_file(const char *index_name, unsigned char *sha1) sha1close(f, NULL, 1); free(sorted_by_sha); SHA1_Final(sha1, &ctx); + return index_name; +} + +static void final(const char *final_pack_name, const char *curr_pack_name, + const char *final_index_name, const char *curr_index_name, + unsigned char *sha1) +{ + char name[PATH_MAX]; + int err; + + if (!from_stdin) { + close(input_fd); + 
} else { + err = close(output_fd); + if (err) + die("error while closing pack file: %s", strerror(errno)); + chmod(curr_pack_name, 0444); + + /* + * Let's just mimic git-unpack-objects here and write + * the last part of the buffer to stdout. + */ + while (input_len) { + err = xwrite(1, input_buffer + input_offset, input_len); + if (err <= 0) + break; + input_len -= err; + input_offset += err; + } + } + + if (final_pack_name != curr_pack_name) { + if (!final_pack_name) { + snprintf(name, sizeof(name), "%s/pack/pack-%s.pack", + get_object_directory(), sha1_to_hex(sha1)); + final_pack_name = name; + } + if (move_temp_to_file(curr_pack_name, final_pack_name)) + die("cannot store pack file"); + } + + chmod(curr_index_name, 0444); + if (final_index_name != curr_index_name) { + if (!final_index_name) { + snprintf(name, sizeof(name), "%s/pack/pack-%s.idx", + get_object_directory(), sha1_to_hex(sha1)); + final_index_name = name; + } + if (move_temp_to_file(curr_index_name, final_index_name)) + die("cannot store index file"); + } } int main(int argc, char **argv) { - int i; - char *index_name = NULL; + int i, fix_thin_pack = 0; + const char *curr_pack, *pack_name = NULL; + const char *curr_index, *index_name = NULL; char *index_name_buf = NULL; unsigned char sha1[20]; @@ -526,7 +814,13 @@ int main(int argc, char **argv) const char *arg = argv[i]; if (*arg == '-') { - if (!strcmp(arg, "-o")) { + if (!strcmp(arg, "--stdin")) { + from_stdin = 1; + } else if (!strcmp(arg, "--fix-thin")) { + fix_thin_pack = 1; + } else if (!strcmp(arg, "-v")) { + verbose = 1; + } else if (!strcmp(arg, "-o")) { if (index_name || (i+1) >= argc) usage(index_pack_usage); index_name = argv[++i]; @@ -540,9 +834,11 @@ int main(int argc, char **argv) pack_name = arg; } - if (!pack_name) + if (!pack_name && !from_stdin) usage(index_pack_usage); - if (!index_name) { + if (fix_thin_pack && !from_stdin) + die("--fix-thin cannot be used without --stdin"); + if (!index_name && pack_name) { int len = 
strlen(pack_name); if (!has_extension(pack_name, ".pack")) die("packfile name '%s' does not end with '.pack'", @@ -553,17 +849,43 @@ int main(int argc, char **argv) index_name = index_name_buf; } - open_pack_file(); + curr_pack = open_pack_file(pack_name); parse_pack_header(); - objects = xcalloc(nr_objects + 1, sizeof(struct object_entry)); - deltas = xcalloc(nr_objects, sizeof(struct delta_entry)); + objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry)); + deltas = xmalloc(nr_objects * sizeof(struct delta_entry)); + if (verbose) + setup_progress_signal(); parse_pack_objects(sha1); + if (nr_deltas != nr_resolved_deltas) { + if (fix_thin_pack) { + int nr_unresolved = nr_deltas - nr_resolved_deltas; + int nr_objects_initial = nr_objects; + if (nr_unresolved <= 0) + die("confusion beyond insanity"); + objects = xrealloc(objects, + (nr_objects + nr_unresolved + 1) + * sizeof(*objects)); + fix_unresolved_deltas(nr_unresolved); + if (verbose) + fprintf(stderr, "%d objects were added to complete this thin pack.\n", + nr_objects - nr_objects_initial); + readjust_pack_header_and_sha1(sha1); + } + if (nr_deltas != nr_resolved_deltas) + die("pack has %d unresolved deltas", + nr_deltas - nr_resolved_deltas); + } else { + /* Flush remaining pack final 20-byte SHA1. */ + flush(); + } free(deltas); - write_index_file(index_name, sha1); + curr_index = write_index_file(index_name, sha1); + final(pack_name, curr_pack, index_name, curr_index, sha1); free(objects); free(index_name_buf); - printf("%s\n", sha1_to_hex(sha1)); + if (!from_stdin) + printf("%s\n", sha1_to_hex(sha1)); return 0; }