1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-10-28 12:59:41 +01:00
git/archive.c
brian m. carlson ab90ecae99 convert: permit passing additional metadata to filter processes
There are a variety of situations where a filter process can make use of
some additional metadata.  For example, some people find the ident
filter too limiting and would like to include the commit or the branch
in their smudged files.  This information isn't available during
checkout as HEAD hasn't been updated at that point, and it wouldn't be
available in archives either.

Let's add a way to pass this metadata down to the filter.  We pass the
blob we're operating on, the treeish (preferring the commit over the
tree if one exists), and the ref we're operating on.  Note that we won't
pass this information in all cases, such as when renormalizing or when
we're performing diffs, since it doesn't make sense in those cases.

The data we currently get from the filter process looks like the
following:

  command=smudge
  pathname=git.c
  0000

With this change, we'll get data more like this:

  command=smudge
  pathname=git.c
  refname=refs/tags/v2.25.1
  treeish=c522f061d551c9bb8684a7c3859b2ece4499b56b
  blob=7be7ad34bd053884ec48923706e70c81719a8660
  0000

There are a couple things to note about this approach.  For operations
like checkout, treeish will always be a commit, since we cannot check
out individual trees, but for other operations, like archive, we can end
up operating on only a particular tree, so we'll provide only a tree as
the treeish.  Similar comments apply for refname, since there are a
variety of cases in which we won't have a ref.

This commit wires up the code to print this information, but doesn't
pass any of it at this point.  In a future commit, we'll have various
code paths pass the actual useful data down.

Signed-off-by: brian m. carlson <bk2204@github.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-03-16 11:37:02 -07:00

583 lines
15 KiB
C

#include "cache.h"
#include "config.h"
#include "refs.h"
#include "object-store.h"
#include "commit.h"
#include "tree-walk.h"
#include "attr.h"
#include "archive.h"
#include "parse-options.h"
#include "unpack-trees.h"
#include "dir.h"
/*
 * Usage strings for "git archive".  The NULL-terminated list is handed to
 * parse_options() and usage_with_options() in parse_archive_args().
 */
static char const * const archive_usage[] = {
N_("git archive [<options>] <tree-ish> [<path>...]"),
N_("git archive --list"),
N_("git archive --remote <repo> [--exec <cmd>] [<options>] <tree-ish> [<path>...]"),
N_("git archive --remote <repo> [--exec <cmd>] --list"),
NULL
};
/*
 * Table of known archivers.  register_archiver() appends to it;
 * lookup_archiver() and archive_format_from_filename() search it by name.
 */
static const struct archiver **archivers;
/* Number of entries currently stored in archivers. */
static int nr_archivers;
/* Allocation bookkeeping handed to ALLOC_GROW() in register_archiver(). */
static int alloc_archivers;
/*
 * Loaded from the "uploadarchive.allowunreachable" config in
 * write_archive(); when zero, parse_treeish_arg() insists that remote
 * requests name a ref.
 */
static int remote_allow_unreachable;
/*
 * Append ar to the table of known archivers, growing the table first
 * so that there is room for one more entry.
 */
void register_archiver(struct archiver *ar)
{
	ALLOC_GROW(archivers, nr_archivers + 1, alloc_archivers);
	archivers[nr_archivers] = ar;
	nr_archivers++;
}
/*
 * Initialize the built-in tar and zip archivers.
 * NOTE(review): presumably each init function registers its formats via
 * register_archiver(); both are defined outside this file -- confirm.
 */
void init_archivers(void)
{
init_tar_archiver();
init_zip_archiver();
}
static void format_subst(const struct commit *commit,
const char *src, size_t len,
struct strbuf *buf)
{
char *to_free = NULL;
struct strbuf fmt = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.date_mode.type = DATE_NORMAL;
ctx.abbrev = DEFAULT_ABBREV;
if (src == buf->buf)
to_free = strbuf_detach(buf, NULL);
for (;;) {
const char *b, *c;
b = memmem(src, len, "$Format:", 8);
if (!b)
break;
c = memchr(b + 8, '$', (src + len) - b - 8);
if (!c)
break;
strbuf_reset(&fmt);
strbuf_add(&fmt, b + 8, c - b - 8);
strbuf_add(buf, src, b - src);
format_commit_message(commit, fmt.buf, buf, &ctx);
len -= c + 1 - src;
src = c + 1;
}
strbuf_add(buf, src, len);
strbuf_release(&fmt);
free(to_free);
}
void *object_file_to_archive(const struct archiver_args *args,
const char *path, const struct object_id *oid,
unsigned int mode, enum object_type *type,
unsigned long *sizep)
{
void *buffer;
const struct commit *commit = args->convert ? args->commit : NULL;
path += args->baselen;
buffer = read_object_file(oid, type, sizep);
if (buffer && S_ISREG(mode)) {
struct strbuf buf = STRBUF_INIT;
size_t size = 0;
strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, NULL);
if (commit)
format_subst(commit, buf.buf, buf.len, &buf);
buffer = strbuf_detach(&buf, &size);
*sizep = size;
}
return buffer;
}
/*
 * A directory whose archive entry has been postponed; entries are created
 * by queue_directory() and consumed by write_directory().
 */
struct directory {
/* Directory that was at archiver_context.bottom when this one was queued. */
struct directory *up;
/* Object id passed to the tree-walk callback for this directory. */
struct object_id oid;
/*
 * baselen: length of the base part at the start of path.
 * len: length of path as formatted by queue_directory(), including the
 * trailing '/'.
 */
int baselen, len;
/* Mode and stage as received by the tree-walk callback. */
unsigned mode;
int stage;
/* "<base><filename>/", NUL-terminated. */
char path[FLEX_ARRAY];
};
/* State handed to the tree-walk callback set up in write_archive_entries(). */
struct archiver_context {
/* Arguments of the current archive run. */
struct archiver_args *args;
/* Format-specific callback that emits a single archive entry. */
write_archive_entry_fn_t write_entry;
/* Most recently queued directory; the queue is linked through ->up. */
struct directory *bottom;
};
/*
 * Look up the "export-ignore" and "export-subst" attributes of path.
 *
 * The attr_check is created on the first call and reused afterwards, so
 * every call returns the same object and a later lookup replaces the
 * results of an earlier one.  Item 0 is export-ignore, item 1 is
 * export-subst.
 */
static const struct attr_check *get_archive_attrs(struct index_state *istate,
						  const char *path)
{
	static struct attr_check *cached;

	if (cached == NULL)
		cached = attr_check_initl("export-ignore", "export-subst",
					  NULL);

	git_check_attr(istate, path, cached);
	return cached;
}
/*
 * Return 1 when check is non-NULL and its first item (export-ignore, as
 * set up by get_archive_attrs()) is true; 0 otherwise.
 */
static int check_attr_export_ignore(const struct attr_check *check)
{
	if (!check)
		return 0;
	return ATTR_TRUE(check->items[0].value) ? 1 : 0;
}
/*
 * Return 1 when check is non-NULL and its second item (export-subst, as
 * set up by get_archive_attrs()) is true; 0 otherwise.
 */
static int check_attr_export_subst(const struct attr_check *check)
{
	if (!check)
		return 0;
	return ATTR_TRUE(check->items[1].value) ? 1 : 0;
}
/*
 * Emit one archive entry through the write_entry callback stored in
 * context (a struct archiver_context).
 *
 * The entry name is args->base + base + filename, with a trailing '/'
 * for directories and gitlinks.  For anything that is not a directory
 * the export attributes are looked up on the name without the
 * args->base prefix: export-ignore entries are skipped (returning 0) and
 * export-subst is recorded in args->convert, which is reset to 0 at the
 * start of every call.
 *
 * Returns a non-zero error from write_entry as is.  On success the
 * result is READ_TREE_RECURSIVE for directories and 0 for everything
 * else.  The stage parameter is not used.
 */
static int write_archive_entry(const struct object_id *oid, const char *base,
		int baselen, const char *filename, unsigned mode, int stage,
		void *context)
{
	static struct strbuf path = STRBUF_INIT;
	struct archiver_context *c = context;
	struct archiver_args *args = c->args;
	const int is_dir = S_ISDIR(mode);
	const int wants_slash = is_dir || S_ISGITLINK(mode);
	int err;

	args->convert = 0;

	strbuf_reset(&path);
	strbuf_grow(&path, PATH_MAX);
	strbuf_add(&path, args->base, args->baselen);
	strbuf_add(&path, base, baselen);
	strbuf_addstr(&path, filename);
	if (wants_slash)
		strbuf_addch(&path, '/');

	if (!is_dir) {
		const struct attr_check *check =
			get_archive_attrs(args->repo->index,
					  path.buf + args->baselen);

		if (check_attr_export_ignore(check))
			return 0;
		args->convert = check_attr_export_subst(check);
	}

	if (args->verbose)
		fprintf(stderr, "%.*s\n", (int)path.len, path.buf);

	err = c->write_entry(args, oid, path.buf, path.len, mode);
	if (err || !wants_slash)
		return err;
	return is_dir ? READ_TREE_RECURSIVE : 0;
}
/*
 * Allocate a struct directory describing "<base><filename>/" and push it
 * onto the queue at c->bottom.  The object hash sha1, the mode and the
 * stage are stored alongside so that write_directory() can emit the
 * entry later.
 */
static void queue_directory(const unsigned char *sha1,
		struct strbuf *base, const char *filename,
		unsigned mode, int stage, struct archiver_context *c)
{
	/* room for base, filename, the trailing '/' and the NUL */
	size_t path_size = st_add4(base->len, 1, strlen(filename), 1);
	struct directory *entry = xmalloc(st_add(sizeof(*entry), path_size));

	entry->len = xsnprintf(entry->path, path_size, "%.*s%s/",
			       (int)base->len, base->buf, filename);
	entry->baselen = base->len;
	entry->mode = mode;
	entry->stage = stage;
	hashcpy(entry->oid.hash, sha1);

	entry->up = c->bottom;
	c->bottom = entry;
}
/*
 * Write out every queued directory, outermost first, and empty the queue.
 *
 * The entry at c->bottom is popped, the directories queued before it are
 * written by recursing, and then the popped entry itself is passed to
 * write_archive_entry() without its trailing slash.  If writing an
 * earlier directory fails, the popped entry is not written.
 *
 * Returns 0 on success (or when nothing is queued) and -1 if any
 * write_archive_entry() call returned something other than
 * READ_TREE_RECURSIVE.
 */
static int write_directory(struct archiver_context *c)
{
	struct directory *d = c->bottom;
	int failed;

	if (!d)
		return 0;
	c->bottom = d->up;
	d->path[d->len - 1] = '\0'; /* no trailing slash */

	failed = write_directory(c);
	if (!failed)
		failed = write_archive_entry(&d->oid, d->path, d->baselen,
					     d->path + d->baselen, d->mode,
					     d->stage, c) != READ_TREE_RECURSIVE;

	free(d);
	return failed ? -1 : 0;
}
static int queue_or_write_archive_entry(const struct object_id *oid,
struct strbuf *base, const char *filename,
unsigned mode, int stage, void *context)
{
struct archiver_context *c = context;
while (c->bottom &&
!(base->len >= c->bottom->len &&
!strncmp(base->buf, c->bottom->path, c->bottom->len))) {
struct directory *next = c->bottom->up;
free(c->bottom);
c->bottom = next;
}
if (S_ISDIR(mode)) {
size_t baselen = base->len;
const struct attr_check *check;
/* Borrow base, but restore its original value when done. */
strbuf_addstr(base, filename);
strbuf_addch(base, '/');
check = get_archive_attrs(c->args->repo->index, base->buf);
strbuf_setlen(base, baselen);
if (check_attr_export_ignore(check))
return 0;
queue_directory(oid->hash, base, filename,
mode, stage, c);
return READ_TREE_RECURSIVE;
}
if (write_directory(c))
return -1;
return write_archive_entry(oid, base->buf, base->len, filename, mode,
stage, context);
}
/*
 * Walk args->tree and feed every selected entry to write_entry.
 *
 * If args->base ends in '/', an entry for that prefix directory is
 * written first with mode 040777; repeated trailing slashes are left out
 * of the length passed to write_entry.  Unless args->worktree_attributes
 * is set, the tree is then unpacked into the repository index and
 * attribute lookups are switched to read from the index.
 *
 * Returns 0 on success, the error from write_entry for the prefix
 * directory, -1 if unpack_trees() fails, or the error reported by the
 * tree walk.  Directories still queued after the walk are freed.
 */
int write_archive_entries(struct archiver_args *args,
		write_archive_entry_fn_t write_entry)
{
	struct archiver_context ctx;
	int ret;

	memset(&ctx, 0, sizeof(ctx));
	ctx.args = args;
	ctx.write_entry = write_entry;

	if (args->baselen > 0 && args->base[args->baselen - 1] == '/') {
		size_t len;

		for (len = args->baselen;
		     len > 1 && args->base[len - 2] == '/';
		     len--)
			; /* skip redundant trailing slashes */

		if (args->verbose)
			fprintf(stderr, "%.*s\n", (int)len, args->base);
		ret = write_entry(args, &args->tree->object.oid, args->base,
				  len, 040777);
		if (ret)
			return ret;
	}

	/*
	 * Setup index and instruct attr to read index only
	 */
	if (!args->worktree_attributes) {
		struct unpack_trees_options opts;
		struct tree_desc desc;

		memset(&opts, 0, sizeof(opts));
		opts.index_only = 1;
		opts.head_idx = -1;
		opts.src_index = args->repo->index;
		opts.dst_index = args->repo->index;
		opts.fn = oneway_merge;
		init_tree_desc(&desc, args->tree->buffer, args->tree->size);
		if (unpack_trees(1, &desc, &opts))
			return -1;
		git_attr_set_direction(GIT_ATTR_INDEX);
	}

	ret = read_tree_recursive(args->repo, args->tree, "",
				  0, 0, &args->pathspec,
				  queue_or_write_archive_entry,
				  &ctx);
	if (ret == READ_TREE_RECURSIVE)
		ret = 0;

	while (ctx.bottom) {
		struct directory *leftover = ctx.bottom;

		ctx.bottom = leftover->up;
		free(leftover);
	}
	return ret;
}
static const struct archiver *lookup_archiver(const char *name)
{
int i;
if (!name)
return NULL;
for (i = 0; i < nr_archivers; i++) {
if (!strcmp(name, archivers[i]->name))
return archivers[i];
}
return NULL;
}
/* Callback data for reject_entry(), filled in by path_exists(). */
struct path_exists_context {
/* Pathspec built from the single path being tested. */
struct pathspec pathspec;
/* Archiver arguments; reject_entry() reads args->repo->index from it. */
struct archiver_args *args;
};
static int reject_entry(const struct object_id *oid, struct strbuf *base,
const char *filename, unsigned mode,
int stage, void *context)
{
int ret = -1;
struct path_exists_context *ctx = context;
if (S_ISDIR(mode)) {
struct strbuf sb = STRBUF_INIT;
strbuf_addbuf(&sb, base);
strbuf_addstr(&sb, filename);
if (!match_pathspec(ctx->args->repo->index,
&ctx->pathspec,
sb.buf, sb.len, 0, NULL, 1))
ret = READ_TREE_RECURSIVE;
strbuf_release(&sb);
}
return ret;
}
/*
 * Return 1 if walking args->tree with a pathspec made of just path
 * produces a non-zero result from read_tree_recursive() (reject_entry()
 * is the callback), and 0 otherwise.
 */
static int path_exists(struct archiver_args *args, const char *path)
{
	const char *paths[] = { path, NULL };
	struct path_exists_context ctx;
	int walk_result;

	ctx.args = args;
	parse_pathspec(&ctx.pathspec, 0, 0, "", paths);
	ctx.pathspec.recursive = 1;

	walk_result = read_tree_recursive(args->repo, args->tree, "",
					  0, 0, &ctx.pathspec,
					  reject_entry, &ctx);
	clear_pathspec(&ctx.pathspec);

	return walk_result ? 1 : 0;
}
/*
 * Parse the NULL-terminated pathspec list into ar_args->pathspec (marked
 * recursive) and die if any non-empty element matches nothing in the
 * tree being archived.  A NULL list skips the per-element check.
 */
static void parse_pathspec_arg(const char **pathspec,
		struct archiver_args *ar_args)
{
	/*
	 * This must stay consistent with the parse_pathspec() call in
	 * path_exists().  Each pattern is tested on its own, so patterns
	 * that depend on one another would not be handled correctly.
	 */
	parse_pathspec(&ar_args->pathspec, 0,
		       PATHSPEC_PREFER_FULL,
		       "", pathspec);
	ar_args->pathspec.recursive = 1;

	if (!pathspec)
		return;

	for (; *pathspec; pathspec++) {
		if (!**pathspec)
			continue;
		if (!path_exists(ar_args, *pathspec))
			die(_("pathspec '%s' did not match any files"), *pathspec);
	}
}
/*
 * Resolve the tree-ish named by argv[0] and store the result in ar_args
 * (tree, commit_oid, commit and time).
 *
 * For remote requests, unless remote_allow_unreachable is set, the part
 * of the name before any ':' must resolve as a ref.  If the name leads to
 * a commit, the archive time is that commit's date; otherwise commit and
 * commit_oid are NULL and the current time is used.
 *
 * When prefix is non-NULL, the tree is narrowed to the subtree found at
 * that path, which must be a directory.
 *
 * Dies if the name is not a ref (remote case), is not a valid object
 * name, does not resolve to a tree, if prefix is not a directory in the
 * tree, or if the subtree for prefix cannot be parsed.
 */
static void parse_treeish_arg(const char **argv,
		struct archiver_args *ar_args, const char *prefix,
		int remote)
{
	const char *name = argv[0];
	const struct object_id *commit_oid;
	time_t archive_time;
	struct tree *tree;
	const struct commit *commit;
	struct object_id oid;

	/* Remotes are only allowed to fetch actual refs */
	if (remote && !remote_allow_unreachable) {
		char *ref = NULL;
		const char *colon = strchrnul(name, ':');
		int refnamelen = colon - name;

		if (!dwim_ref(name, refnamelen, &oid, &ref))
			die(_("no such ref: %.*s"), refnamelen, name);
		free(ref);
	}

	if (get_oid(name, &oid))
		die(_("not a valid object name: %s"), name);

	commit = lookup_commit_reference_gently(ar_args->repo, &oid, 1);
	if (commit) {
		commit_oid = &commit->object.oid;
		archive_time = commit->date;
	} else {
		commit_oid = NULL;
		archive_time = time(NULL);
	}

	tree = parse_tree_indirect(&oid);
	if (tree == NULL)
		die(_("not a tree object: %s"), oid_to_hex(&oid));

	if (prefix) {
		struct object_id tree_oid;
		unsigned short mode;
		int err;

		err = get_tree_entry(ar_args->repo,
				     &tree->object.oid,
				     prefix, &tree_oid,
				     &mode);
		if (err || !S_ISDIR(mode))
			die(_("current working directory is untracked"));

		/*
		 * The subtree may be missing or corrupt; fail here rather
		 * than storing a NULL tree that is dereferenced later.
		 */
		tree = parse_tree_indirect(&tree_oid);
		if (tree == NULL)
			die(_("not a tree object: %s"), oid_to_hex(&tree_oid));
	}

	ar_args->tree = tree;
	ar_args->commit_oid = commit_oid;
	ar_args->commit = commit;
	ar_args->time = archive_time;
}
/*
 * Option-table helpers for the -0 ... -9 compression level switches.
 * Both expand to OPT_SET_INT_F() with a short name only (the long name is
 * NULL), target v, value p and PARSE_OPT_NONEG.  OPT__COMPR takes a help
 * string h; OPT__COMPR_HIDDEN uses an empty help string and also sets
 * PARSE_OPT_HIDDEN.
 */
#define OPT__COMPR(s, v, h, p) \
OPT_SET_INT_F(s, NULL, v, h, p, PARSE_OPT_NONEG)
#define OPT__COMPR_HIDDEN(s, v, p) \
OPT_SET_INT_F(s, NULL, v, "", p, PARSE_OPT_NONEG | PARSE_OPT_HIDDEN)
/*
 * Parse the command-line options of "git archive".
 *
 * On return *ar points at the archiver chosen for the requested format,
 * and args has compression_level, verbose, base, baselen and
 * worktree_attributes filled in.  The return value is the argument count
 * reported by parse_options().
 *
 * name_hint, when non-NULL, is a filename used to guess the format if
 * --format was not given; without either, the format is "tar".
 * is_remote limits the usable and listed formats to those flagged
 * ARCHIVER_REMOTE.
 *
 * Dies on --remote, --exec or --output, on an unknown format, and on a
 * compression level given for a format without
 * ARCHIVER_WANT_COMPRESSION_LEVELS.  With --list the format names are
 * printed and the process exits.  Shows usage when no tree-ish argument
 * is left after option parsing.
 */
static int parse_archive_args(int argc, const char **argv,
const struct archiver **ar, struct archiver_args *args,
const char *name_hint, int is_remote)
{
const char *format = NULL;
const char *base = NULL;
const char *remote = NULL;
const char *exec = NULL;
const char *output = NULL;
/* -1 means that no -<digit> switch was given. */
int compression_level = -1;
int verbose = 0;
int i;
int list = 0;
int worktree_attributes = 0;
struct option opts[] = {
OPT_GROUP(""),
OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
OPT_STRING(0, "prefix", &base, N_("prefix"),
N_("prepend prefix to each pathname in the archive")),
OPT_STRING('o', "output", &output, N_("file"),
N_("write the archive to this file")),
OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
N_("read .gitattributes in working directory")),
OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
OPT__COMPR('0', &compression_level, N_("store only"), 0),
OPT__COMPR('1', &compression_level, N_("compress faster"), 1),
OPT__COMPR_HIDDEN('2', &compression_level, 2),
OPT__COMPR_HIDDEN('3', &compression_level, 3),
OPT__COMPR_HIDDEN('4', &compression_level, 4),
OPT__COMPR_HIDDEN('5', &compression_level, 5),
OPT__COMPR_HIDDEN('6', &compression_level, 6),
OPT__COMPR_HIDDEN('7', &compression_level, 7),
OPT__COMPR_HIDDEN('8', &compression_level, 8),
OPT__COMPR('9', &compression_level, N_("compress better"), 9),
OPT_GROUP(""),
OPT_BOOL('l', "list", &list,
N_("list supported archive formats")),
OPT_GROUP(""),
OPT_STRING(0, "remote", &remote, N_("repo"),
N_("retrieve the archive from remote repository <repo>")),
OPT_STRING(0, "exec", &exec, N_("command"),
N_("path to the remote git-upload-archive command")),
OPT_END()
};
argc = parse_options(argc, argv, NULL, opts, archive_usage, 0);
/*
 * --remote, --exec and --output are accepted by the option table but
 * rejected here.
 * NOTE(review): presumably callers consume them before calling in --
 * confirm against the callers of write_archive().
 */
if (remote)
die(_("Unexpected option --remote"));
if (exec)
die(_("Option --exec can only be used together with --remote"));
if (output)
die(_("Unexpected option --output"));
if (!base)
base = "";
if (list) {
for (i = 0; i < nr_archivers; i++)
if (!is_remote || archivers[i]->flags & ARCHIVER_REMOTE)
printf("%s\n", archivers[i]->name);
exit(0);
}
/* Format precedence: --format, then the name_hint extension, then "tar". */
if (!format && name_hint)
format = archive_format_from_filename(name_hint);
if (!format)
format = "tar";
/* We need at least one parameter -- tree-ish */
if (argc < 1)
usage_with_options(archive_usage, opts);
*ar = lookup_archiver(format);
if (!*ar || (is_remote && !((*ar)->flags & ARCHIVER_REMOTE)))
die(_("Unknown archive format '%s'"), format);
args->compression_level = Z_DEFAULT_COMPRESSION;
if (compression_level != -1) {
if ((*ar)->flags & ARCHIVER_WANT_COMPRESSION_LEVELS)
args->compression_level = compression_level;
else {
die(_("Argument not supported for format '%s': -%d"),
format, compression_level);
}
}
args->verbose = verbose;
args->base = base;
args->baselen = strlen(base);
args->worktree_attributes = worktree_attributes;
return argc;
}
/*
 * Produce an archive from the command line in argv.
 *
 * Reads the "uploadarchive.allowunreachable" setting and the default
 * configuration, parses the options, then the tree-ish (argv[0]) and the
 * pathspecs (argv + 1), and finally calls the write_archive callback of
 * the selected archiver, returning its result.
 *
 * repo is stored in the archiver arguments; prefix, name_hint and remote
 * are passed on to parse_treeish_arg() and parse_archive_args().
 */
int write_archive(int argc, const char **argv, const char *prefix,
struct repository *repo,
const char *name_hint, int remote)
{
const struct archiver *ar = NULL;
struct archiver_args args;
git_config_get_bool("uploadarchive.allowunreachable", &remote_allow_unreachable);
git_config(git_default_config, NULL);
args.repo = repo;
argc = parse_archive_args(argc, argv, &ar, &args, name_hint, remote);
if (!startup_info->have_repository) {
/*
 * We know this will die() with an error, so we could just
 * die ourselves; but its error message will be more specific
 * than what we could write here.
 */
setup_git_directory();
}
parse_treeish_arg(argv, &args, prefix, remote);
parse_pathspec_arg(argv + 1, &args);
return ar->write_archive(ar, &args);
}
/*
 * Return 1 if filename ends in "." followed by ext and has at least one
 * character before that dot; return 0 otherwise.
 *
 * Lengths are kept as size_t and compared before subtracting, so a
 * filename shorter than ext can never produce a wrapped-around or
 * truncated offset.
 */
static int match_extension(const char *filename, const char *ext)
{
	size_t filename_len = strlen(filename);
	size_t ext_len = strlen(ext);
	size_t prefixlen;

	if (filename_len < ext_len)
		return 0;
	prefixlen = filename_len - ext_len;

	/*
	 * We need 1 character for the '.', and 1 character to ensure that the
	 * prefix is non-empty (i.e., we don't match .tar.gz with no actual
	 * filename).
	 */
	if (prefixlen < 2 || filename[prefixlen - 1] != '.')
		return 0;
	return !strcmp(filename + prefixlen, ext);
}
/*
 * Guess the archive format from filename: return the name of the first
 * registered archiver for which match_extension() accepts filename, or
 * NULL when none does.
 */
const char *archive_format_from_filename(const char *filename)
{
	int idx;

	for (idx = 0; idx < nr_archivers; idx++) {
		const char *format = archivers[idx]->name;

		if (match_extension(filename, format))
			return format;
	}
	return NULL;
}