1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-11-01 23:07:55 +01:00
git/vcs-svn/svndump.c
David Barr 7c5817d3ba vcs-svn: use strbuf for author, UUID, and URL
Use strbufs and strings instead of interned strings for values of rev,
dump, and node fields that happen to be strings.  After this change,
the only remaining string_pool use is for paths in the repo_tree API
and internals.

Functional change: treat an empty author, UUID, or URL as none at all.
So for example, in repos where the first revision has an empty
svn:author property, the first rev will be treated as by "nobody"
rather than by a person with empty name and email address created by
prepending an @ sign to the repository UUID.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2011-03-22 18:01:48 -05:00

420 lines
11 KiB
C

/*
* Parse and rearrange a svnadmin dump.
* Create the dump with:
* svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
*
* Licensed under a two-clause BSD-style license.
* See LICENSE for details.
*/
#include "cache.h"
#include "repo_tree.h"
#include "fast_export.h"
#include "line_buffer.h"
#include "string_pool.h"
#include "strbuf.h"
/* Values stored in node_ctx.action, chosen from the Node-action header. */
#define NODEACT_REPLACE 4
#define NODEACT_DELETE 3
#define NODEACT_ADD 2
#define NODEACT_CHANGE 1
#define NODEACT_UNKNOWN 0
/* Parser states used for active_ctx in svndump_read(). */
#define DUMP_CTX 0
#define REV_CTX 1
#define NODE_CTX 2
/* Sentinel for node_ctx.propLength/textLength: no length header seen. */
#define LENGTH_UNKNOWN (~0)
/* NOTE(review): not referenced in the code visible here -- confirm whether it is still needed. */
#define DATE_RFC2822_LEN 31
/* The dump stream that every reader in this file consumes. */
static struct line_buffer input = LINE_BUFFER_INIT;
/*
 * Headers collected for the node record that is currently being parsed.
 * Cleared by reset_node_ctx() and consumed by handle_node().
 */
static struct {
	uint32_t action;	/* one of the NODEACT_* values */
	uint32_t propLength;	/* Prop-content-length, or LENGTH_UNKNOWN */
	uint32_t textLength;	/* Text-content-length, or LENGTH_UNKNOWN */
	uint32_t srcRev;	/* Node-copyfrom-rev; 0 when not a copy */
	uint32_t type;		/* REPO_MODE_* value; 0 until a header or property sets it */
	uint32_t src[REPO_MAX_PATH_DEPTH];	/* tokenized Node-copyfrom-path */
	uint32_t dst[REPO_MAX_PATH_DEPTH];	/* tokenized Node-path */
	uint32_t text_delta;	/* nonzero if Text-delta was "true" */
	uint32_t prop_delta;	/* nonzero if Prop-delta was "true" */
} node_ctx;
/*
 * Data gathered for the revision that is currently being parsed.
 * Cleared by reset_rev_ctx() and consumed by handle_revision().
 */
static struct {
	uint32_t revision;		/* value of the Revision-number header */
	unsigned long timestamp;	/* parsed from svn:date; 0 if none */
	struct strbuf log;		/* svn:log property */
	struct strbuf author;		/* svn:author property; empty if unset */
} rev_ctx;
/*
 * Dump-wide data: format version, repository UUID, and the URL given
 * to svndump_read().
 */
static struct {
	uint32_t version;	/* SVN-fs-dump-format-version; reset to 1 */
	struct strbuf uuid;	/* value of the UUID header */
	struct strbuf url;	/* url passed by the caller; empty if none */
} dump_ctx;
/*
 * Interned identifiers for every property and header name this parser
 * recognizes.  Filled in by init_keys(); names read from the dump are
 * interned too and compared against these by value.
 */
static struct {
	/* revision and node properties */
	uint32_t svn_log;
	uint32_t svn_author;
	uint32_t svn_date;
	uint32_t svn_executable;
	uint32_t svn_special;
	/* dump, revision and node headers */
	uint32_t uuid;
	uint32_t revision_number;
	uint32_t node_path;
	uint32_t node_kind;
	uint32_t node_action;
	uint32_t node_copyfrom_path;
	uint32_t node_copyfrom_rev;
	uint32_t text_content_length;
	uint32_t prop_content_length;
	uint32_t content_length;
	uint32_t svn_fs_dump_format_version;
	/* version 3 format */
	uint32_t text_delta;
	uint32_t prop_delta;
} keys;
/*
 * Start a fresh node record: forget every header of the previous node
 * and tokenize fname on "/" into node_ctx.dst.
 *
 * fname is the Node-path value; svndump_init() and svndump_deinit()
 * pass NULL.
 */
static void reset_node_ctx(char *fname)
{
	/* Nothing is known about the node yet. */
	node_ctx.action = NODEACT_UNKNOWN;
	node_ctx.type = 0;
	node_ctx.textLength = LENGTH_UNKNOWN;
	node_ctx.propLength = LENGTH_UNKNOWN;
	node_ctx.prop_delta = 0;
	node_ctx.text_delta = 0;

	/* No copy source. */
	node_ctx.srcRev = 0;
	node_ctx.src[0] = ~0;

	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
}
/*
 * Start a fresh revision record numbered `revision`: empty the log and
 * author buffers and zero the timestamp.
 */
static void reset_rev_ctx(uint32_t revision)
{
	strbuf_reset(&rev_ctx.author);
	strbuf_reset(&rev_ctx.log);
	rev_ctx.timestamp = 0;
	rev_ctx.revision = revision;
}
/*
 * Reset the dump-wide state: version back to 1, UUID emptied, and the
 * URL replaced by `url` (left empty when url is NULL).
 */
static void reset_dump_ctx(const char *url)
{
	dump_ctx.version = 1;
	strbuf_reset(&dump_ctx.uuid);

	strbuf_reset(&dump_ctx.url);
	if (url != NULL)
		strbuf_addstr(&dump_ctx.url, url);
}
/*
 * Intern every property and header name the parser recognizes and
 * store the resulting identifiers in `keys`.  The names are interned
 * in the order they are listed in the table.
 */
static void init_keys(void)
{
	static const struct {
		uint32_t *slot;
		const char *name;
	} table[] = {
		{ &keys.svn_log, "svn:log" },
		{ &keys.svn_author, "svn:author" },
		{ &keys.svn_date, "svn:date" },
		{ &keys.svn_executable, "svn:executable" },
		{ &keys.svn_special, "svn:special" },
		{ &keys.uuid, "UUID" },
		{ &keys.revision_number, "Revision-number" },
		{ &keys.node_path, "Node-path" },
		{ &keys.node_kind, "Node-kind" },
		{ &keys.node_action, "Node-action" },
		{ &keys.node_copyfrom_path, "Node-copyfrom-path" },
		{ &keys.node_copyfrom_rev, "Node-copyfrom-rev" },
		{ &keys.text_content_length, "Text-content-length" },
		{ &keys.prop_content_length, "Prop-content-length" },
		{ &keys.content_length, "Content-length" },
		{ &keys.svn_fs_dump_format_version, "SVN-fs-dump-format-version" },
		/* version 3 format (Subversion 1.1.0) */
		{ &keys.text_delta, "Text-delta" },
		{ &keys.prop_delta, "Prop-delta" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		*table[i].slot = pool_intern(table[i].name);
}
/*
 * Record one property change.
 *
 * key      - interned property name
 * val      - new value, or NULL when the property is being deleted
 * len      - number of bytes in val
 * type_set - in/out flag: nonzero once svn:executable or svn:special
 *            has set the node type within the current property block
 *
 * svn:log, svn:author and svn:date update rev_ctx; svn:executable and
 * svn:special update node_ctx.type.  Any other key is ignored.
 */
static void handle_property(uint32_t key, const char *val, uint32_t len,
				uint32_t *type_set)
{
	if (key == keys.svn_log) {
		if (!val)
			die("invalid dump: unsets svn:log");
		strbuf_reset(&rev_ctx.log);
		strbuf_add(&rev_ctx.log, val, len);
		return;
	}
	if (key == keys.svn_author) {
		/* Deleting the author simply leaves it empty. */
		strbuf_reset(&rev_ctx.author);
		if (val)
			strbuf_add(&rev_ctx.author, val, len);
		return;
	}
	if (key == keys.svn_date) {
		if (!val)
			die("invalid dump: unsets svn:date");
		if (parse_date_basic(val, &rev_ctx.timestamp, NULL))
			warning("invalid timestamp: %s", val);
		return;
	}
	if (key != keys.svn_executable && key != keys.svn_special)
		return;

	if (!val) {
		/*
		 * Deleting a mode property falls back to a plain file,
		 * unless this property block already set a mode.
		 */
		if (!*type_set)
			node_ctx.type = REPO_MODE_BLB;
		return;
	}
	if (*type_set)
		die("invalid dump: sets type twice");
	*type_set = 1;
	if (key == keys.svn_executable)
		node_ctx.type = REPO_MODE_EXE;
	else
		node_ctx.type = REPO_MODE_LNK;
}
/*
 * Abort after the input ended early: report the stream error if there
 * is one, otherwise report an unexpected end of file.
 */
static void die_short_read(void)
{
	if (!buffer_ferror(&input))
		die("invalid dump: unexpected end of file");
	die_errno("error reading dump file");
}
/*
 * Read a property block from the input up to the "PROPS-END" line (or
 * end of input) and pass every change to handle_property().
 *
 * Each entry is a "<type> <length>" line followed by <length> bytes
 * and a newline.  'K' names a property, 'V' supplies the value for the
 * most recent 'K', and 'D' names a property to delete.
 *
 * Dies on a malformed entry or a short read.
 */
static void read_props(void)
{
	uint32_t key = ~0;
	const char *t;
	/*
	 * NEEDSWORK: to support simple mode changes like
	 *	K 11
	 *	svn:special
	 *	V 1
	 *	*
	 *	D 14
	 *	svn:executable
	 * we keep track of whether a mode has been set and reset to
	 * plain file only if not.  We should be keeping track of the
	 * symlink and executable bits separately instead.
	 */
	uint32_t type_set = 0;

	while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) {
		uint32_t len;
		const char *val;
		const char type = t[0];
		int ch;

		/* die() messages carry no trailing newline, as elsewhere in this file. */
		if (!type || t[1] != ' ')
			die("invalid property line: %s", t);
		len = atoi(&t[2]);
		val = buffer_read_string(&input, len);
		if (!val || strlen(val) != len)
			die_short_read();

		/* Discard trailing newline. */
		ch = buffer_read_char(&input);
		if (ch == EOF)
			die_short_read();
		if (ch != '\n')
			die("invalid dump: expected newline after %s", val);

		switch (type) {
		case 'K':
			/* Remember the name; the value arrives in the next entry. */
			key = pool_intern(val);
			continue;
		case 'D':
			/* A deletion is reported as a NULL value. */
			key = pool_intern(val);
			val = NULL;
			len = 0;
			/* fall through */
		case 'V':
			handle_property(key, val, len, &type_set);
			key = ~0;
			continue;
		default:
			die("invalid property line: %s", t);
		}
	}
}
/*
 * Apply the node record described by node_ctx to the repository tree.
 *
 * Deletions are applied and return immediately.  Otherwise the node is
 * optionally replaced and/or copied from its source, its content mark
 * and mode are decided, its properties (if any) are read from the
 * input, and the result is stored with repo_add().  When the node
 * carries text, the text is then exported as a blob from the input.
 *
 * Dies on text deltas and on inconsistent records.
 */
static void handle_node(void)
{
	uint32_t mark = 0;
	/* Mode taken from the headers, before any existing mode replaces it. */
	const uint32_t type = node_ctx.type;
	const int have_props = node_ctx.propLength != LENGTH_UNKNOWN;
	const int have_text = node_ctx.textLength != LENGTH_UNKNOWN;

	if (node_ctx.text_delta)
		die("text deltas not supported");
	if (have_text)
		mark = next_blob_mark();
	if (node_ctx.action == NODEACT_DELETE) {
		if (have_text || have_props || node_ctx.srcRev)
			die("invalid dump: deletion node has "
				"copyfrom info, text, or properties");
		/*
		 * A void function must not return an expression in ISO C,
		 * so call and return as separate statements.
		 */
		repo_delete(node_ctx.dst);
		return;
	}
	if (node_ctx.action == NODEACT_REPLACE) {
		/* A replace is handled as a delete followed by an add. */
		repo_delete(node_ctx.dst);
		node_ctx.action = NODEACT_ADD;
	}
	if (node_ctx.srcRev) {
		repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
		/* After the copy the path exists, so an add becomes a change. */
		if (node_ctx.action == NODEACT_ADD)
			node_ctx.action = NODEACT_CHANGE;
	}
	if (have_text && type == REPO_MODE_DIR)
		die("invalid dump: directories cannot have text attached");

	/*
	 * Decide on the new content (mark) and mode (node_ctx.type).
	 */
	if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) {
		/* First path token is ~0: the record changes the root of the tree. */
		if (type != REPO_MODE_DIR)
			die("invalid dump: root of tree is not a regular file");
	} else if (node_ctx.action == NODEACT_CHANGE) {
		uint32_t mode;
		/* Without new text, keep the content already at this path. */
		if (!have_text)
			mark = repo_read_path(node_ctx.dst);
		mode = repo_read_mode(node_ctx.dst);
		if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR)
			die("invalid dump: cannot modify a directory into a file");
		if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
			die("invalid dump: cannot modify a file into a directory");
		node_ctx.type = mode;
	} else if (node_ctx.action == NODEACT_ADD) {
		if (!have_text && type != REPO_MODE_DIR)
			die("invalid dump: adds node without text");
	} else {
		die("invalid dump: Node-path block lacks Node-action");
	}

	/*
	 * Adjust mode to reflect properties.
	 */
	if (have_props) {
		/* A full (non-delta) property list starts over from the header mode. */
		if (!node_ctx.prop_delta)
			node_ctx.type = type;
		if (node_ctx.propLength)
			read_props();
	}

	/*
	 * Save the result.
	 */
	repo_add(node_ctx.dst, node_ctx.type, mark);
	if (have_text)
		fast_export_blob(node_ctx.type, mark,
				 node_ctx.textLength, &input);
}
/*
 * Commit the revision gathered in rev_ctx, together with the dump's
 * UUID and URL.  Revision 0 is never committed.
 */
static void handle_revision(void)
{
	if (!rev_ctx.revision)
		return;
	repo_commit(rev_ctx.revision, rev_ctx.author.buf,
		    rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf,
		    rev_ctx.timestamp);
}
/*
 * Parse the dump available on the module's input buffer.
 *
 * The stream is read one header line at a time; each "Name: value"
 * header is interned and dispatched on.  Node records are applied with
 * handle_node() and finished revisions are committed with
 * handle_revision().
 *
 * url is recorded in dump_ctx (NULL means none) and handed to every
 * commit.
 */
void svndump_read(const char *url)
{
char *val;
char *t;
uint32_t active_ctx = DUMP_CTX;
uint32_t len;
uint32_t key;
reset_dump_ctx(url);
while ((t = buffer_read_line(&input))) {
/* Lines that are not "Name: value" headers are skipped. */
val = strstr(t, ": ");
if (!val)
continue;
/* Overwrite both ':' and ' ' so that t is the name and val the value. */
*val++ = '\0';
*val++ = '\0';
key = pool_intern(t);
if (key == keys.svn_fs_dump_format_version) {
dump_ctx.version = atoi(val);
if (dump_ctx.version > 3)
die("expected svn dump format version <= 3, found %"PRIu32,
dump_ctx.version);
} else if (key == keys.uuid) {
strbuf_reset(&dump_ctx.uuid);
strbuf_addstr(&dump_ctx.uuid, val);
} else if (key == keys.revision_number) {
/* A new revision begins: flush the pending node and revision first. */
if (active_ctx == NODE_CTX)
handle_node();
if (active_ctx != DUMP_CTX)
handle_revision();
active_ctx = REV_CTX;
reset_rev_ctx(atoi(val));
} else if (key == keys.node_path) {
/* A new node begins: flush the pending node first. */
if (active_ctx == NODE_CTX)
handle_node();
active_ctx = NODE_CTX;
reset_node_ctx(val);
} else if (key == keys.node_kind) {
if (!strcmp(val, "dir"))
node_ctx.type = REPO_MODE_DIR;
else if (!strcmp(val, "file"))
node_ctx.type = REPO_MODE_BLB;
else
fprintf(stderr, "Unknown node-kind: %s\n", val);
} else if (key == keys.node_action) {
if (!strcmp(val, "delete")) {
node_ctx.action = NODEACT_DELETE;
} else if (!strcmp(val, "add")) {
node_ctx.action = NODEACT_ADD;
} else if (!strcmp(val, "change")) {
node_ctx.action = NODEACT_CHANGE;
} else if (!strcmp(val, "replace")) {
node_ctx.action = NODEACT_REPLACE;
} else {
fprintf(stderr, "Unknown node-action: %s\n", val);
node_ctx.action = NODEACT_UNKNOWN;
}
} else if (key == keys.node_copyfrom_path) {
pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
} else if (key == keys.node_copyfrom_rev) {
node_ctx.srcRev = atoi(val);
} else if (key == keys.text_content_length) {
node_ctx.textLength = atoi(val);
} else if (key == keys.prop_content_length) {
node_ctx.propLength = atoi(val);
} else if (key == keys.text_delta) {
node_ctx.text_delta = !strcmp(val, "true");
} else if (key == keys.prop_delta) {
node_ctx.prop_delta = !strcmp(val, "true");
} else if (key == keys.content_length) {
/* Content-length ends the header block; a blank line must follow. */
len = atoi(val);
t = buffer_read_line(&input);
if (!t)
die_short_read();
if (*t)
die("invalid dump: expected blank line after content length header");
if (active_ctx == REV_CTX) {
/* Revision content consists of properties only. */
read_props();
} else if (active_ctx == NODE_CTX) {
/* handle_node() consumes the node's content; later headers belong to the revision again. */
handle_node();
active_ctx = REV_CTX;
} else {
/* Content outside any revision or node: report it and skip len bytes. */
fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len);
if (buffer_skip_bytes(&input, len) != len)
die_short_read();
}
}
}
/* The loop ends when no more lines can be read; tell a read error from end of input. */
if (buffer_ferror(&input))
die_short_read();
/* Flush whatever node and revision are still pending. */
if (active_ctx == NODE_CTX)
handle_node();
if (active_ctx != DUMP_CTX)
handle_revision();
}
int svndump_init(const char *filename)
{
if (buffer_init(&input, filename))
return error("cannot open %s: %s", filename, strerror(errno));
repo_init();
strbuf_init(&dump_ctx.uuid, 4096);
strbuf_init(&dump_ctx.url, 4096);
strbuf_init(&rev_ctx.log, 4096);
strbuf_init(&rev_ctx.author, 4096);
reset_dump_ctx(NULL);
reset_rev_ctx(0);
reset_node_ctx(NULL);
init_keys();
return 0;
}
void svndump_deinit(void)
{
repo_reset();
reset_dump_ctx(NULL);
reset_rev_ctx(0);
reset_node_ctx(NULL);
strbuf_release(&rev_ctx.log);
if (buffer_deinit(&input))
fprintf(stderr, "Input error\n");
if (ferror(stdout))
fprintf(stderr, "Output error\n");
}
/*
 * Reset the input buffer and the repository tree, and free the memory
 * held by the dump and revision string buffers.
 */
void svndump_reset(void)
{
	buffer_reset(&input);
	repo_reset();

	/* Revision-level buffers. */
	strbuf_release(&rev_ctx.author);
	strbuf_release(&rev_ctx.log);
	/* Dump-level buffers. */
	strbuf_release(&dump_ctx.url);
	strbuf_release(&dump_ctx.uuid);
}