mirrors/git - Incest Forge: Beyond sex. We incest.

mirrors/git

mirror of https://github.com/git/git.git synced 2024-11-05 00:37:55 +01:00

826 lines

20 KiB

C

Raw Normal View History

Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`#include "cache.h"`
			`#include "commit.h"`
			`#include "utf8.h"`
			`#include "diff.h"`
			`#include "revision.h"`

			/*
			 * Table of the names accepted by get_commit_format().
			 *
			 *   n        the full name of the format
			 *   cmp_len  how many leading characters of the argument must match n;
			 *            shorter arguments are rejected for this entry
			 *   v        the format selected by this entry
			 *
			 * "full" and "fuller" share their first four characters, which is why
			 * both require five characters to be compared before they match.
			 */
			static struct cmt_fmt_map {
				const char *n;
				size_t cmp_len;
				enum cmit_fmt v;
			} cmt_fmts[] = {
				{ "raw",	1,	CMIT_FMT_RAW },
				{ "medium",	1,	CMIT_FMT_MEDIUM },
				{ "short",	1,	CMIT_FMT_SHORT },
				{ "email",	1,	CMIT_FMT_EMAIL },
				{ "full",	5,	CMIT_FMT_FULL },
				{ "fuller",	5,	CMIT_FMT_FULLER },
				{ "oneline",	1,	CMIT_FMT_ONELINE },
				{ "format:",	7,	CMIT_FMT_USERFORMAT},
			};

			/* Copy of the text following "format:"; owned by get_commit_format(). */
			static char *user_format;

			/*
			 * Translate the argument of a --pretty option into a commit format.
			 *
			 * A NULL or empty argument selects CMIT_FMT_DEFAULT.  A single leading
			 * '=' is skipped.  An argument starting with "format:" stores a copy of
			 * the remainder in user_format (replacing any earlier copy) and selects
			 * CMIT_FMT_USERFORMAT.  Otherwise the argument is looked up in cmt_fmts:
			 * it matches an entry when it agrees with the entry's name over the
			 * first cmp_len characters and is itself a prefix of that name.
			 *
			 * If nothing matches, die() is called with an error message.
			 */
			enum cmit_fmt get_commit_format(const char *arg)
			{
				size_t i;

				if (!arg || !*arg)
					return CMIT_FMT_DEFAULT;
				if (*arg == '=')
					arg++;
				if (!prefixcmp(arg, "format:")) {
					/* free(NULL) is a no-op, so no guard is needed */
					free(user_format);
					user_format = xstrdup(arg + 7);
					return CMIT_FMT_USERFORMAT;
				}
				for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
					if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
					    !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
						return cmt_fmts[i].v;
				}

				die("invalid --pretty format: %s", arg);
			}

			`/*`
			`* Generic support for pretty-printing the header`
			`*/`
			/*
			 * Return the length of the first line of msg.  The terminating '\n' is
			 * included in the count when present; an empty string yields 0.
			 */
			static int get_one_line(const char *msg)
			{
				const char *p = msg;

				while (*p) {
					/* the newline itself belongs to the line */
					if (*p++ == '\n')
						break;
				}
				return (int)(p - msg);
			}

			/*
			 * Return 1 if the low eight bits of ch have the high bit set or are
			 * equal to 0x1b (ESC, as used by ISO-2022-INT), and 0 otherwise.
			 */
			int non_ascii(int ch)
			{
				ch = (ch & 0xff);
				return ((ch & 0x80) || (ch == 0x1b));
			}

			/*
			 * Return nonzero if ch has to be written as "=XX" inside an RFC 2047
			 * "q" encoded word: any byte non_ascii() flags, plus '=', '?' and '_'.
			 */
			static int is_rfc2047_special(char ch)
			{
				return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
			}

			/*
			 * Append the first len bytes of line to sb, quoting them as an RFC 2047
			 * "q" encoded word when necessary.
			 *
			 * The text is appended verbatim unless it contains a byte flagged by
			 * non_ascii() or the sequence "=?".  Otherwise the output has the form
			 * "=?<encoding>?q?<text>?=", where spaces and the bytes flagged by
			 * is_rfc2047_special() are written as "=XX" (uppercase hex).
			 */
			static void add_rfc2047(struct strbuf *sb, const char *line, int len,
					       const char *encoding)
			{
				int i, last;

				for (i = 0; i < len; i++) {
					int ch = line[i];
					if (non_ascii(ch))
						goto needquote;
					if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
						goto needquote;
				}
				strbuf_add(sb, line, len);
				return;

			needquote:
				strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
				strbuf_addf(sb, "=?%s?q?", encoding);
				/* 'last' marks the start of the pending run of unquoted bytes */
				for (i = last = 0; i < len; i++) {
					unsigned ch = line[i] & 0xFF;
					/*
					 * We encode ' ' using '=20' even though rfc2047
					 * allows using '_' for readability.  Unfortunately,
					 * many programs do not understand this and just
					 * leave the underscore in place.
					 */
					if (is_rfc2047_special(ch) || ch == ' ') {
						strbuf_add(sb, line + last, i - last);
						strbuf_addf(sb, "=%02X", ch);
						last = i + 1;
					}
				}
				strbuf_add(sb, line + last, len - last);
				strbuf_addstr(sb, "?=");
			}

			`static void add_user_info(const char what, enum cmit_fmt fmt, struct strbuf sb,`
			`const char *line, enum date_mode dmode,`
			`const char *encoding)`
			`{`
			`char *date;`
			`int namelen;`
			`unsigned long time;`
			`int tz;`
			`const char *filler = " ";`

			`if (fmt == CMIT_FMT_ONELINE)`
			`return;`
			`date = strchr(line, '>');`
			`if (!date)`
			`return;`
			`namelen = ++date - line;`
			`time = strtoul(date, &date, 10);`
			`tz = strtol(date, NULL, 10);`

			`if (fmt == CMIT_FMT_EMAIL) {`
			`char *name_tail = strchr(line, '<');`
			`int display_name_length;`
			`if (!name_tail)`
			`return;`
			`while (line < name_tail && isspace(name_tail[-1]))`
			`name_tail--;`
			`display_name_length = name_tail - line;`
			`filler = "";`
			`strbuf_addstr(sb, "From: ");`
			`add_rfc2047(sb, line, display_name_length, encoding);`
			`strbuf_add(sb, name_tail, namelen - display_name_length);`
			`strbuf_addch(sb, '\n');`
			`} else {`
			`strbuf_addf(sb, "%s: %.s%.s\n", what,`
			`(fmt == CMIT_FMT_FULLER) ? 4 : 0,`
			`filler, namelen, line);`
			`}`
			`switch (fmt) {`
			`case CMIT_FMT_MEDIUM:`
			`strbuf_addf(sb, "Date: %s\n", show_date(time, tz, dmode));`
			`break;`
			`case CMIT_FMT_EMAIL:`
			`strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));`
			`break;`
			`case CMIT_FMT_FULLER:`
			`strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));`
			`break;`
			`default:`
			`/* notin' */`
			`break;`
			`}`
			`}`

			/*
			 * Strip trailing whitespace from a line by shrinking *len_p.
			 *
			 * On entry *len_p is the length of line; on return it is the length
			 * without trailing whitespace.  Returns nonzero if nothing is left,
			 * i.e. the line was empty or consisted of whitespace only.
			 */
			static int is_empty_line(const char *line, int *len_p)
			{
				int len = *len_p;
				while (len && isspace(line[len-1]))
					len--;
				*len_p = len;
				return !len;
			}

			`static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,`
			`const struct commit *commit, int abbrev)`
			`{`
			`struct commit_list *parent = commit->parents;`

			`if ((fmt == CMIT_FMT_ONELINE) \|\| (fmt == CMIT_FMT_EMAIL) \|\|`
			`!parent \|\| !parent->next)`
			`return;`

			`strbuf_addstr(sb, "Merge:");`

			`while (parent) {`
			`struct commit *p = parent->item;`
			`const char *hex = NULL;`
			`const char *dots;`
			`if (abbrev)`
			`hex = find_unique_abbrev(p->object.sha1, abbrev);`
			`if (!hex)`
			`hex = sha1_to_hex(p->object.sha1);`
			`dots = (abbrev && strlen(hex) != 40) ? "..." : "";`
			`parent = parent->next;`

			`strbuf_addf(sb, " %s%s", hex, dots);`
			`}`
			`strbuf_addch(sb, '\n');`
			`}`

			`static char get_header(const struct commit commit, const char *key)`
			`{`
			`int key_len = strlen(key);`
			`const char *line = commit->buffer;`

			`for (;;) {`
			`const char eol = strchr(line, '\n'), next;`

			`if (line == eol)`
			`return NULL;`
			`if (!eol) {`
			`eol = line + strlen(line);`
			`next = NULL;`
			`} else`
			`next = eol + 1;`
			`if (eol - line > key_len &&`
			`!strncmp(line, key, key_len) &&`
			`line[key_len] == ' ') {`
			`return xmemdupz(line + key_len + 1, eol - line - key_len - 1);`
			`}`
			`line = next;`
			`}`
			`}`

			/*
			 * Make the "encoding" header of a commit buffer agree with the encoding
			 * the buffer has been converted to.
			 *
			 * buf must be a NUL-terminated, heap-allocated commit buffer.  If its
			 * header section (everything before the first empty line) has no
			 * "encoding " line, buf is returned unchanged.  Otherwise the line is
			 * removed when encoding is UTF-8, or its value is replaced by encoding.
			 *
			 * In the latter cases buf is handed over to a strbuf and the buffer
			 * detached from that strbuf is returned, so callers must use the
			 * return value and no longer the pointer they passed in.
			 */
			static char *replace_encoding_header(char *buf, const char *encoding)
			{
				struct strbuf tmp;
				size_t start, len, buflen;
				char *cp = buf;

				/* guess if there is an encoding header before a \n\n */
				while (strncmp(cp, "encoding ", strlen("encoding "))) {
					cp = strchr(cp, '\n');
					if (!cp || *++cp == '\n')
						return buf;
				}
				start = cp - buf;
				cp = strchr(cp, '\n');
				if (!cp)
					return buf; /* should not happen but be defensive */
				/* length of the whole header line including its newline */
				len = cp + 1 - (buf + start);

				buflen = strlen(buf);
				strbuf_init(&tmp, 0);
				strbuf_attach(&tmp, buf, buflen, buflen + 1);
				if (is_encoding_utf8(encoding)) {
					/* we have re-coded to UTF-8; drop the header */
					strbuf_remove(&tmp, start, len);
				} else {
					/* just replaces XXXX in 'encoding XXXX\n' */
					strbuf_splice(&tmp, start + strlen("encoding "),
						      len - strlen("encoding \n"),
						      encoding, strlen(encoding));
				}
				return strbuf_detach(&tmp, NULL);
			}

			`static char logmsg_reencode(const struct commit commit,`
			`const char *output_encoding)`
			`{`
			`static const char *utf8 = "utf-8";`
			`const char *use_encoding;`
			`char *encoding;`
			`char *out;`

			`if (!*output_encoding)`
			`return NULL;`
			`encoding = get_header(commit, "encoding");`
			`use_encoding = encoding ? encoding : utf8;`
			`if (!strcmp(use_encoding, output_encoding))`
			`if (encoding) /* we'll strip encoding header later */`
			`out = xstrdup(commit->buffer);`
			`else`
			`return NULL; /* nothing to do */`
			`else`
			`out = reencode_string(commit->buffer,`
			`output_encoding, use_encoding);`
			`if (out)`
			`out = replace_encoding_header(out, output_encoding);`

			`free(encoding);`
			`return out;`
			`}`

--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`static void format_person_part(struct strbuf *sb, char part,`
			`const char *msg, int len)`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`{`
			`int start, end, tz = 0;`
			`unsigned long date;`
			`char *ep;`

			`/* parse name */`
			`for (end = 0; end < len && msg[end] != '<'; end++)`
			`; /* do nothing */`
			`start = end + 1;`
			`while (end > 0 && isspace(msg[end - 1]))`
			`end--;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`if (part == 'n') { /* name */`
			`strbuf_add(sb, msg, end);`
			`return;`
			`}`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
			`if (start >= len)`
			`return;`

			`/* parse email */`
			`for (end = start + 1; end < len && msg[end] != '>'; end++)`
			`; /* do nothing */`

			`if (end >= len)`
			`return;`

--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`if (part == 'e') { /* email */`
			`strbuf_add(sb, msg + start, end - start);`
			`return;`
			`}`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
			`/* parse date */`
			`for (start = end + 1; start < len && isspace(msg[start]); start++)`
			`; /* do nothing */`
			`if (start >= len)`
			`return;`
			`date = strtoul(msg + start, &ep, 10);`
			`if (msg + start == ep)`
			`return;`

--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`if (part == 't') { /* date, UNIX timestamp */`
			`strbuf_add(sb, msg + start, ep - (msg + start));`
			`return;`
			`}`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
			`/* parse tz */`
			`for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)`
			`; /* do nothing */`
			`if (start + 1 < len) {`
			`tz = strtoul(msg + start + 1, NULL, 10);`
			`if (msg[start] == '-')`
			`tz = -tz;`
			`}`

--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`switch (part) {`
			`case 'd': /* date */`
			`strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));`
			`return;`
			`case 'D': /* date, RFC2822 style */`
			`strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));`
			`return;`
			`case 'r': /* date, relative */`
			`strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));`
			`return;`
			`case 'i': /* date, ISO 8601 */`
			`strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));`
			`return;`
			`}`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`}`

/* A region of a buffer, described by its start offset and length. */
struct chunk {
	size_t off;
	size_t len;
};

			`struct format_commit_context {`
			`const struct commit *commit;`

			`/* These offsets are relative to the start of the commit message. */`
			`int commit_header_parsed;`
			`struct chunk subject;`
			`struct chunk author;`
			`struct chunk committer;`
			`struct chunk encoding;`
			`size_t body_off;`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00
			`/* The following ones are relative to the result struct strbuf. */`
			`struct chunk abbrev_commit_hash;`
			`struct chunk abbrev_tree_hash;`
			`struct chunk abbrev_parent_hashes;`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`};`

--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`static int add_again(struct strbuf sb, struct chunk chunk)`
			`{`
			`if (chunk->len) {`
			`strbuf_adddup(sb, chunk->off, chunk->len);`
			`return 1;`
			`}`

			`/*`
			`* We haven't seen this chunk before. Our caller is surely`
			`* going to add it the hard way now. Remember the most likely`
			`* start of the to-be-added chunk: the current end of the`
			`* struct strbuf.`
			`*/`
			`chunk->off = sb->len;`
			`return 0;`
			`}`

--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`static void parse_commit_header(struct format_commit_context *context)`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`{`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`const char *msg = context->commit->buffer;`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`int i;`
			`enum { HEADER, SUBJECT, BODY } state;`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00
			`for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {`
			`int eol;`
			`for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)`
			`; /* do nothing */`

			`if (state == SUBJECT) {`
			`context->subject.off = i;`
			`context->subject.len = eol - i;`
			`i = eol;`
			`}`
			`if (i == eol) {`
			`state++;`
			`/* strip empty lines */`
			`while (msg[eol + 1] == '\n')`
			`eol++;`
			`} else if (!prefixcmp(msg + i, "author ")) {`
			`context->author.off = i + 7;`
			`context->author.len = eol - i - 7;`
			`} else if (!prefixcmp(msg + i, "committer ")) {`
			`context->committer.off = i + 10;`
			`context->committer.len = eol - i - 10;`
			`} else if (!prefixcmp(msg + i, "encoding ")) {`
			`context->encoding.off = i + 9;`
			`context->encoding.len = eol - i - 9;`
			`}`
			`i = eol;`
			`}`
			`context->body_off = i;`
			`context->commit_header_parsed = 1;`
			`}`

			`static void format_commit_item(struct strbuf sb, const char placeholder,`
			`void *context)`
			`{`
			`struct format_commit_context *c = context;`
			`const struct commit *commit = c->commit;`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`const char *msg = commit->buffer;`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`struct commit_list *p;`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
			`/* these are independent of the commit */`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholder expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires that _all_ placeholders be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback-based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, a list of placeholder strings, a user-supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here are my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`switch (placeholder[0]) {`
			`case 'C':`
			`switch (placeholder[3]) {`
			`case 'd': /* red */`
			`strbuf_addstr(sb, "\033[31m");`
			`return;`
			`case 'e': /* green */`
			`strbuf_addstr(sb, "\033[32m");`
			`return;`
			`case 'u': /* blue */`
			`strbuf_addstr(sb, "\033[34m");`
			`return;`
			`case 's': /* reset color */`
			`strbuf_addstr(sb, "\033[m");`
			`return;`
			`}`
			`case 'n': /* newline */`
			`strbuf_addch(sb, '\n');`
			`return;`
			`}`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
			`/* these depend on the commit */`
			`if (!commit->object.parsed)`
			`parse_object(commit->object.sha1);`

--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`switch (placeholder[0]) {`
			`case 'H': /* commit hash */`
			`strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));`
			`return;`
			`case 'h': /* abbreviated commit hash */`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later the expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`if (add_again(sb, &c->abbrev_commit_hash))`
			`return;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,`
			`DEFAULT_ABBREV));`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`return;`
			`case 'T': /* tree hash */`
			`strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));`
			`return;`
			`case 't': /* abbreviated tree hash */`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`if (add_again(sb, &c->abbrev_tree_hash))`
			`return;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,`
			`DEFAULT_ABBREV));`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`return;`
			`case 'P': /* parent hashes */`
			`for (p = commit->parents; p; p = p->next) {`
			`if (p != commit->parents)`
			`strbuf_addch(sb, ' ');`
			`strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));`
			`}`
			`return;`
			`case 'p': /* abbreviated parent hashes */`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`if (add_again(sb, &c->abbrev_parent_hashes))`
			`return;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`for (p = commit->parents; p; p = p->next) {`
			`if (p != commit->parents)`
			`strbuf_addch(sb, ' ');`
			`strbuf_addstr(sb, find_unique_abbrev(`
			`p->item->object.sha1, DEFAULT_ABBREV));`
			`}`
--format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:18:26 +01:00			`c->abbrev_parent_hashes.len = sb->len -`
			`c->abbrev_parent_hashes.off;`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`return;`
			`case 'm': /* left/right/bottom */`
			`strbuf_addch(sb, (commit->object.flags & BOUNDARY)`
			`? '-'`
			`: (commit->object.flags & SYMMETRIC_LEFT)`
			`? '<'`
			`: '>');`
			`return;`
			`}`

			`/* For the rest we have to parse the commit header. */`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`if (!c->commit_header_parsed)`
			`parse_commit_header(c);`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`switch (placeholder[0]) {`
			`case 's':`
			`strbuf_add(sb, msg + c->subject.off, c->subject.len);`
			`return;`
			`case 'a':`
			`format_person_part(sb, placeholder[1],`
			`msg + c->author.off, c->author.len);`
			`return;`
			`case 'c':`
			`format_person_part(sb, placeholder[1],`
			`msg + c->committer.off, c->committer.len);`
			`return;`
			`case 'e':`
			`strbuf_add(sb, msg + c->encoding.off, c->encoding.len);`
			`return;`
			`case 'b':`
			`strbuf_addstr(sb, msg + c->body_off);`
			`return;`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`}`
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. 
:) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-09 01:49:42 +01:00			`}`

			`void format_commit_message(const struct commit *commit,`
			`const void format, struct strbuf sb)`
			`{`
			`const char *placeholders[] = {`
			`"H", /* commit hash */`
			`"h", /* abbreviated commit hash */`
			`"T", /* tree hash */`
			`"t", /* abbreviated tree hash */`
			`"P", /* parent hashes */`
			`"p", /* abbreviated parent hashes */`
			`"an", /* author name */`
			`"ae", /* author email */`
			`"ad", /* author date */`
			`"aD", /* author date, RFC2822 style */`
			`"ar", /* author date, relative */`
			`"at", /* author date, UNIX timestamp */`
			`"ai", /* author date, ISO 8601 */`
			`"cn", /* committer name */`
			`"ce", /* committer email */`
			`"cd", /* committer date */`
			`"cD", /* committer date, RFC2822 style */`
			`"cr", /* committer date, relative */`
			`"ct", /* committer date, UNIX timestamp */`
			`"ci", /* committer date, ISO 8601 */`
			`"e", /* encoding */`
			`"s", /* subject */`
			`"b", /* body */`
			`"Cred", /* red */`
			`"Cgreen", /* green */`
			`"Cblue", /* blue */`
			`"Creset", /* reset color */`
			`"n", /* newline */`
			`"m", /* left/right/bottom */`
			`NULL`
			`};`
--pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a, %c, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-10 12:14:20 +01:00			`struct format_commit_context context;`

			`memset(&context, 0, sizeof(context));`
			`context.commit = commit;`
			`strbuf_expand(sb, format, placeholders, format_commit_item, &context);`
Split off the pretty print stuff into its own file The file commit.c got quite large, but it does not have to be: the code concerning pretty printing is pretty well contained. In fact, this commit just splits it off into pretty.c, leaving commit.c with just 672 lines. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com> 2007-11-04 20:15:06 +01:00			`}`

			`static void pp_header(enum cmit_fmt fmt,`
			`int abbrev,`
			`enum date_mode dmode,`
			`const char *encoding,`
			`const struct commit *commit,`
			`const char **msg_p,`
			`struct strbuf *sb)`
			`{`
			`int parents_shown = 0;`

			`for (;;) {`
			`const char line = msg_p;`
			`int linelen = get_one_line(*msg_p);`

			`if (!linelen)`
			`return;`
			`*msg_p += linelen;`

			`if (linelen == 1)`
			`/* End of header */`
			`return;`

			`if (fmt == CMIT_FMT_RAW) {`
			`strbuf_add(sb, line, linelen);`
			`continue;`
			`}`

			`if (!memcmp(line, "parent ", 7)) {`
			`if (linelen != 48)`
			`die("bad parent line in commit");`
			`continue;`
			`}`

			`if (!parents_shown) {`
			`struct commit_list *parent;`
			`int num;`
			`for (parent = commit->parents, num = 0;`
			`parent;`
			`parent = parent->next, num++)`
			`;`
			`/* with enough slop */`
			`strbuf_grow(sb, num * 50 + 20);`
			`add_merge_info(fmt, sb, commit, abbrev);`
			`parents_shown = 1;`
			`}`

			`/*`
			`* MEDIUM == DEFAULT shows only author with dates.`
			`* FULL shows both authors but not dates.`
			`* FULLER shows both authors and dates.`
			`*/`
			`if (!memcmp(line, "author ", 7)) {`
			`strbuf_grow(sb, linelen + 80);`
			`add_user_info("Author", fmt, sb, line + 7, dmode, encoding);`
			`}`
			`if (!memcmp(line, "committer ", 10) &&`
			`(fmt == CMIT_FMT_FULL \|\| fmt == CMIT_FMT_FULLER)) {`
			`strbuf_grow(sb, linelen + 80);`
			`add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);`
			`}`
			`}`
			`}`

			/*
			 * Emit the title (first paragraph) of the commit message.
			 *
			 * fmt:             output format; CMIT_FMT_EMAIL folds a multi-line
			 *                  title with "\n " and adds a trailing blank line.
			 * msg_p:           in/out cursor into the message; advanced past the
			 *                  title lines and the line that terminated them.
			 * sb:              destination buffer.
			 * subject:         if non-NULL, written before the title, and the
			 *                  title is passed through add_rfc2047(); if NULL the
			 *                  title is appended as-is.
			 * after_subject:   if non-NULL, appended after the title line (and
			 *                  after the MIME headers, when those are emitted).
			 * encoding:        charset name used for add_rfc2047() and for the
			 *                  Content-Type header.
			 * plain_non_ascii: when non-zero, MIME headers declaring 8bit
			 *                  text/plain in `encoding` are added.
			 */
			static void pp_title_line(enum cmit_fmt fmt,
						  const char **msg_p,
						  struct strbuf *sb,
						  const char *subject,
						  const char *after_subject,
						  const char *encoding,
						  int plain_non_ascii)
			{
				struct strbuf title;

				strbuf_init(&title, 80);

				/* Collect lines up to the first empty line (or end of text). */
				for (;;) {
					const char *line = *msg_p;
					int linelen = get_one_line(line);

					*msg_p += linelen;
					if (!linelen || is_empty_line(line, &linelen))
						break;

					strbuf_grow(&title, linelen + 2);
					if (title.len) {
						/*
						 * Join with the previous line: a single space,
						 * preceded by a newline in e-mail format so the
						 * result is a folded header line.
						 */
						if (fmt == CMIT_FMT_EMAIL) {
							strbuf_addch(&title, '\n');
						}
						strbuf_addch(&title, ' ');
					}
					strbuf_add(&title, line, linelen);
				}

				strbuf_grow(sb, title.len + 1024);
				if (subject) {
					strbuf_addstr(sb, subject);
					add_rfc2047(sb, title.buf, title.len, encoding);
				} else {
					strbuf_addbuf(sb, &title);
				}
				strbuf_addch(sb, '\n');

				if (plain_non_ascii) {
					const char *header_fmt =
						"MIME-Version: 1.0\n"
						"Content-Type: text/plain; charset=%s\n"
						"Content-Transfer-Encoding: 8bit\n";
					strbuf_addf(sb, header_fmt, encoding);
				}
				if (after_subject) {
					strbuf_addstr(sb, after_subject);
				}
				if (fmt == CMIT_FMT_EMAIL) {
					strbuf_addch(sb, '\n');
				}
				strbuf_release(&title);
			}

			/*
			 * Append the rest of the commit message to sb, one line at a time.
			 *
			 * fmt:    output format; CMIT_FMT_SHORT stops at the first empty line
			 *         that follows non-empty text.
			 * msg_p:  in/out cursor into the message; advanced past every line
			 *         consumed.
			 * sb:     destination buffer.
			 * indent: number of spaces written before each emitted line.
			 *
			 * Leading empty lines are skipped.  Each emitted line is followed by
			 * a '\n' added here; is_empty_line() is given &linelen and may adjust
			 * it before the line is copied.
			 */
			static void pp_remainder(enum cmit_fmt fmt,
						 const char **msg_p,
						 struct strbuf *sb,
						 int indent)
			{
				int first = 1;
				for (;;) {
					const char *line = *msg_p;
					int linelen = get_one_line(line);
					*msg_p += linelen;

					if (!linelen)
						break;

					if (is_empty_line(line, &linelen)) {
						if (first)
							continue;
						if (fmt == CMIT_FMT_SHORT)
							break;
					}
					first = 0;

					/* Reserve room first so the indent can be written in place. */
					strbuf_grow(sb, linelen + indent + 20);
					if (indent) {
						memset(sb->buf + sb->len, ' ', indent);
						strbuf_setlen(sb, sb->len + indent);
					}
					strbuf_add(sb, line, linelen);
					strbuf_addch(sb, '\n');
				}
			}

			`void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,`
			`struct strbuf *sb, int abbrev,`
			`const char subject, const char after_subject,`
			`enum date_mode dmode, int plain_non_ascii)`
			`{`
			`unsigned long beginning_of_body;`
			`int indent = 4;`
			`const char *msg = commit->buffer;`
			`char *reencoded;`
			`const char *encoding;`

			`if (fmt == CMIT_FMT_USERFORMAT) {`
			`format_commit_message(commit, user_format, sb);`
			`return;`
			`}`

			`encoding = (git_log_output_encoding`
			`? git_log_output_encoding`
			`: git_commit_encoding);`
			`if (!encoding)`
			`encoding = "utf-8";`
			`reencoded = logmsg_reencode(commit, encoding);`
			`if (reencoded) {`
			`msg = reencoded;`
			`}`

			`if (fmt == CMIT_FMT_ONELINE \|\| fmt == CMIT_FMT_EMAIL)`
			`indent = 0;`

			`/* After-subject is used to pass in Content-Type: multipart`
			`* MIME header; in that case we do not have to do the`
			`* plaintext content type even if the commit message has`
			`* non 7-bit ASCII character. Otherwise, check if we need`
			`* to say this is not a 7-bit ASCII.`
			`*/`
			`if (fmt == CMIT_FMT_EMAIL && !after_subject) {`
			`int i, ch, in_body;`

			`for (in_body = i = 0; (ch = msg[i]); i++) {`
			`if (!in_body) {`
			`/* author could be non 7-bit ASCII but`
			`* the log may be so; skip over the`
			`* header part first.`
			`*/`
			`if (ch == '\n' && msg[i+1] == '\n')`
			`in_body = 1;`
			`}`
			`else if (non_ascii(ch)) {`
			`plain_non_ascii = 1;`
			`break;`
			`}`
			`}`
			`}`

			`pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);`
			`if (fmt != CMIT_FMT_ONELINE && !subject) {`
			`strbuf_addch(sb, '\n');`
			`}`

			`/* Skip excess blank lines at the beginning of body, if any... */`
			`for (;;) {`
			`int linelen = get_one_line(msg);`
			`int ll = linelen;`
			`if (!linelen)`
			`break;`
			`if (!is_empty_line(msg, &ll))`
			`break;`
			`msg += linelen;`
			`}`

			`/* These formats treat the title line specially. */`
			`if (fmt == CMIT_FMT_ONELINE \|\| fmt == CMIT_FMT_EMAIL)`
			`pp_title_line(fmt, &msg, sb, subject,`
			`after_subject, encoding, plain_non_ascii);`

			`beginning_of_body = sb->len;`
			`if (fmt != CMIT_FMT_ONELINE)`
			`pp_remainder(fmt, &msg, sb, indent);`
			`strbuf_rtrim(sb);`

			`/* Make sure there is an EOLN for the non-oneline case */`
			`if (fmt != CMIT_FMT_ONELINE)`
			`strbuf_addch(sb, '\n');`

			`/*`
			`* The caller may append additional body text in e-mail`
			`* format. Make sure we did not strip the blank line`
			`* between the header and the body.`
			`*/`
			`if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)`
			`strbuf_addch(sb, '\n');`
			`free(reencoded);`
			`}`