From f462ebb48bf9126335671e878336e3faf3914802 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 May 2006 15:17:05 -0700 Subject: [PATCH 1/4] builtin-grep: allow -<n> and -[ABC]<n> notation for context lines. Signed-off-by: Junio C Hamano --- builtin-grep.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index eb821b41b9..a551d34021 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -402,18 +402,34 @@ int cmd_grep(int argc, const char **argv, char **envp) opt.name_only = 1; continue; } - if (!strcmp("-A", arg) || - !strcmp("-B", arg) || - !strcmp("-C", arg)) { + if (!strncmp("-A", arg, 2) || + !strncmp("-B", arg, 2) || + !strncmp("-C", arg, 2) || + (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) { unsigned num; - if (argc <= 1 || - sscanf(*++argv, "%u", &num) != 1) + const char *scan; + switch (arg[1]) { + case 'A': case 'B': case 'C': + if (!arg[2]) { + if (argc <= 1) + usage(builtin_grep_usage); + scan = *++argv; + argc--; + } + else + scan = arg + 2; + break; + default: + scan = arg + 1; + break; + } + if (sscanf(scan, "%u", &num) != 1) usage(builtin_grep_usage); - argc--; switch (arg[1]) { case 'A': opt.post_context = num; break; + default: case 'C': opt.post_context = num; case 'B': From f9b9faf6f8a408b7129e2c95234f698da2171f4b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 May 2006 15:40:49 -0700 Subject: [PATCH 2/4] builtin-grep: allow more than one patterns. 
Signed-off-by: Junio C Hamano --- builtin-grep.c | 72 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index a551d34021..b8351200dd 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -75,9 +75,16 @@ static int pathspec_matches(const char **paths, const char *name) return 0; } -struct grep_opt { +struct grep_pat { + struct grep_pat *next; const char *pattern; regex_t regexp; +}; + +struct grep_opt { + struct grep_pat *pattern_list; + struct grep_pat **pattern_tail; + regex_t regexp; unsigned linenum:1; unsigned invert:1; unsigned name_only:1; @@ -86,6 +93,29 @@ struct grep_opt { unsigned post_context; }; +static void add_pattern(struct grep_opt *opt, const char *pat) +{ + struct grep_pat *p = xcalloc(1, sizeof(*p)); + p->pattern = pat; + *opt->pattern_tail = p; + opt->pattern_tail = &p->next; + p->next = NULL; +} + +static void compile_patterns(struct grep_opt *opt) +{ + struct grep_pat *p; + for (p = opt->pattern_list; p; p = p->next) { + int err = regcomp(&p->regexp, p->pattern, opt->regflags); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, 1024); + regfree(&p->regexp); + die("'%s': %s", p->pattern, errbuf); + } + } +} + static char *end_of_line(char *cp, unsigned long *left) { unsigned long l = *left; @@ -128,14 +158,24 @@ static int grep_buffer(struct grep_opt *opt, const char *name, while (left) { regmatch_t pmatch[10]; char *eol, ch; - int hit; + int hit = 0; + struct grep_pat *p; eol = end_of_line(bol, &left); ch = *eol; *eol = 0; - hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch), - pmatch, 0); + for (p = opt->pattern_list; p; p = p->next) { + regex_t *exp = &p->regexp; + hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), + pmatch, 0); + if (hit) + break; + } + /* "grep -v -e foo -e bla" should list lines + * that do not have either, so inversion should + * be done outside. 
+ */ if (opt->invert) hit = !hit; if (hit) { @@ -344,7 +384,6 @@ static const char builtin_grep_usage[] = int cmd_grep(int argc, const char **argv, char **envp) { - int err; int hit = 0; int no_more_flags = 0; int seen_noncommit = 0; @@ -355,6 +394,7 @@ int cmd_grep(int argc, const char **argv, char **envp) const char **paths = NULL; memset(&opt, 0, sizeof(opt)); + opt.pattern_tail = &opt.pattern_list; opt.regflags = REG_NEWLINE; /* @@ -440,12 +480,8 @@ int cmd_grep(int argc, const char **argv, char **envp) } if (!strcmp("-e", arg)) { if (1 < argc) { - /* We probably would want to do - * -e pat1 -e pat2 as well later... - */ - if (opt.pattern) - die("more than one pattern?"); - opt.pattern = *++argv; + add_pattern(&opt, argv[1]); + argv++; argc--; continue; } @@ -458,8 +494,8 @@ int cmd_grep(int argc, const char **argv, char **envp) /* Either unrecognized option or a single pattern */ if (!no_more_flags && *arg == '-') usage(builtin_grep_usage); - if (!opt.pattern) { - opt.pattern = arg; + if (!opt.pattern_list) { + add_pattern(&opt, arg); break; } else { @@ -471,15 +507,9 @@ int cmd_grep(int argc, const char **argv, char **envp) break; } } - if (!opt.pattern) + if (!opt.pattern_list) die("no pattern given."); - err = regcomp(&opt.regexp, opt.pattern, opt.regflags); - if (err) { - char errbuf[1024]; - regerror(err, &opt.regexp, errbuf, 1024); - regfree(&opt.regexp); - die("'%s': %s", opt.pattern, errbuf); - } + compile_patterns(&opt); tail = &object_list; while (1 < argc) { struct object *object; From 2c866cf1c2b2f3a289b7f3c21adb4f05af6626e4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 May 2006 15:40:49 -0700 Subject: [PATCH 3/4] builtin-grep: support -c (--count). 
Signed-off-by: Junio C Hamano --- builtin-grep.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/builtin-grep.c b/builtin-grep.c index b8351200dd..f1800a54d4 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -88,6 +88,7 @@ struct grep_opt { unsigned linenum:1; unsigned invert:1; unsigned name_only:1; + unsigned count:1; int regflags; unsigned pre_context; unsigned post_context; @@ -149,6 +150,7 @@ static int grep_buffer(struct grep_opt *opt, const char *name, unsigned last_hit = 0; unsigned last_shown = 0; const char *hunk_mark = ""; + unsigned count = 0; if (opt->pre_context) prev = xcalloc(opt->pre_context, sizeof(*prev)); @@ -179,12 +181,16 @@ static int grep_buffer(struct grep_opt *opt, const char *name, if (opt->invert) hit = !hit; if (hit) { + count++; if (opt->name_only) { printf("%s\n", name); return 1; } /* Hit at this line. If we haven't shown the * pre-context lines, we would need to show them. + * When asked to do "count", this still show + * the context which is nonsense, but the user + * deserves to get that ;-). */ if (opt->pre_context) { unsigned from; @@ -206,7 +212,8 @@ static int grep_buffer(struct grep_opt *opt, const char *name, } if (last_shown && lno != last_shown + 1) printf(hunk_mark); - show_line(opt, bol, eol, name, lno, ':'); + if (!opt->count) + show_line(opt, bol, eol, name, lno, ':'); last_shown = last_hit = lno; } else if (last_hit && @@ -230,6 +237,13 @@ static int grep_buffer(struct grep_opt *opt, const char *name, left--; lno++; } + /* NEEDSWORK: + * The real "grep -c foo *.c" gives many "bar.c:0" lines, + * which feels mostly useless but sometimes useful. Maybe + * make it another option? For now suppress them. 
+ */ + if (opt->count && count) + printf("%s:%u\n", name, count); return !!last_hit; } @@ -442,6 +456,11 @@ int cmd_grep(int argc, const char **argv, char **envp) opt.name_only = 1; continue; } + if (!strcmp("-c", arg) || + !strcmp("--count", arg)) { + opt.count = 1; + continue; + } if (!strncmp("-A", arg, 2) || !strncmp("-B", arg, 2) || !strncmp("-C", arg, 2) || From 7839a25eab7177024b809fbb3796907e3eed17c1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 2 May 2006 15:40:49 -0700 Subject: [PATCH 4/4] builtin-grep: support -w (--word-regexp). Signed-off-by: Junio C Hamano --- builtin-grep.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/builtin-grep.c b/builtin-grep.c index f1800a54d4..09e3677824 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -89,6 +89,7 @@ struct grep_opt { unsigned invert:1; unsigned name_only:1; unsigned count:1; + unsigned word_regexp:1; int regflags; unsigned pre_context; unsigned post_context; @@ -128,6 +129,11 @@ static char *end_of_line(char *cp, unsigned long *left) return cp; } +static int word_char(char ch) +{ + return isalnum(ch) || ch == '_'; +} + static void show_line(struct grep_opt *opt, const char *bol, const char *eol, const char *name, unsigned lno, char sign) { @@ -171,6 +177,25 @@ static int grep_buffer(struct grep_opt *opt, const char *name, regex_t *exp = &p->regexp; hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), pmatch, 0); + + if (hit && opt->word_regexp) { + /* Match beginning must be either + * beginning of the line, or at word + * boundary (i.e. the last char must + * not be alnum or underscore). 
+ */ + if ((pmatch[0].rm_so < 0) || + (eol - bol) <= pmatch[0].rm_so || + (pmatch[0].rm_eo < 0) || + (eol - bol) < pmatch[0].rm_eo) + die("regexp returned nonsense"); + if (pmatch[0].rm_so != 0 && + word_char(bol[pmatch[0].rm_so-1])) + continue; /* not a word boundary */ + if ((eol-bol) < pmatch[0].rm_eo && + word_char(bol[pmatch[0].rm_eo])) + continue; /* not a word boundary */ + } if (hit) break; } @@ -461,6 +486,11 @@ int cmd_grep(int argc, const char **argv, char **envp) opt.count = 1; continue; } + if (!strcmp("-w", arg) || + !strcmp("--word-regexp", arg)) { + opt.word_regexp = 1; + continue; + } if (!strncmp("-A", arg, 2) || !strncmp("-B", arg, 2) || !strncmp("-C", arg, 2) ||