From e0eb889f8e6597120527b0312874ff4c8448108e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 1 May 2006 12:27:56 -0700 Subject: [PATCH 1/3] builtin-grep: wildcard pathspec fixes This tweaks the pathspec wildcard used in builtin-grep to match that of ls-files. With this: git grep -e DEBUG -- '*/Kconfig*' would work like the shell script version, and you could even do: git grep -e DEBUG --cached -- '*/Kconfig*' ;# from index git grep -e DEBUG v2.6.12 -- '*/Kconfig*' ;# from rev Signed-off-by: Junio C Hamano --- builtin-grep.c | 89 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 25 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index 36150bf4ef..653b65ea10 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -12,33 +12,66 @@ #include "revision.h" #include "builtin.h" #include +#include +/* + * git grep pathspecs are somewhat different from diff-tree pathspecs; + * pathname wildcards are allowed. + */ static int pathspec_matches(struct diff_options *opt, const char *name) { - int i, j; - int namelen; + int namelen, i; if (!opt->nr_paths) return 1; namelen = strlen(name); for (i = 0; i < opt->nr_paths; i++) { const char *match = opt->paths[i]; int matchlen = opt->pathlens[i]; - if (matchlen <= namelen) { - if (!strncmp(name, match, matchlen)) - return 1; - continue; - } - /* If name is "Documentation" and pathspec is - * "Documentation/", they should match. Maybe - * we would want to strip it in get_pathspec()??? - */ - if (strncmp(name, match, namelen)) - continue; - for (j = namelen; j < matchlen; j++) - if (match[j] != '/') - break; - if (matchlen <= j) + const char *slash, *cp; + + if ((matchlen <= namelen) && + !strncmp(name, match, matchlen) && + (match[matchlen-1] == '/' || + name[matchlen] == '\0' || name[matchlen] == '/')) return 1; + if (!fnmatch(match, name, 0)) + return 1; + if (name[namelen-1] != '/') + continue; + + /* We are being asked if the name directory is worth + * descending into. + * + * Find the longest leading directory name that does + * not have metacharacter in the pathspec; the name + * we are looking at must overlap with that directory. + */ + for (cp = match, slash = NULL; cp - match < matchlen; cp++) { + char ch = *cp; + if (ch == '/') + slash = cp; + if (ch == '*' || ch == '[') + break; + } + if (!slash) + slash = match; /* toplevel */ + else + slash++; + if (namelen <= slash - match) { + /* Looking at "Documentation/" and + * the pattern says "Documentation/howto/", or + * "Documentation/diff*.txt". + */ + if (!memcmp(match, name, namelen)) + return 1; + } + else { + /* Looking at "Documentation/howto/" and + * the pattern says "Documentation/h*". + */ + if (!memcmp(match, name, slash - match)) + return 1; + } } return 0; } @@ -232,17 +265,17 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, int hit = 0; const char *path; const unsigned char *sha1; - char *down_base; + char *down; char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100); if (tree_name[0]) { int offset = sprintf(path_buf, "%s:", tree_name); - down_base = path_buf + offset; - strcat(down_base, base); + down = path_buf + offset; + strcat(down, base); } else { - down_base = path_buf; - strcpy(down_base, base); + down = path_buf; + strcpy(down, base); } len = strlen(path_buf); @@ -252,7 +285,14 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, pathlen = strlen(path); strcpy(path_buf + len, path); - if (!pathspec_matches(&revs->diffopt, down_base)) + if (S_ISDIR(mode)) + /* Match "abc/" against pathspec to + * decide if we want to descend into "abc" + * directory. + */ + strcpy(path_buf + len + pathlen, "/"); + + if (!pathspec_matches(&revs->diffopt, down)) ; else if (S_ISREG(mode)) hit |= grep_sha1(opt, sha1, path_buf); @@ -264,9 +304,8 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, if (!data) die("unable to read tree (%s)", sha1_to_hex(sha1)); - strcpy(path_buf + len + pathlen, "/"); sub.buf = data; - hit = grep_tree(opt, revs, &sub, tree_name, down_base); + hit |= grep_tree(opt, revs, &sub, tree_name, down); free(data); } update_tree_entry(tree); From df0e7aa8644eef5ebc018bf838739e25f3494b6c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 1 May 2006 12:39:21 -0700 Subject: [PATCH 2/3] builtin-grep: support '-l' option. Signed-off-by: Junio C Hamano --- builtin-grep.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/builtin-grep.c b/builtin-grep.c index 653b65ea10..c3e6701aa0 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -81,6 +81,7 @@ struct grep_opt { regex_t regexp; unsigned linenum:1; unsigned invert:1; + unsigned name_only:1; int regflags; unsigned pre_context; unsigned post_context; @@ -139,6 +140,10 @@ static int grep_buffer(struct grep_opt *opt, const char *name, if (opt->invert) hit = !hit; if (hit) { + if (opt->name_only) { + printf("%s\n", name); + return 1; + } /* Hit at this line. If we haven't shown the * pre-context lines, we would need to show them. */ @@ -406,6 +411,11 @@ int cmd_grep(int argc, const char **argv, char **envp) */ continue; } + if (!strcmp("-l", arg) || + !strcmp("--files-with-matches", arg)) { + opt.name_only = 1; + continue; + } if (!strcmp("-A", arg) || !strcmp("-B", arg) || !strcmp("-C", arg)) { From 1362671f6a716e1a34abbcab43adfb68c7e2fd3b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 1 May 2006 15:58:29 -0700 Subject: [PATCH 3/3] builtin-grep: do not use setup_revisions() Grep may want to grok multiple revisions, but it does not make much sense to walk revisions while doing so. This stops calling the code to parse parameters for the revision walker. The parameter parsing for the optional "-e" option becomes a lot simpler with it as well. Signed-off-by: Junio C Hamano --- builtin-grep.c | 265 ++++++++++++++++++++++++++----------------------- 1 file changed, 139 insertions(+), 126 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index c3e6701aa0..4be1514a4e 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -8,8 +8,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" -#include "diff.h" -#include "revision.h" +#include "tree-walk.h" #include "builtin.h" #include #include @@ -18,15 +17,15 @@ * git grep pathspecs are somewhat different from diff-tree pathspecs; * pathname wildcards are allowed. */ -static int pathspec_matches(struct diff_options *opt, const char *name) +static int pathspec_matches(const char **paths, const char *name) { int namelen, i; - if (!opt->nr_paths) + if (!paths || !*paths) return 1; namelen = strlen(name); - for (i = 0; i < opt->nr_paths; i++) { - const char *match = opt->paths[i]; - int matchlen = opt->pathlens[i]; + for (i = 0; paths[i]; i++) { + const char *match = paths[i]; + int matchlen = strlen(match); const char *slash, *cp; if ((matchlen <= namelen) && @@ -241,7 +240,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) return i; } -static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) +static int grep_cache(struct grep_opt *opt, const char **paths, int cached) { int hit = 0; int nr; @@ -251,7 +250,7 @@ static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) struct cache_entry *ce = active_cache[nr]; if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode))) continue; - if (!pathspec_matches(&revs->diffopt, ce->name)) + if (!pathspec_matches(paths, ce->name)) continue; if (cached) hit |= grep_sha1(opt, ce->sha1, ce->name); @@ -261,7 +260,7 @@ static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) return hit; } -static int grep_tree(struct grep_opt *opt, struct rev_info *revs, +static int grep_tree(struct grep_opt *opt, const char **paths, struct tree_desc *tree, const char *tree_name, const char *base) { @@ -297,7 +296,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, */ strcpy(path_buf + len + pathlen, "/"); - if (!pathspec_matches(&revs->diffopt, down)) + if (!pathspec_matches(paths, down)) ; else if (S_ISREG(mode)) hit |= grep_sha1(opt, sha1, path_buf); @@ -310,7 +309,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, die("unable to read tree (%s)", sha1_to_hex(sha1)); sub.buf = data; - hit |= grep_tree(opt, revs, &sub, tree_name, down); + hit |= grep_tree(opt, paths, &sub, tree_name, down); free(data); } update_tree_entry(tree); @@ -318,7 +317,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, return hit; } -static int grep_object(struct grep_opt *opt, struct rev_info *revs, +static int grep_object(struct grep_opt *opt, const char **paths, struct object *obj, const char *name) { if (!strcmp(obj->type, blob_type)) @@ -333,7 +332,7 @@ static int grep_object(struct grep_opt *opt, struct rev_info *revs, if (!data) die("unable to read tree (%s)", sha1_to_hex(obj->sha1)); tree.buf = data; - hit = grep_tree(opt, revs, &tree, name, ""); + hit = grep_tree(opt, paths, &tree, name, ""); free(data); return hit; } @@ -345,102 +344,119 @@ static const char builtin_grep_usage[] = int cmd_grep(int argc, const char **argv, char **envp) { - struct rev_info rev; - const char **dst, **src; int err; int hit = 0; - int no_more_arg = 0; - int seen_range = 0; + int no_more_flags = 0; int seen_noncommit = 0; int cached = 0; struct grep_opt opt; - struct object_list *list; + struct object_list *list, **tail, *object_list = NULL; + const char *prefix = setup_git_directory(); + const char **paths = NULL; memset(&opt, 0, sizeof(opt)); opt.regflags = REG_NEWLINE; /* - * Interpret and remove the grep options upfront. Sigh... + * No point using rev_info, really. */ - for (dst = src = &argv[1]; src < argc + argv; ) { - const char *arg = *src++; - if (!no_more_arg) { - if (!strcmp("--", arg)) { - no_more_arg = 1; - *dst++ = arg; - continue; - } - if (!strcmp("--cached", arg)) { - cached = 1; - continue; - } - if (!strcmp("-i", arg) || - !strcmp("--ignore-case", arg)) { - opt.regflags |= REG_ICASE; - continue; - } - if (!strcmp("-v", arg) || - !strcmp("--invert-match", arg)) { - opt.invert = 1; - continue; - } - if (!strcmp("-E", arg) || - !strcmp("--extended-regexp", arg)) { - opt.regflags |= REG_EXTENDED; - continue; - } - if (!strcmp("-G", arg) || - !strcmp("--basic-regexp", arg)) { - opt.regflags &= ~REG_EXTENDED; - continue; - } - if (!strcmp("-e", arg)) { - if (src < argc + argv) { - opt.pattern = *src++; - continue; - } - usage(builtin_grep_usage); - } - if (!strcmp("-n", arg)) { - opt.linenum = 1; - continue; - } - if (!strcmp("-H", arg)) { - /* We always show the pathname, so this - * is a noop. - */ - continue; - } - if (!strcmp("-l", arg) || - !strcmp("--files-with-matches", arg)) { - opt.name_only = 1; - continue; - } - if (!strcmp("-A", arg) || - !strcmp("-B", arg) || - !strcmp("-C", arg)) { - unsigned num; - if ((argc + argv <= src) || - sscanf(*src++, "%u", &num) != 1) - usage(builtin_grep_usage); - switch (arg[1]) { - case 'A': - opt.post_context = num; - break; - case 'C': - opt.post_context = num; - case 'B': - opt.pre_context = num; - break; - } - continue; - } + while (1 < argc) { + const char *arg = argv[1]; + argc--; argv++; + if (!strcmp("--cached", arg)) { + cached = 1; + continue; + } + if (!strcmp("-i", arg) || + !strcmp("--ignore-case", arg)) { + opt.regflags |= REG_ICASE; + continue; + } + if (!strcmp("-v", arg) || + !strcmp("--invert-match", arg)) { + opt.invert = 1; + continue; + } + if (!strcmp("-E", arg) || + !strcmp("--extended-regexp", arg)) { + opt.regflags |= REG_EXTENDED; + continue; + } + if (!strcmp("-G", arg) || + !strcmp("--basic-regexp", arg)) { + opt.regflags &= ~REG_EXTENDED; + continue; + } + if (!strcmp("-n", arg)) { + opt.linenum = 1; + continue; + } + if (!strcmp("-H", arg)) { + /* We always show the pathname, so this + * is a noop. + */ + continue; + } + if (!strcmp("-l", arg) || + !strcmp("--files-with-matches", arg)) { + opt.name_only = 1; + continue; + } + if (!strcmp("-A", arg) || + !strcmp("-B", arg) || + !strcmp("-C", arg)) { + unsigned num; + if (argc <= 1 || + sscanf(*++argv, "%u", &num) != 1) + usage(builtin_grep_usage); + argc--; + switch (arg[1]) { + case 'A': + opt.post_context = num; + break; + case 'C': + opt.post_context = num; + case 'B': + opt.pre_context = num; + break; + } + continue; + } + if (!strcmp("-e", arg)) { + if (1 < argc) { + /* We probably would want to do + * -e pat1 -e pat2 as well later... + */ + if (opt.pattern) + die("more than one pattern?"); + opt.pattern = *++argv; + argc--; + continue; + } + usage(builtin_grep_usage); + } + if (!strcmp("--", arg)) { + no_more_flags = 1; + continue; + } + /* Either unrecognized option or a single pattern */ + if (!no_more_flags && *arg == '-') + usage(builtin_grep_usage); + if (!opt.pattern) { + opt.pattern = arg; + break; + } + else { + /* We are looking at the first path or rev; + * it is found at argv[0] after leaving the + * loop. + */ + argc++; argv--; + break; } - *dst++ = arg; } if (!opt.pattern) die("no pattern given."); - err = regcomp(&opt.regexp, opt.pattern, opt.regflags); if (err) { char errbuf[1024]; @@ -448,11 +464,32 @@ int cmd_grep(int argc, const char **argv, char **envp) regfree(&opt.regexp); die("'%s': %s", opt.pattern, errbuf); } + tail = &object_list; + while (1 < argc) { + struct object *object; + struct object_list *elem; + const char *arg = argv[1]; + unsigned char sha1[20]; + if (get_sha1(arg, sha1) < 0) + break; + object = parse_object(sha1); + if (!object) + die("bad object %s", arg); + elem = object_list_insert(object, tail); + elem->name = arg; + tail = &elem->next; + argc--; argv++; + } + if (1 < argc) + paths = get_pathspec(prefix, argv + 1); + else if (prefix) { + paths = xcalloc(2, sizeof(const char *)); + paths[0] = prefix; + paths[1] = NULL; + } - init_revisions(&rev); - *dst = NULL; - argc = setup_revisions(dst - argv, argv, &rev, NULL); - + if (!object_list) + return !grep_cache(&opt, paths, cached); /* * Do not walk "grep -e foo master next pu -- Documentation/" * but do walk "grep -e foo master..next -- Documentation/". @@ -460,43 +497,19 @@ int cmd_grep(int argc, const char **argv, char **envp) * "grep -e foo v1.0.0:Documentation/ master..next" * so detect that and complain. */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; - if (list->item->flags & UNINTERESTING) - seen_range = 1; real_obj = deref_tag(list->item, NULL, 0); if (strcmp(real_obj->type, commit_type)) seen_noncommit = 1; } - if (!rev.pending_objects) - return !grep_cache(&opt, &rev, cached); if (cached) die("both --cached and revisions given."); - if (seen_range && seen_noncommit) - die("both A..B and non commit are given."); - if (seen_range) { - struct commit *commit; - prepare_revision_walk(&rev); - while ((commit = get_revision(&rev)) != NULL) { - unsigned char *sha1 = commit->object.sha1; - const char *n = find_unique_abbrev(sha1, rev.abbrev); - char rev_name[41]; - strcpy(rev_name, n); - if (grep_object(&opt, &rev, &commit->object, rev_name)) - hit = 1; - commit->buffer = NULL; - } - return !hit; - } - - /* all of them are non-commit; do not walk, and - * do not lose their names. - */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; real_obj = deref_tag(list->item, NULL, 0); - if (grep_object(&opt, &rev, real_obj, list->name)) + if (grep_object(&opt, paths, real_obj, list->name)) hit = 1; } return !hit;