2006-05-17 04:02:14 +02:00
|
|
|
#ifndef DIR_H
|
|
|
|
#define DIR_H
|
|
|
|
|
2012-12-27 03:32:21 +01:00
|
|
|
/* See Documentation/technical/api-directory-listing.txt */
|
|
|
|
|
2012-06-01 20:28:00 +02:00
|
|
|
#include "strbuf.h"
|
|
|
|
|
2006-05-17 04:02:14 +02:00
|
|
|
struct dir_entry {
|
builtin-add: simplify (and increase accuracy of) exclude handling
Previously, the code would always set up the excludes, and then manually
pick through the pathspec we were given, assuming that non-added but
existing paths were just ignored. This was mostly correct, but would
erroneously mark a totally empty directory as 'ignored'.
Instead, we now use the collect_ignored option of dir_struct, which
unambiguously tells us whether a path was ignored. This simplifies the
code, and means empty directories are now just not mentioned at all.
Furthermore, we now conditionally ask dir_struct to respect excludes,
depending on whether the '-f' flag has been set. This means we don't have
to pick through the result, checking for an 'ignored' flag; ignored entries
were either added or not in the first place.
We can safely get rid of the special 'ignored' flags to dir_entry, which
were not used anywhere else.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonas Fonseca <fonseca@diku.dk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-12 23:42:14 +02:00
|
|
|
unsigned int len;
|
2006-05-17 04:02:14 +02:00
|
|
|
char name[FLEX_ARRAY]; /* more */
|
|
|
|
};
|
|
|
|
|
2007-10-28 21:27:13 +01:00
|
|
|
/*
 * Bit flags for exclude patterns (presumably stored in
 * struct exclude.flags -- confirm against dir.c).  Each value is a
 * distinct bit; bit value 2 is not defined in this header.
 */
#define EXC_FLAG_NODIR 1
#define EXC_FLAG_ENDSWITH 4
#define EXC_FLAG_MUSTBEDIR 8
#define EXC_FLAG_NEGATIVE 16
|
2007-10-28 21:27:13 +01:00
|
|
|
|
2012-12-27 03:32:21 +01:00
|
|
|
/*
 * Each excludes file will be parsed into a fresh exclude_list which
 * is appended to the relevant exclude_list_group (either EXC_DIRS or
 * EXC_FILE).  An exclude_list within the EXC_CMDL exclude_list_group
 * can also be used to represent the list of --exclude values passed
 * via CLI args.
 */
struct exclude_list {
	int nr;
	int alloc;

	/* remember pointer to exclude file contents so we can free() */
	char *filebuf;

	/* origin of list, e.g. path to filename, or descriptive string */
	const char *src;

	struct exclude {
		/*
		 * This allows callers of last_exclude_matching() etc.
		 * to determine the origin of the matching pattern.
		 */
		struct exclude_list *el;

		const char *pattern;
		int patternlen;
		int nowildcardlen;
		const char *base;
		int baselen;
		int flags;

		/*
		 * Counting starts from 1 for line numbers in ignore files,
		 * and from -1 decrementing for patterns from CLI args.
		 */
		int srcpos;
	} **excludes;
};
|
|
|
|
|
2012-12-27 03:32:21 +01:00
|
|
|
/*
 * The contents of the per-directory exclude files are lazily read on
 * demand and then cached in memory, one per exclude_stack struct, in
 * order to avoid opening and parsing each one every time that
 * directory is traversed.
 */
struct exclude_stack {
	struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
	int baselen;
	int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
};
|
|
|
|
|
|
|
|
/*
 * A group of exclude lists: `el` points at the lists, with `nr` and
 * `alloc` as the accompanying counters (presumably used/allocated
 * element counts -- confirm in dir.c).
 */
struct exclude_list_group {
	int nr, alloc;
	struct exclude_list *el;
};
|
|
|
|
|
2006-05-17 04:02:14 +02:00
|
|
|
struct dir_struct {
|
|
|
|
int nr, alloc;
|
2007-06-11 15:39:50 +02:00
|
|
|
int ignored_nr, ignored_alloc;
|
2009-02-16 13:20:25 +01:00
|
|
|
enum {
|
|
|
|
DIR_SHOW_IGNORED = 1<<0,
|
|
|
|
DIR_SHOW_OTHER_DIRECTORIES = 1<<1,
|
|
|
|
DIR_HIDE_EMPTY_DIRECTORIES = 1<<2,
|
|
|
|
DIR_NO_GITLINKS = 1<<3,
|
2013-04-15 21:15:03 +02:00
|
|
|
DIR_COLLECT_IGNORED = 1<<4,
|
|
|
|
DIR_SHOW_IGNORED_TOO = 1<<5
|
2009-02-16 13:20:25 +01:00
|
|
|
} flags;
|
2006-05-17 04:02:14 +02:00
|
|
|
struct dir_entry **entries;
|
2007-06-11 15:39:50 +02:00
|
|
|
struct dir_entry **ignored;
|
2006-05-17 04:02:14 +02:00
|
|
|
|
|
|
|
/* Exclude info */
|
|
|
|
const char *exclude_per_dir;
|
2013-01-06 17:58:03 +01:00
|
|
|
|
2007-11-29 11:17:44 +01:00
|
|
|
/*
|
2013-01-06 17:58:03 +01:00
|
|
|
* We maintain three groups of exclude pattern lists:
|
|
|
|
*
|
2007-11-29 11:17:44 +01:00
|
|
|
* EXC_CMDL lists patterns explicitly given on the command line.
|
|
|
|
* EXC_DIRS lists patterns obtained from per-directory ignore files.
|
2013-01-06 17:58:03 +01:00
|
|
|
* EXC_FILE lists patterns from fallback ignore files, e.g.
|
|
|
|
* - .git/info/exclude
|
|
|
|
* - core.excludesfile
|
|
|
|
*
|
|
|
|
* Each group contains multiple exclude lists, a single list
|
|
|
|
* per source.
|
2007-11-29 11:17:44 +01:00
|
|
|
*/
|
|
|
|
#define EXC_CMDL 0
|
|
|
|
#define EXC_DIRS 1
|
|
|
|
#define EXC_FILE 2
|
2013-01-06 17:58:03 +01:00
|
|
|
struct exclude_list_group exclude_list_group[3];
|
2007-11-29 11:17:44 +01:00
|
|
|
|
2012-12-27 03:32:21 +01:00
|
|
|
/*
|
|
|
|
* Temporary variables which are used during loading of the
|
|
|
|
* per-directory exclude lists.
|
|
|
|
*
|
|
|
|
* exclude_stack points to the top of the exclude_stack, and
|
|
|
|
* basebuf contains the full path to the current
|
dir.c: unify is_excluded and is_path_excluded APIs
The is_excluded and is_path_excluded APIs are very similar, except for a
few noteworthy differences:
is_excluded doesn't handle ignored directories, results for paths within
ignored directories are incorrect. This is probably based on the premise
that recursive directory scans should stop at ignored directories, which
is no longer true (in certain cases, read_directory_recursive currently
calls is_excluded *and* is_path_excluded to get correct ignored state).
is_excluded caches parsed .gitignore files of the last directory in struct
dir_struct. If the directory changes, it finds a common parent directory
and is very careful to drop only as much state as necessary. On the other
hand, is_excluded will also read and parse .gitignore files in already
ignored directories, which are completely irrelevant.
is_path_excluded correctly handles ignored directories by checking if any
component in the path is excluded. As it uses is_excluded internally, this
unfortunately forces is_excluded to drop and re-read all .gitignore files,
as there is no common parent directory for the root dir.
is_path_excluded tracks state in a separate struct path_exclude_check,
which is essentially a wrapper of dir_struct with two more fields. However,
as is_path_excluded also modifies dir_struct, it is not possible to e.g.
use multiple path_exclude_check structures with the same dir_struct in
parallel. The additional structure just unnecessarily complicates the API.
Teach is_excluded / prep_exclude about ignored directories: whenever
entering a new directory, first check if the entire directory is excluded.
Remember the excluded state in dir_struct. Don't traverse into already
ignored directories (i.e. don't read irrelevant .gitignore files).
Directories could also be excluded by exclude patterns specified on the
command line or .git/info/exclude, so we cannot simply skip prep_exclude
entirely if there's no .gitignore file name (dir_struct.exclude_per_dir).
Move this check to just before actually reading the file.
is_path_excluded is now equivalent to is_excluded, so we can simply
redirect to it (the public API is cleaned up in the next patch).
The performance impact of the additional ignored check per directory is
hardly noticeable when reading directories recursively (e.g. 'git status').
However, performance of git commands using the is_path_excluded API (e.g.
'git ls-files --cached --ignored --exclude-standard') is greatly improved
as this no longer re-reads .gitignore files on each call.
Here's some performance data from the linux and WebKit repos (best of 10
runs on a Debian Linux on SSD, core.preloadIndex=true):
| ls-files -ci | status | status --ignored
| linux | WebKit | linux | WebKit | linux | WebKit
-------+-------+--------+-------+--------+-------+---------
before | 0.506 | 6.539 | 0.212 | 1.555 | 0.323 | 2.541
after | 0.080 | 1.191 | 0.218 | 1.583 | 0.321 | 2.579
gain | 6.325 | 5.490 | 0.972 | 0.982 | 1.006 | 0.985
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-15 21:12:14 +02:00
|
|
|
* (sub)directory in the traversal. Exclude points to the
|
|
|
|
* matching exclude struct if the directory is excluded.
|
2012-12-27 03:32:21 +01:00
|
|
|
*/
|
2007-11-29 11:17:44 +01:00
|
|
|
struct exclude_stack *exclude_stack;
|
dir.c: unify is_excluded and is_path_excluded APIs
The is_excluded and is_path_excluded APIs are very similar, except for a
few noteworthy differences:
is_excluded doesn't handle ignored directories, results for paths within
ignored directories are incorrect. This is probably based on the premise
that recursive directory scans should stop at ignored directories, which
is no longer true (in certain cases, read_directory_recursive currently
calls is_excluded *and* is_path_excluded to get correct ignored state).
is_excluded caches parsed .gitignore files of the last directory in struct
dir_struct. If the directory changes, it finds a common parent directory
and is very careful to drop only as much state as necessary. On the other
hand, is_excluded will also read and parse .gitignore files in already
ignored directories, which are completely irrelevant.
is_path_excluded correctly handles ignored directories by checking if any
component in the path is excluded. As it uses is_excluded internally, this
unfortunately forces is_excluded to drop and re-read all .gitignore files,
as there is no common parent directory for the root dir.
is_path_excluded tracks state in a separate struct path_exclude_check,
which is essentially a wrapper of dir_struct with two more fields. However,
as is_path_excluded also modifies dir_struct, it is not possible to e.g.
use multiple path_exclude_check structures with the same dir_struct in
parallel. The additional structure just unnecessarily complicates the API.
Teach is_excluded / prep_exclude about ignored directories: whenever
entering a new directory, first check if the entire directory is excluded.
Remember the excluded state in dir_struct. Don't traverse into already
ignored directories (i.e. don't read irrelevant .gitignore files).
Directories could also be excluded by exclude patterns specified on the
command line or .git/info/exclude, so we cannot simply skip prep_exclude
entirely if there's no .gitignore file name (dir_struct.exclude_per_dir).
Move this check to just before actually reading the file.
is_path_excluded is now equivalent to is_excluded, so we can simply
redirect to it (the public API is cleaned up in the next patch).
The performance impact of the additional ignored check per directory is
hardly noticeable when reading directories recursively (e.g. 'git status').
However, performance of git commands using the is_path_excluded API (e.g.
'git ls-files --cached --ignored --exclude-standard') is greatly improved
as this no longer re-reads .gitignore files on each call.
Here's some performance data from the linux and WebKit repos (best of 10
runs on a Debian Linux on SSD, core.preloadIndex=true):
| ls-files -ci | status | status --ignored
| linux | WebKit | linux | WebKit | linux | WebKit
-------+-------+--------+-------+--------+-------+---------
before | 0.506 | 6.539 | 0.212 | 1.555 | 0.323 | 2.541
after | 0.080 | 1.191 | 0.218 | 1.583 | 0.321 | 2.579
gain | 6.325 | 5.490 | 0.972 | 0.982 | 1.006 | 0.985
Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-04-15 21:12:14 +02:00
|
|
|
struct exclude *exclude;
|
2007-11-29 11:17:44 +01:00
|
|
|
char basebuf[PATH_MAX];
|
2006-05-17 04:02:14 +02:00
|
|
|
};
|
|
|
|
|
2013-01-06 17:58:06 +01:00
|
|
|
/*
 * The ordering of these constants is significant, with
 * higher-numbered match types signifying "closer" (i.e. more
 * specific) matches which will override lower-numbered match types
 * when populating the seen[] array.
 */
#define MATCHED_RECURSIVELY 1
#define MATCHED_FNMATCH 2
#define MATCHED_EXACTLY 3
|
2013-07-14 10:35:28 +02:00
|
|
|
/* Pathspec helpers and matching (implemented outside this header). */
extern int simple_length(const char *match);
extern int no_wildcard(const char *string);
extern char *common_prefix(const char **pathspec);
extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);
extern int match_pathspec_depth(const struct pathspec *pathspec,
				const char *name, int namelen,
				int prefix, char *seen);
extern int within_depth(const char *name, int namelen, int depth, int max_depth);

/* Directory reading into a dir_struct. */
extern int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec);
extern int read_directory(struct dir_struct *, const char *path, int len, const struct pathspec *pathspec);

extern int is_excluded_from_list(const char *pathname, int pathlen, const char *basename,
				 int *dtype, struct exclude_list *el);
/* `extern` added for consistency with the other prototypes; linkage is unchanged. */
extern struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
|
2012-06-01 20:28:00 +02:00
|
|
|
|
2012-10-15 08:24:39 +02:00
|
|
|
/*
 * these implement the matching logic for dir.c:excluded_from_list and
 * attr.c:path_matches()
 */
extern int match_basename(const char *, int,
			  const char *, int, int, int);
extern int match_pathname(const char *, int,
			  const char *, int,
			  const char *, int, int, int);
|
|
|
|
|
2013-04-15 21:12:57 +02:00
|
|
|
/* Querying whether a path is excluded. */
extern struct exclude *last_exclude_matching(struct dir_struct *dir,
					     const char *name, int *dtype);

extern int is_excluded(struct dir_struct *dir, const char *name, int *dtype);

/* Building and tearing down exclude lists. */
extern struct exclude_list *add_exclude_list(struct dir_struct *dir,
					     int group_type, const char *src);
extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
					  struct exclude_list *el, int check_index);
extern void add_excludes_from_file(struct dir_struct *, const char *fname);
extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
extern void add_exclude(const char *string, const char *base,
			int baselen, struct exclude_list *el, int srcpos);
extern void clear_exclude_list(struct exclude_list *el);
extern void clear_directory(struct dir_struct *dir);

/* Miscellaneous path predicates. */
extern int file_exists(const char *);

extern int is_inside_dir(const char *dir);
extern int dir_inside_of(const char *subdir, const char *dir);
|
2007-08-01 02:29:17 +02:00
|
|
|
|
2009-01-10 13:07:50 +01:00
|
|
|
/*
 * Return non-zero when `name` is exactly "." or "..", and zero for any
 * other string (including the empty string and longer names that merely
 * start with dots).  `name` must be a NUL-terminated string; the checks
 * short-circuit so no byte past the terminator is read.
 */
static inline int is_dot_or_dotdot(const char *name)
{
	return (name[0] == '.' &&
		(name[1] == '\0' ||
		 (name[1] == '.' && name[2] == '\0')));
}
|
|
|
|
|
2009-01-11 13:19:12 +01:00
|
|
|
/*
 * NOTE(review): implemented outside this header.  By its name this
 * presumably reports whether the directory at `dir` has no entries;
 * confirm the return convention against the definition.
 */
extern int is_empty_dir(const char *dir);
|
|
|
|
|
core.excludesfile clean-up
There are inconsistencies in the way commands currently handle
the core.excludesfile configuration variable. The problem is
the variable is too new to be noticed by anything other than
git-add and git-status.
* git-ls-files does not notice any of the "ignore" files by
default, as it predates the standardized set of ignore files.
The calling scripts established the convention to use
.git/info/exclude, .gitignore, and later core.excludesfile.
* git-add and git-status know about it because they call
add_excludes_from_file() directly with their own notion of
which standard set of ignore files to use. This is just a
stupid duplication of code that need to be updated every time
the definition of the standard set of ignore files is
changed.
* git-read-tree takes --exclude-per-directory=<gitignore>,
not because the flexibility was needed. Again, this was
because the option predates the standardization of the ignore
files.
* git-merge-recursive uses hardcoded per-directory .gitignore
and nothing else. git-clean (scripted version) does not
honor core.* because its call to underlying ls-files does not
know about it. git-clean in C (parked in 'pu') doesn't either.
We probably could change git-ls-files to use the standard set
when no excludes are specified on the command line and ignore
processing was asked, or something like that, but that will be a
change in semantics and might break people's scripts in a subtle
way. I am somewhat reluctant to make such a change.
On the other hand, I think it makes perfect sense to fix
git-read-tree, git-merge-recursive and git-clean to follow the
same rule as other commands. I do not think of a valid use case
to give an exclude-per-directory that is nonstandard to
read-tree command, outside a "negative" test in the t1004 test
script.
This patch is the first step to untangle this mess.
The next step would be to teach read-tree, merge-recursive and
clean (in C) to use setup_standard_excludes().
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-14 09:05:00 +01:00
|
|
|
/*
 * NOTE(review): implemented outside this header.  Presumably configures
 * `dir` with the standard set of ignore files (.git/info/exclude,
 * .gitignore, core.excludesfile) -- confirm against the definition.
 */
extern void setup_standard_excludes(struct dir_struct *dir);
|
2009-07-01 00:33:45 +02:00
|
|
|
|
|
|
|
/*
 * Flag bits for the `flag` argument of remove_dir_recursively()
 * (octal literals; each is a distinct bit).
 */
#define REMOVE_DIR_EMPTY_ONLY 01
#define REMOVE_DIR_KEEP_NESTED_GIT 02
#define REMOVE_DIR_KEEP_TOPLEVEL 04
extern int remove_dir_recursively(struct strbuf *path, int flag);
|
2007-09-28 17:28:54 +02:00
|
|
|
|
2008-09-27 00:56:46 +02:00
|
|
|
/* tries to remove the path with empty directories along it, ignores ENOENT */
extern int remove_path(const char *path);
|
|
|
|
|
2010-10-03 11:56:41 +02:00
|
|
|
/*
 * "icase" counterparts of strcmp/strncmp/fnmatch.
 * NOTE(review): whether case is actually ignored is decided by the
 * implementation, which is not visible from this header -- confirm.
 */
extern int strcmp_icase(const char *a, const char *b);
extern int strncmp_icase(const char *a, const char *b, size_t count);
extern int fnmatch_icase(const char *pattern, const char *string, int flags);
|
|
|
|
|
2012-11-24 05:33:49 +01:00
|
|
|
/*
 * The prefix part of pattern must not contain wildcards.
 */
#define GFNM_PATHNAME 1		/* similar to FNM_PATHNAME */
#define GFNM_ONESTAR  2		/* there is only _one_ wildcard, a star */

extern int git_fnmatch(const char *pattern, const char *string,
		       int flags, int prefix);
|
|
|
|
|
2006-05-17 04:02:14 +02:00
|
|
|
#endif
|