1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-11-09 02:33:11 +01:00
git/builtin-fetch.c

658 lines
17 KiB
C
Raw Normal View History

/*
* "git fetch"
*/
#include "cache.h"
#include "refs.h"
#include "commit.h"
#include "builtin.h"
#include "path-list.h"
#include "remote.h"
#include "transport.h"
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
#include "run-command.h"
#include "parse-options.h"
static const char * const builtin_fetch_usage[] = {
"git-fetch [options] [<repository> <refspec>...]",
NULL
};
enum {
TAGS_UNSET = 0,
TAGS_DEFAULT = 1,
TAGS_SET = 2
};
static int append, force, keep, update_head_ok, verbose, quiet;
static int tags = TAGS_DEFAULT;
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
static const char *depth;
static const char *upload_pack;
static struct strbuf default_rla = STRBUF_INIT;
static struct transport *transport;
static struct option builtin_fetch_options[] = {
OPT__QUIET(&quiet),
OPT__VERBOSE(&verbose),
OPT_BOOLEAN('a', "append", &append,
"append to .git/FETCH_HEAD instead of overwriting"),
OPT_STRING(0, "upload-pack", &upload_pack, "PATH",
"path to upload pack on remote end"),
OPT_BOOLEAN('f', "force", &force,
"force overwrite of local branch"),
OPT_SET_INT('t', "tags", &tags,
"fetch all tags and associated objects", TAGS_SET),
OPT_SET_INT('n', NULL, &tags,
"do not fetch all tags (--no-tags)", TAGS_UNSET),
OPT_BOOLEAN('k', "keep", &keep, "keep downloaded pack"),
OPT_BOOLEAN('u', "update-head-ok", &update_head_ok,
"allow updating of HEAD ref"),
OPT_STRING(0, "depth", &depth, "DEPTH",
"deepen history of shallow clone"),
OPT_END()
};
static void unlock_pack(void)
{
if (transport)
transport_unlock_pack(transport);
}
static void unlock_pack_on_signal(int signo)
{
unlock_pack();
signal(SIGINT, SIG_DFL);
raise(signo);
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
static void add_merge_config(struct ref **head,
const struct ref *remote_refs,
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
struct branch *branch,
struct ref ***tail)
{
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
int i;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
for (i = 0; i < branch->merge_nr; i++) {
struct ref *rm, **old_tail = *tail;
struct refspec refspec;
for (rm = *head; rm; rm = rm->next) {
if (branch_merge_matches(branch, i, rm->name)) {
rm->merge = 1;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
break;
}
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
if (rm)
continue;
/*
* Not fetched to a tracking branch? We need to fetch
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
* it anyway to allow this branch's "branch.$name.merge"
* to be honored by git-pull, but we do not have to
* fail if branch.$name.merge is misconfigured to point
* at a nonexisting branch. If we were indeed called by
* git-pull, it will notice the misconfiguration because
* there is no entry in the resulting FETCH_HEAD marked
* for merging.
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
*/
refspec.src = branch->merge[i]->src;
refspec.dst = NULL;
refspec.pattern = 0;
refspec.force = 0;
get_fetch_map(remote_refs, &refspec, tail, 1);
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
for (rm = *old_tail; rm; rm = rm->next)
rm->merge = 1;
}
}
static void find_non_local_tags(struct transport *transport,
struct ref **head,
struct ref ***tail);
static struct ref *get_ref_map(struct transport *transport,
struct refspec *refs, int ref_count, int tags,
int *autotags)
{
int i;
struct ref *rm;
struct ref *ref_map = NULL;
struct ref **tail = &ref_map;
const struct ref *remote_refs = transport_get_remote_refs(transport);
if (ref_count || tags == TAGS_SET) {
for (i = 0; i < ref_count; i++) {
get_fetch_map(remote_refs, &refs[i], &tail, 0);
if (refs[i].dst && refs[i].dst[0])
*autotags = 1;
}
/* Merge everything on the command line, but not --tags */
for (rm = ref_map; rm; rm = rm->next)
rm->merge = 1;
if (tags == TAGS_SET) {
struct refspec refspec;
refspec.src = "refs/tags/";
refspec.dst = "refs/tags/";
refspec.pattern = 1;
refspec.force = 0;
get_fetch_map(remote_refs, &refspec, &tail, 0);
}
} else {
/* Use the defaults */
struct remote *remote = transport->remote;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
struct branch *branch = branch_get(NULL);
int has_merge = branch_has_merge_config(branch);
if (remote && (remote->fetch_refspec_nr || has_merge)) {
for (i = 0; i < remote->fetch_refspec_nr; i++) {
get_fetch_map(remote_refs, &remote->fetch[i], &tail, 0);
if (remote->fetch[i].dst &&
remote->fetch[i].dst[0])
*autotags = 1;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
if (!i && !has_merge && ref_map &&
!remote->fetch[0].pattern)
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
ref_map->merge = 1;
}
/*
* if the remote we're fetching from is the same
* as given in branch.<name>.remote, we add the
* ref given in branch.<name>.merge, too.
*/
if (has_merge &&
!strcmp(branch->remote_name, remote->name))
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
add_merge_config(&ref_map, remote_refs, branch, &tail);
} else {
ref_map = get_remote_ref(remote_refs, "HEAD");
if (!ref_map)
die("Couldn't find remote ref HEAD");
ref_map->merge = 1;
tail = &ref_map->next;
}
}
if (tags == TAGS_DEFAULT && *autotags)
find_non_local_tags(transport, &ref_map, &tail);
ref_remove_duplicates(ref_map);
return ref_map;
}
static int s_update_ref(const char *action,
struct ref *ref,
int check_old)
{
char msg[1024];
char *rla = getenv("GIT_REFLOG_ACTION");
static struct ref_lock *lock;
if (!rla)
rla = default_rla.buf;
snprintf(msg, sizeof(msg), "%s: %s", rla, action);
lock = lock_any_ref_for_update(ref->name,
check_old ? ref->old_sha1 : NULL, 0);
if (!lock)
return 1;
if (write_ref_sha1(lock, ref->new_sha1, msg) < 0)
return 1;
return 0;
}
#define SUMMARY_WIDTH (2 * DEFAULT_ABBREV + 3)
#define REFCOL_WIDTH 10
static int update_local_ref(struct ref *ref,
const char *remote,
int verbose,
char *display)
{
struct commit *current = NULL, *updated;
enum object_type type;
struct branch *current_branch = branch_get(NULL);
const char *pretty_ref = ref->name + (
!prefixcmp(ref->name, "refs/heads/") ? 11 :
!prefixcmp(ref->name, "refs/tags/") ? 10 :
!prefixcmp(ref->name, "refs/remotes/") ? 13 :
0);
*display = 0;
type = sha1_object_info(ref->new_sha1, NULL);
if (type < 0)
die("object %s not found", sha1_to_hex(ref->new_sha1));
if (!*ref->name) {
/* Not storing */
if (verbose)
sprintf(display, "* branch %s -> FETCH_HEAD", remote);
return 0;
}
if (!hashcmp(ref->old_sha1, ref->new_sha1)) {
if (verbose)
sprintf(display, "= %-*s %-*s -> %s", SUMMARY_WIDTH,
"[up to date]", REFCOL_WIDTH, remote,
pretty_ref);
return 0;
}
if (current_branch &&
!strcmp(ref->name, current_branch->name) &&
!(update_head_ok || is_bare_repository()) &&
!is_null_sha1(ref->old_sha1)) {
/*
* If this is the head, and it's not okay to update
* the head, and the old value of the head isn't empty...
*/
sprintf(display, "! %-*s %-*s -> %s (can't fetch in current branch)",
SUMMARY_WIDTH, "[rejected]", REFCOL_WIDTH, remote,
pretty_ref);
return 1;
}
if (!is_null_sha1(ref->old_sha1) &&
!prefixcmp(ref->name, "refs/tags/")) {
sprintf(display, "- %-*s %-*s -> %s",
SUMMARY_WIDTH, "[tag update]", REFCOL_WIDTH, remote,
pretty_ref);
return s_update_ref("updating tag", ref, 0);
}
current = lookup_commit_reference_gently(ref->old_sha1, 1);
updated = lookup_commit_reference_gently(ref->new_sha1, 1);
if (!current || !updated) {
const char *msg;
const char *what;
if (!strncmp(ref->name, "refs/tags/", 10)) {
msg = "storing tag";
what = "[new tag]";
}
else {
msg = "storing head";
what = "[new branch]";
}
sprintf(display, "* %-*s %-*s -> %s", SUMMARY_WIDTH, what,
REFCOL_WIDTH, remote, pretty_ref);
return s_update_ref(msg, ref, 0);
}
if (in_merge_bases(current, &updated, 1)) {
char quickref[83];
strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV));
strcat(quickref, "..");
strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV));
sprintf(display, " %-*s %-*s -> %s", SUMMARY_WIDTH, quickref,
REFCOL_WIDTH, remote, pretty_ref);
return s_update_ref("fast forward", ref, 1);
} else if (force || ref->force) {
char quickref[84];
strcpy(quickref, find_unique_abbrev(current->object.sha1, DEFAULT_ABBREV));
strcat(quickref, "...");
strcat(quickref, find_unique_abbrev(ref->new_sha1, DEFAULT_ABBREV));
sprintf(display, "+ %-*s %-*s -> %s (forced update)",
SUMMARY_WIDTH, quickref, REFCOL_WIDTH, remote, pretty_ref);
return s_update_ref("forced-update", ref, 1);
} else {
sprintf(display, "! %-*s %-*s -> %s (non fast forward)",
SUMMARY_WIDTH, "[rejected]", REFCOL_WIDTH, remote,
pretty_ref);
return 1;
}
}
static int store_updated_refs(const char *url, struct ref *ref_map)
{
FILE *fp;
struct commit *commit;
int url_len, i, note_len, shown_url = 0;
char note[1024];
const char *what, *kind;
struct ref *rm;
char *filename = git_path("FETCH_HEAD");
fp = fopen(filename, "a");
if (!fp)
return error("cannot open %s: %s\n", filename, strerror(errno));
for (rm = ref_map; rm; rm = rm->next) {
struct ref *ref = NULL;
if (rm->peer_ref) {
ref = xcalloc(1, sizeof(*ref) + strlen(rm->peer_ref->name) + 1);
strcpy(ref->name, rm->peer_ref->name);
hashcpy(ref->old_sha1, rm->peer_ref->old_sha1);
hashcpy(ref->new_sha1, rm->old_sha1);
ref->force = rm->peer_ref->force;
}
commit = lookup_commit_reference_gently(rm->old_sha1, 1);
if (!commit)
rm->merge = 0;
if (!strcmp(rm->name, "HEAD")) {
kind = "";
what = "";
}
else if (!prefixcmp(rm->name, "refs/heads/")) {
kind = "branch";
what = rm->name + 11;
}
else if (!prefixcmp(rm->name, "refs/tags/")) {
kind = "tag";
what = rm->name + 10;
}
else if (!prefixcmp(rm->name, "refs/remotes/")) {
kind = "remote branch";
what = rm->name + 13;
}
else {
kind = "";
what = rm->name;
}
url_len = strlen(url);
for (i = url_len - 1; url[i] == '/' && 0 <= i; i--)
;
url_len = i + 1;
if (4 < i && !strncmp(".git", url + i - 3, 4))
url_len = i - 3;
note_len = 0;
if (*what) {
if (*kind)
note_len += sprintf(note + note_len, "%s ",
kind);
note_len += sprintf(note + note_len, "'%s' of ", what);
}
note_len += sprintf(note + note_len, "%.*s", url_len, url);
fprintf(fp, "%s\t%s\t%s\n",
sha1_to_hex(commit ? commit->object.sha1 :
rm->old_sha1),
rm->merge ? "" : "not-for-merge",
note);
if (ref) {
update_local_ref(ref, what, verbose, note);
if (*note) {
if (!shown_url) {
fprintf(stderr, "From %.*s\n",
url_len, url);
shown_url = 1;
}
fprintf(stderr, " %s\n", note);
}
}
}
fclose(fp);
return 0;
}
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
/*
* We would want to bypass the object transfer altogether if
* everything we are going to fetch already exists and connected
* locally.
*
* The refs we are going to fetch are in to_fetch (nr_heads in
* total). If running
*
* $ git-rev-list --objects to_fetch[0] to_fetch[1] ... --not --all
*
* does not error out, that means everything reachable from the
* refs we are going to fetch exists and is connected to some of
* our existing refs.
*/
static int quickfetch(struct ref *ref_map)
{
struct child_process revlist;
struct ref *ref;
char **argv;
int i, err;
/*
* If we are deepening a shallow clone we already have these
* objects reachable. Running rev-list here will return with
* a good (0) exit status and we'll bypass the fetch that we
* really need to perform. Claiming failure now will ensure
* we perform the network exchange to deepen our history.
*/
if (depth)
return -1;
for (i = 0, ref = ref_map; ref; ref = ref->next)
i++;
if (!i)
return 0;
argv = xmalloc(sizeof(*argv) * (i + 6));
i = 0;
argv[i++] = xstrdup("rev-list");
argv[i++] = xstrdup("--quiet");
argv[i++] = xstrdup("--objects");
for (ref = ref_map; ref; ref = ref->next)
argv[i++] = xstrdup(sha1_to_hex(ref->old_sha1));
argv[i++] = xstrdup("--not");
argv[i++] = xstrdup("--all");
argv[i++] = NULL;
memset(&revlist, 0, sizeof(revlist));
revlist.argv = (const char**)argv;
revlist.git_cmd = 1;
revlist.no_stdin = 1;
revlist.no_stdout = 1;
revlist.no_stderr = 1;
err = run_command(&revlist);
for (i = 0; argv[i]; i++)
free(argv[i]);
free(argv);
return err;
}
static int fetch_refs(struct transport *transport, struct ref *ref_map)
{
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
int ret = quickfetch(ref_map);
if (ret)
ret = transport_fetch_refs(transport, ref_map);
if (!ret)
ret |= store_updated_refs(transport->url, ref_map);
transport_unlock_pack(transport);
return ret;
}
static int add_existing(const char *refname, const unsigned char *sha1,
int flag, void *cbdata)
{
struct path_list *list = (struct path_list *)cbdata;
path_list_insert(refname, list);
return 0;
}
static int will_fetch(struct ref **head, const unsigned char *sha1)
{
struct ref *rm = *head;
while (rm) {
if (!hashcmp(rm->old_sha1, sha1))
return 1;
rm = rm->next;
}
return 0;
}
static void find_non_local_tags(struct transport *transport,
struct ref **head,
struct ref ***tail)
{
struct path_list existing_refs = { NULL, 0, 0, 0 };
struct path_list new_refs = { NULL, 0, 0, 1 };
char *ref_name;
int ref_name_len;
const unsigned char *ref_sha1;
const struct ref *tag_ref;
struct ref *rm = NULL;
const struct ref *ref;
for_each_ref(add_existing, &existing_refs);
for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
if (prefixcmp(ref->name, "refs/tags"))
continue;
ref_name = xstrdup(ref->name);
ref_name_len = strlen(ref_name);
ref_sha1 = ref->old_sha1;
if (!strcmp(ref_name + ref_name_len - 3, "^{}")) {
ref_name[ref_name_len - 3] = 0;
tag_ref = transport_get_remote_refs(transport);
while (tag_ref) {
if (!strcmp(tag_ref->name, ref_name)) {
ref_sha1 = tag_ref->old_sha1;
break;
}
tag_ref = tag_ref->next;
}
}
if (!path_list_has_path(&existing_refs, ref_name) &&
!path_list_has_path(&new_refs, ref_name) &&
(has_sha1_file(ref->old_sha1) ||
will_fetch(head, ref->old_sha1))) {
path_list_insert(ref_name, &new_refs);
rm = alloc_ref(strlen(ref_name) + 1);
strcpy(rm->name, ref_name);
rm->peer_ref = alloc_ref(strlen(ref_name) + 1);
strcpy(rm->peer_ref->name, ref_name);
hashcpy(rm->old_sha1, ref_sha1);
**tail = rm;
*tail = &rm->next;
}
free(ref_name);
}
path_list_clear(&existing_refs, 0);
path_list_clear(&new_refs, 0);
}
static int do_fetch(struct transport *transport,
struct refspec *refs, int ref_count)
{
struct ref *ref_map;
struct ref *rm;
int autotags = (transport->remote->fetch_tags == 1);
if (transport->remote->fetch_tags == 2 && tags != TAGS_UNSET)
tags = TAGS_SET;
if (transport->remote->fetch_tags == -1)
tags = TAGS_UNSET;
if (!transport->get_refs_list || !transport->fetch)
die("Don't know how to fetch from %s", transport->url);
/* if not appending, truncate FETCH_HEAD */
if (!append) {
char *filename = git_path("FETCH_HEAD");
FILE *fp = fopen(filename, "w");
if (!fp)
return error("cannot open %s: %s\n", filename, strerror(errno));
fclose(fp);
}
ref_map = get_ref_map(transport, refs, ref_count, tags, &autotags);
for (rm = ref_map; rm; rm = rm->next) {
if (rm->peer_ref)
read_ref(rm->peer_ref->name, rm->peer_ref->old_sha1);
}
if (tags == TAGS_DEFAULT && autotags)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
if (fetch_refs(transport, ref_map)) {
free_refs(ref_map);
return 1;
}
free_refs(ref_map);
/* if neither --no-tags nor --tags was specified, do automated tag
* following ... */
if (tags == TAGS_DEFAULT && autotags) {
struct ref **tail = &ref_map;
ref_map = NULL;
find_non_local_tags(transport, &ref_map, &tail);
if (ref_map) {
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
fetch_refs(transport, ref_map);
}
free_refs(ref_map);
}
transport_disconnect(transport);
return 0;
}
static void set_option(const char *name, const char *value)
{
int r = transport_set_option(transport, name, value);
if (r < 0)
die("Option \"%s\" value \"%s\" is not valid for %s\n",
name, value, transport->url);
if (r > 0)
warning("Option \"%s\" is ignored for %s\n",
name, transport->url);
}
int cmd_fetch(int argc, const char **argv, const char *prefix)
{
struct remote *remote;
int i;
static const char **refs = NULL;
int ref_nr = 0;
/* Record the command line for the reflog */
strbuf_addstr(&default_rla, "fetch");
for (i = 1; i < argc; i++)
strbuf_addf(&default_rla, " %s", argv[i]);
argc = parse_options(argc, argv,
builtin_fetch_options, builtin_fetch_usage, 0);
if (argc == 0)
remote = remote_get(NULL);
else
remote = remote_get(argv[0]);
transport = transport_get(remote, remote->url[0]);
if (verbose >= 2)
transport->verbose = 1;
if (quiet)
transport->verbose = -1;
if (upload_pack)
set_option(TRANS_OPT_UPLOADPACK, upload_pack);
if (keep)
set_option(TRANS_OPT_KEEP, "yes");
if (depth)
set_option(TRANS_OPT_DEPTH, depth);
if (!transport->url)
die("Where do you want to fetch from today?");
if (argc > 1) {
int j = 0;
refs = xcalloc(argc + 1, sizeof(const char *));
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "tag")) {
char *ref;
i++;
ref = xmalloc(strlen(argv[i]) * 2 + 22);
strcpy(ref, "refs/tags/");
strcat(ref, argv[i]);
strcat(ref, ":refs/tags/");
strcat(ref, argv[i]);
refs[j++] = ref;
} else
refs[j++] = argv[i];
}
refs[j] = NULL;
ref_nr = j;
}
signal(SIGINT, unlock_pack_on_signal);
atexit(unlock_pack);
return do_fetch(transport,
parse_fetch_refspec(ref_nr, refs), ref_nr);
}