2009-10-31 01:47:47 +01:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
test_description='test smart fetching over http via http-backend'
|
|
|
|
. ./test-lib.sh
|
|
|
|
. "$TEST_DIRECTORY"/lib-httpd.sh
|
|
|
|
start_httpd
|
|
|
|
|
|
|
|
test_expect_success 'setup repository' '
|
2013-01-16 03:05:07 +01:00
|
|
|
git config push.default matching &&
|
2009-10-31 01:47:47 +01:00
|
|
|
echo content >file &&
|
|
|
|
git add file &&
|
|
|
|
git commit -m one
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'create http-accessible bare repository' '
|
|
|
|
mkdir "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
|
|
|
(cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
|
|
|
git --bare init
|
|
|
|
) &&
|
|
|
|
git remote add public "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
|
|
|
git push public master:master
|
|
|
|
'
|
|
|
|
|
2012-08-27 15:25:36 +02:00
|
|
|
setup_askpass_helper
|
|
|
|
|
2009-10-31 01:47:47 +01:00
|
|
|
cat >exp <<EOF
|
|
|
|
> GET /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1
|
|
|
|
> Accept: */*
|
2012-09-20 01:12:02 +02:00
|
|
|
> Accept-Encoding: gzip
|
2009-10-31 01:47:47 +01:00
|
|
|
> Pragma: no-cache
|
|
|
|
< HTTP/1.1 200 OK
|
|
|
|
< Pragma: no-cache
|
|
|
|
< Cache-Control: no-cache, max-age=0, must-revalidate
|
|
|
|
< Content-Type: application/x-git-upload-pack-advertisement
|
|
|
|
> POST /smart/repo.git/git-upload-pack HTTP/1.1
|
2012-09-20 01:12:02 +02:00
|
|
|
> Accept-Encoding: gzip
|
2009-10-31 01:47:47 +01:00
|
|
|
> Content-Type: application/x-git-upload-pack-request
|
2010-01-12 18:54:04 +01:00
|
|
|
> Accept: application/x-git-upload-pack-result
|
2009-10-31 01:47:47 +01:00
|
|
|
> Content-Length: xxx
|
|
|
|
< HTTP/1.1 200 OK
|
|
|
|
< Pragma: no-cache
|
|
|
|
< Cache-Control: no-cache, max-age=0, must-revalidate
|
|
|
|
< Content-Type: application/x-git-upload-pack-result
|
|
|
|
EOF
|
|
|
|
test_expect_success 'clone http repository' '
|
|
|
|
GIT_CURL_VERBOSE=1 git clone --quiet $HTTPD_URL/smart/repo.git clone 2>err &&
|
|
|
|
test_cmp file clone/file &&
|
|
|
|
tr '\''\015'\'' Q <err |
|
|
|
|
sed -e "
|
|
|
|
s/Q\$//
|
|
|
|
/^[*] /d
|
2009-11-09 19:10:36 +01:00
|
|
|
/^$/d
|
|
|
|
/^< $/d
|
2009-10-31 01:47:47 +01:00
|
|
|
|
|
|
|
/^[^><]/{
|
|
|
|
s/^/> /
|
|
|
|
}
|
|
|
|
|
|
|
|
/^> User-Agent: /d
|
|
|
|
/^> Host: /d
|
2009-11-09 19:10:37 +01:00
|
|
|
/^> POST /,$ {
|
|
|
|
/^> Accept: [*]\\/[*]/d
|
|
|
|
}
|
2009-10-31 01:47:47 +01:00
|
|
|
s/^> Content-Length: .*/> Content-Length: xxx/
|
2009-11-09 19:10:36 +01:00
|
|
|
/^> 00..want /d
|
|
|
|
/^> 00.*done/d
|
2009-10-31 01:47:47 +01:00
|
|
|
|
|
|
|
/^< Server: /d
|
|
|
|
/^< Expires: /d
|
|
|
|
/^< Date: /d
|
|
|
|
/^< Content-Length: /d
|
|
|
|
/^< Transfer-Encoding: /d
|
|
|
|
" >act &&
|
|
|
|
test_cmp exp act
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'fetch changes via http' '
|
|
|
|
echo content >>file &&
|
|
|
|
git commit -a -m two &&
|
2015-03-20 11:07:15 +01:00
|
|
|
git push public &&
|
2009-10-31 01:47:47 +01:00
|
|
|
(cd clone && git pull) &&
|
|
|
|
test_cmp file clone/file
|
|
|
|
'
|
|
|
|
|
|
|
|
cat >exp <<EOF
|
|
|
|
GET /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1 200
|
|
|
|
POST /smart/repo.git/git-upload-pack HTTP/1.1 200
|
|
|
|
GET /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1 200
|
|
|
|
POST /smart/repo.git/git-upload-pack HTTP/1.1 200
|
|
|
|
EOF
|
|
|
|
test_expect_success 'used upload-pack service' '
|
|
|
|
sed -e "
|
|
|
|
s/^.* \"//
|
|
|
|
s/\"//
|
|
|
|
s/ [1-9][0-9]*\$//
|
|
|
|
s/^GET /GET /
|
|
|
|
" >act <"$HTTPD_ROOT_PATH"/access.log &&
|
|
|
|
test_cmp exp act
|
|
|
|
'
|
|
|
|
|
2010-09-25 06:20:35 +02:00
|
|
|
test_expect_success 'follow redirects (301)' '
|
|
|
|
git clone $HTTPD_URL/smart-redir-perm/repo.git --quiet repo-p
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'follow redirects (302)' '
|
|
|
|
git clone $HTTPD_URL/smart-redir-temp/repo.git --quiet repo-t
|
|
|
|
'
|
|
|
|
|
remote-curl: rewrite base url from info/refs redirects
For efficiency and security reasons, an earlier commit in
this series taught http_get_* to re-write the base url based
on redirections we saw while making a specific request.
This commit wires that option into the info/refs request,
meaning that a redirect from
http://example.com/foo.git/info/refs
to
https://example.com/bar.git/info/refs
will behave as if "https://example.com/bar.git" had been
provided to git in the first place.
The tests bear some explanation. We introduce two new
hierarchies into the httpd test config:
1. Requests to /smart-redir-limited will work only for the
initial info/refs request, but not any subsequent
requests. As a result, we can confirm whether the
client is re-rooting its requests after the initial
contact, since otherwise it will fail (it will ask for
"repo.git/git-upload-pack", which is not redirected).
2. Requests to smart-redir-auth will redirect, and require
auth after the redirection. Since we are using the
redirected base for further requests, we also update
the credential struct, in order not to mislead the user
(or credential helpers) about which credential is
needed. We can therefore check the GIT_ASKPASS prompts
to make sure we are prompting for the new location.
Because we have neither multiple servers nor https
support in our test setup, we can only redirect between
paths, meaning we need to turn on
credential.useHttpPath to see the difference.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:35:35 +02:00
|
|
|
test_expect_success 'redirects re-root further requests' '
|
|
|
|
git clone $HTTPD_URL/smart-redir-limited/repo.git repo-redir-limited
|
|
|
|
'
|
|
|
|
|
2012-08-27 15:25:36 +02:00
|
|
|
test_expect_success 'clone from password-protected repository' '
|
|
|
|
echo two >expect &&
|
2014-01-02 08:38:35 +01:00
|
|
|
set_askpass user@host pass@host &&
|
2012-08-27 15:25:36 +02:00
|
|
|
git clone --bare "$HTTPD_URL/auth/smart/repo.git" smart-auth &&
|
|
|
|
expect_askpass both user@host &&
|
|
|
|
git --git-dir=smart-auth log -1 --format=%s >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2012-08-27 15:25:53 +02:00
|
|
|
test_expect_success 'clone from auth-only-for-push repository' '
|
|
|
|
echo two >expect &&
|
|
|
|
set_askpass wrong &&
|
|
|
|
git clone --bare "$HTTPD_URL/auth-push/smart/repo.git" smart-noauth &&
|
|
|
|
expect_askpass none &&
|
|
|
|
git --git-dir=smart-noauth log -1 --format=%s >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
remote-curl: retry failed requests for auth even with gzip
Commit b81401c taught the post_rpc function to retry the
http request after prompting for credentials. However, it
did not handle two cases:
1. If we have a large request, we do not retry. That's OK,
since we would have sent a probe (with retry) already.
2. If we are gzipping the request, we do not retry. That
was considered OK, because the intended use was for
push (e.g., listing refs is OK, but actually pushing
objects is not), and we never gzip on push.
This patch teaches post_rpc to retry even a gzipped request.
This has two advantages:
1. It is possible to configure a "half-auth" state for
fetching, where the set of refs and their sha1s are
advertised, but one cannot actually fetch objects.
This is not a recommended configuration, as it leaks
some information about what is in the repository (e.g.,
an attacker can try brute-forcing possible content in
your repository and checking whether it matches your
branch sha1). However, it can be slightly more
convenient, since a no-op fetch will not require a
password at all.
2. It future-proofs us should we decide to ever gzip more
requests.
Signed-off-by: Jeff King <peff@peff.net>
2012-10-31 12:29:16 +01:00
|
|
|
test_expect_success 'clone from auth-only-for-objects repository' '
|
|
|
|
echo two >expect &&
|
2014-01-02 08:38:35 +01:00
|
|
|
set_askpass user@host pass@host &&
|
remote-curl: retry failed requests for auth even with gzip
Commit b81401c taught the post_rpc function to retry the
http request after prompting for credentials. However, it
did not handle two cases:
1. If we have a large request, we do not retry. That's OK,
since we would have sent a probe (with retry) already.
2. If we are gzipping the request, we do not retry. That
was considered OK, because the intended use was for
push (e.g., listing refs is OK, but actually pushing
objects is not), and we never gzip on push.
This patch teaches post_rpc to retry even a gzipped request.
This has two advantages:
1. It is possible to configure a "half-auth" state for
fetching, where the set of refs and their sha1s are
advertised, but one cannot actually fetch objects.
This is not a recommended configuration, as it leaks
some information about what is in the repository (e.g.,
an attacker can try brute-forcing possible content in
your repository and checking whether it matches your
branch sha1). However, it can be slightly more
convenient, since a no-op fetch will not require a
password at all.
2. It future-proofs us should we decide to ever gzip more
requests.
Signed-off-by: Jeff King <peff@peff.net>
2012-10-31 12:29:16 +01:00
|
|
|
git clone --bare "$HTTPD_URL/auth-fetch/smart/repo.git" half-auth &&
|
|
|
|
expect_askpass both user@host &&
|
|
|
|
git --git-dir=half-auth log -1 --format=%s >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'no-op half-auth fetch does not require a password' '
|
|
|
|
set_askpass wrong &&
|
|
|
|
git --git-dir=half-auth fetch &&
|
|
|
|
expect_askpass none
|
|
|
|
'
|
|
|
|
|
remote-curl: rewrite base url from info/refs redirects
For efficiency and security reasons, an earlier commit in
this series taught http_get_* to re-write the base url based
on redirections we saw while making a specific request.
This commit wires that option into the info/refs request,
meaning that a redirect from
http://example.com/foo.git/info/refs
to
https://example.com/bar.git/info/refs
will behave as if "https://example.com/bar.git" had been
provided to git in the first place.
The tests bear some explanation. We introduce two new
hierarchies into the httpd test config:
1. Requests to /smart-redir-limited will work only for the
initial info/refs request, but not any subsequent
requests. As a result, we can confirm whether the
client is re-rooting its requests after the initial
contact, since otherwise it will fail (it will ask for
"repo.git/git-upload-pack", which is not redirected).
2. Requests to smart-redir-auth will redirect, and require
auth after the redirection. Since we are using the
redirected base for further requests, we also update
the credential struct, in order not to mislead the user
(or credential helpers) about which credential is
needed. We can therefore check the GIT_ASKPASS prompts
to make sure we are prompting for the new location.
Because we have neither multiple servers nor https
support in our test setup, we can only redirect between
paths, meaning we need to turn on
credential.useHttpPath to see the difference.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:35:35 +02:00
|
|
|
test_expect_success 'redirects send auth to new location' '
|
2014-01-02 08:38:35 +01:00
|
|
|
set_askpass user@host pass@host &&
|
remote-curl: rewrite base url from info/refs redirects
For efficiency and security reasons, an earlier commit in
this series taught http_get_* to re-write the base url based
on redirections we saw while making a specific request.
This commit wires that option into the info/refs request,
meaning that a redirect from
http://example.com/foo.git/info/refs
to
https://example.com/bar.git/info/refs
will behave as if "https://example.com/bar.git" had been
provided to git in the first place.
The tests bear some explanation. We introduce two new
hierarchies into the httpd test config:
1. Requests to /smart-redir-limited will work only for the
initial info/refs request, but not any subsequent
requests. As a result, we can confirm whether the
client is re-rooting its requests after the initial
contact, since otherwise it will fail (it will ask for
"repo.git/git-upload-pack", which is not redirected).
2. Requests to smart-redir-auth will redirect, and require
auth after the redirection. Since we are using the
redirected base for further requests, we also update
the credential struct, in order not to mislead the user
(or credential helpers) about which credential is
needed. We can therefore check the GIT_ASKPASS prompts
to make sure we are prompting for the new location.
Because we have neither multiple servers nor https
support in our test setup, we can only redirect between
paths, meaning we need to turn on
credential.useHttpPath to see the difference.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
2013-09-28 10:35:35 +02:00
|
|
|
git -c credential.useHttpPath=true \
|
|
|
|
clone $HTTPD_URL/smart-redir-auth/repo.git repo-redir-auth &&
|
|
|
|
expect_askpass both user@host auth/smart/repo.git
|
|
|
|
'
|
|
|
|
|
2012-09-20 23:30:58 +02:00
|
|
|
test_expect_success 'disable dumb http on server' '
|
|
|
|
git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
|
|
|
|
config http.getanyfile false
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'GIT_SMART_HTTP can disable smart http' '
|
|
|
|
(GIT_SMART_HTTP=0 &&
|
|
|
|
export GIT_SMART_HTTP &&
|
|
|
|
cd clone &&
|
|
|
|
test_must_fail git fetch)
|
|
|
|
'
|
|
|
|
|
2013-01-31 22:02:07 +01:00
|
|
|
test_expect_success 'invalid Content-Type rejected' '
|
2015-03-20 11:06:15 +01:00
|
|
|
test_must_fail git clone $HTTPD_URL/broken_smart/repo.git 2>actual &&
|
2013-02-05 01:21:42 +01:00
|
|
|
grep "not valid:" actual
|
2013-01-31 22:02:07 +01:00
|
|
|
'
|
|
|
|
|
2013-04-10 02:55:08 +02:00
|
|
|
test_expect_success 'create namespaced refs' '
|
|
|
|
test_commit namespaced &&
|
|
|
|
git push public HEAD:refs/namespaces/ns/refs/heads/master &&
|
|
|
|
git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
|
|
|
|
symbolic-ref refs/namespaces/ns/HEAD refs/namespaces/ns/refs/heads/master
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'smart clone respects namespace' '
|
|
|
|
git clone "$HTTPD_URL/smart_namespace/repo.git" ns-smart &&
|
|
|
|
echo namespaced >expect &&
|
|
|
|
git --git-dir=ns-smart/.git log -1 --format=%s >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'dumb clone via http-backend respects namespace' '
|
|
|
|
git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
|
|
|
|
config http.getanyfile true &&
|
|
|
|
GIT_SMART_HTTP=0 git clone \
|
|
|
|
"$HTTPD_URL/smart_namespace/repo.git" ns-dumb &&
|
|
|
|
echo namespaced >expect &&
|
|
|
|
git --git-dir=ns-dumb/.git log -1 --format=%s >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2013-07-24 00:40:17 +02:00
|
|
|
cat >cookies.txt <<EOF
|
|
|
|
127.0.0.1 FALSE /smart_cookies/ FALSE 0 othername othervalue
|
|
|
|
EOF
|
|
|
|
cat >expect_cookies.txt <<EOF
|
|
|
|
|
|
|
|
127.0.0.1 FALSE /smart_cookies/ FALSE 0 othername othervalue
|
|
|
|
127.0.0.1 FALSE /smart_cookies/repo.git/info/ FALSE 0 name value
|
|
|
|
EOF
|
|
|
|
test_expect_success 'cookies stored in http.cookiefile when http.savecookies set' '
|
|
|
|
git config http.cookiefile cookies.txt &&
|
|
|
|
git config http.savecookies true &&
|
|
|
|
git ls-remote $HTTPD_URL/smart_cookies/repo.git master &&
|
2015-03-20 11:06:44 +01:00
|
|
|
tail -3 cookies.txt >cookies_tail.txt &&
|
2013-08-05 17:59:24 +02:00
|
|
|
test_cmp expect_cookies.txt cookies_tail.txt
|
2013-07-24 00:40:17 +02:00
|
|
|
'
|
|
|
|
|
upload-pack: fix transfer.hiderefs over smart-http
When upload-pack advertises the refs (either for a normal,
non-stateless request, or for the initial contact in a
stateless one), we call for_each_ref with the send_ref
function as its callback. send_ref, in turn, calls
mark_our_ref, which checks whether the ref is hidden, and
sets OUR_REF or HIDDEN_REF on the object as appropriate. If
it is hidden, mark_our_ref also returns "1" to signal
send_ref that the ref should not be advertised.
If we are not advertising refs, (i.e., the follow-up
invocation by an http client to send its "want" lines), we
use mark_our_ref directly as a callback to for_each_ref. Its
marking does the right thing, but when it then returns "1"
to for_each_ref, the latter interprets this as an error and
stops iterating. As a result, we skip marking all of the
refs that come lexicographically after it. Any "want" lines
from the client asking for those objects will fail, as they
were not properly marked with OUR_REF.
To solve this, we introduce a wrapper callback around
mark_our_ref which always returns 0 (even if the ref is
hidden, we want to keep iterating). We also tweak the
signature of mark_our_ref to exclude unnecessary parameters
that were present only to conform to the callback interface.
This should make it less likely for somebody to accidentally
use it as a callback in the future.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-13 05:42:12 +01:00
|
|
|
test_expect_success 'transfer.hiderefs works over smart-http' '
|
|
|
|
test_commit hidden &&
|
|
|
|
test_commit visible &&
|
|
|
|
git push public HEAD^:refs/heads/a HEAD:refs/heads/b &&
|
|
|
|
git --git-dir="$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
|
|
|
|
config transfer.hiderefs refs/heads/a &&
|
|
|
|
git clone --bare "$HTTPD_URL/smart/repo.git" hidden.git &&
|
|
|
|
test_must_fail git -C hidden.git rev-parse --verify a &&
|
|
|
|
git -C hidden.git rev-parse --verify b
|
|
|
|
'
|
|
|
|
|
2015-05-20 09:36:43 +02:00
|
|
|
# create one tag per number produced by test_seq "$1" "$2"; each tag is named with a 30x "bla" prefix followed by "-<number>" and points at its own commit
|
|
|
|
create_tags () {
|
|
|
|
rm -f marks &&
|
|
|
|
for i in $(test_seq "$1" "$2")
|
2012-04-02 17:17:03 +02:00
|
|
|
do
|
2015-05-20 09:36:43 +02:00
|
|
|
# don't use here-doc, because it requires a process
|
|
|
|
# per loop iteration
|
|
|
|
echo "commit refs/heads/too-many-refs-$1" &&
|
|
|
|
echo "mark :$i" &&
|
|
|
|
echo "committer git <git@example.com> $i +0000" &&
|
|
|
|
echo "data 0" &&
|
|
|
|
echo "M 644 inline bla.txt" &&
|
|
|
|
echo "data 4" &&
|
|
|
|
echo "bla" &&
|
2012-04-02 17:17:03 +02:00
|
|
|
# make every commit dangling by always
|
|
|
|
# rewinding the branch after each commit
|
2015-05-20 09:36:43 +02:00
|
|
|
echo "reset refs/heads/too-many-refs-$1" &&
|
|
|
|
echo "from :$1"
|
2012-04-02 17:17:03 +02:00
|
|
|
done | git fast-import --export-marks=marks &&
|
|
|
|
|
|
|
|
|
# now assign tags to all the dangling commits we created above: rewrite each ":<mark> <rest>" line of the marks file into "<rest> refs/tags/<tag>-<mark>" and append it to packed-refs
|
2013-10-29 02:23:03 +01:00
|
|
|
tag=$(perl -e "print \"bla\" x 30") &&
|
2013-05-13 00:50:59 +02:00
|
|
|
sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs
|
2015-05-20 09:36:43 +02:00
|
|
|
}
|
|
|
|
|
2015-05-26 05:44:04 +02:00
|
|
|
test_expect_success 'create 2,000 tags in the repo' '
|
2015-05-20 09:36:43 +02:00
|
|
|
(
|
|
|
|
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
2015-05-26 05:44:04 +02:00
|
|
|
create_tags 1 2000
|
2012-04-02 17:17:03 +02:00
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2015-03-13 05:57:05 +01:00
|
|
|
test_expect_success CMDLINE_LIMIT \
|
|
|
|
'clone the 2,000 tag repo to check OS command line overflow' '
|
|
|
|
run_with_limited_cmdline git clone $HTTPD_URL/smart/repo.git too-many-refs &&
|
2013-05-13 00:50:59 +02:00
|
|
|
(
|
|
|
|
cd too-many-refs &&
|
2015-03-13 05:57:05 +01:00
|
|
|
git for-each-ref refs/tags >actual &&
|
|
|
|
test_line_count = 2000 actual
|
2013-05-13 00:50:59 +02:00
|
|
|
)
|
2012-04-02 17:17:03 +02:00
|
|
|
'
|
|
|
|
|
2015-03-13 05:57:05 +01:00
|
|
|
test_expect_success 'large fetch-pack requests can be split across POSTs' '
|
|
|
|
GIT_CURL_VERBOSE=1 git -c http.postbuffer=65536 \
|
|
|
|
clone --bare "$HTTPD_URL/smart/repo.git" split.git 2>err &&
|
|
|
|
grep "^> POST" err >posts &&
|
|
|
|
test_line_count = 2 posts
|
|
|
|
'
|
|
|
|
|
http-backend: spool ref negotiation requests to buffer
When http-backend spawns "upload-pack" to do ref
negotiation, it streams the http request body to
upload-pack, who then streams the http response back to the
client as it reads. In theory, git can go full-duplex; the
client can consume our response while it is still sending
the request. In practice, however, HTTP is a half-duplex
protocol. Even if our client is ready to read and write
simultaneously, we may have other HTTP infrastructure in the
way, including the webserver that spawns our CGI, or any
intermediate proxies.
In at least one documented case[1], this leads to deadlock
when trying a fetch over http. What happens is basically:
1. Apache proxies the request to the CGI, http-backend.
2. http-backend gzip-inflates the data and sends
the result to upload-pack.
3. upload-pack acts on the data and generates output over
the pipe back to Apache. Apache isn't reading because
it's busy writing (step 1).
This works fine most of the time, because the upload-pack
output ends up in a system pipe buffer, and Apache reads
it as soon as it finishes writing. But if both the request
and the response exceed the system pipe buffer size, then we
deadlock (Apache blocks writing to http-backend,
http-backend blocks writing to upload-pack, and upload-pack
blocks writing to Apache).
We need to break the deadlock by spooling either the input
or the output. In this case, it's ideal to spool the input,
because Apache does not start reading either stdout _or_
stderr until we have consumed all of the input. So until we
do so, we cannot even get an error message out to the
client.
The solution is fairly straight-forward: we read the request
body into an in-memory buffer in http-backend, freeing up
Apache, and then feed the data ourselves to upload-pack. But
there are a few important things to note:
1. We limit the in-memory buffer to prevent an obvious
denial-of-service attack. This is a new hard limit on
requests, but it's unlikely to come into play. The
default value is 10MB, which covers even the ridiculous
100,000-ref negotiation in the included test (that
actually caps out just over 5MB). But it's configurable
on the off chance that you don't mind spending some
extra memory to make even ridiculous requests work.
2. We must take care only to buffer when we have to. For
pushes, the incoming packfile may be of arbitrary
size, and we should connect the input directly to
receive-pack. There's no deadlock problem here, though,
because we do not produce any output until the whole
packfile has been read.
For upload-pack's initial ref advertisement, we
similarly do not need to buffer. Even though we may
generate a lot of output, there is no request body at
all (i.e., it is a GET, not a POST).
[1] http://article.gmane.org/gmane.comp.version-control.git/269020
Test-adapted-from: Dennis Kaarsemaker <dennis@kaarsemaker.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-05-20 09:37:09 +02:00
|
|
|
test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
|
2015-05-26 05:44:04 +02:00
|
|
|
(
|
|
|
|
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
|
|
|
create_tags 2001 50000
|
|
|
|
) &&
|
http-backend: spool ref negotiation requests to buffer
When http-backend spawns "upload-pack" to do ref
negotiation, it streams the http request body to
upload-pack, who then streams the http response back to the
client as it reads. In theory, git can go full-duplex; the
client can consume our response while it is still sending
the request. In practice, however, HTTP is a half-duplex
protocol. Even if our client is ready to read and write
simultaneously, we may have other HTTP infrastructure in the
way, including the webserver that spawns our CGI, or any
intermediate proxies.
In at least one documented case[1], this leads to deadlock
when trying a fetch over http. What happens is basically:
1. Apache proxies the request to the CGI, http-backend.
2. http-backend gzip-inflates the data and sends
the result to upload-pack.
3. upload-pack acts on the data and generates output over
the pipe back to Apache. Apache isn't reading because
it's busy writing (step 1).
This works fine most of the time, because the upload-pack
output ends up in a system pipe buffer, and Apache reads
it as soon as it finishes writing. But if both the request
and the response exceed the system pipe buffer size, then we
deadlock (Apache blocks writing to http-backend,
http-backend blocks writing to upload-pack, and upload-pack
blocks writing to Apache).
We need to break the deadlock by spooling either the input
or the output. In this case, it's ideal to spool the input,
because Apache does not start reading either stdout _or_
stderr until we have consumed all of the input. So until we
do so, we cannot even get an error message out to the
client.
The solution is fairly straight-forward: we read the request
body into an in-memory buffer in http-backend, freeing up
Apache, and then feed the data ourselves to upload-pack. But
there are a few important things to note:
1. We limit the in-memory buffer to prevent an obvious
denial-of-service attack. This is a new hard limit on
requests, but it's unlikely to come into play. The
default value is 10MB, which covers even the ridiculous
100,000-ref negotiation in the included test (that
actually caps out just over 5MB). But it's configurable
on the off chance that you don't mind spending some
extra memory to make even ridiculous requests work.
2. We must take care only to buffer when we have to. For
pushes, the incoming packfile may be of arbitrary
size, and we should connect the input directly to
receive-pack. There's no deadlock problem here, though,
because we do not produce any output until the whole
packfile has been read.
For upload-pack's initial ref advertisement, we
similarly do not need to buffer. Even though we may
generate a lot of output, there is no request body at
all (i.e., it is a GET, not a POST).
[1] http://article.gmane.org/gmane.comp.version-control.git/269020
Test-adapted-from: Dennis Kaarsemaker <dennis@kaarsemaker.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-05-20 09:37:09 +02:00
|
|
|
git -C too-many-refs fetch -q --tags &&
|
|
|
|
(
|
|
|
|
cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
|
|
|
|
create_tags 50001 100000
|
|
|
|
) &&
|
|
|
|
git -C too-many-refs fetch -q --tags &&
|
|
|
|
git -C too-many-refs for-each-ref refs/tags >tags &&
|
|
|
|
test_line_count = 100000 tags
|
|
|
|
'
|
|
|
|
|
2009-10-31 01:47:47 +01:00
|
|
|
stop_httpd
|
|
|
|
test_done
|