From 23aee4199a69fae861f9d173ee0ef1c2a7fab0d5 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Thu, 19 Nov 2015 09:58:08 +0100 Subject: [PATCH 1/3] git-p4: retry kill/cleanup operations in tests with timeout In rare cases kill/cleanup operations in tests fail. Retry these operations with a timeout to make the test less flaky. Signed-off-by: Lars Schneider Signed-off-by: Jeff King --- t/lib-git-p4.sh | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/t/lib-git-p4.sh b/t/lib-git-p4.sh index 75482254a3..3c9ad9adcf 100644 --- a/t/lib-git-p4.sh +++ b/t/lib-git-p4.sh @@ -6,6 +6,10 @@ # a subdirectory called "$git" TEST_NO_CREATE_REPO=NoThanks +# Some operations require multiple attempts to be successful. Define +# here the maximal retry timeout in seconds. +RETRY_TIMEOUT=60 + . ./test-lib.sh if ! test_have_prereq PYTHON @@ -36,6 +40,15 @@ native_path() { echo "$path" } +# On Solaris the 'date +%s' function is not supported and therefore we +# need this replacement. +# Attention: This function is not safe against time offset updates +# at runtime (e.g. via NTP). The 'clock_gettime(CLOCK_MONOTONIC)' +# function could fix that but it is not in Python until 3.3. +time_in_seconds() { + python -c 'import time; print int(time.time())' +} + # Try to pick a unique port: guess a large number, then hope # no more than one of each test is running. # @@ -121,22 +134,35 @@ p4_add_user() { EOF } +retry_until_success() { + timeout=$(($(time_in_seconds) + $RETRY_TIMEOUT)) + until "$@" 2>/dev/null || test $(time_in_seconds) -gt $timeout + do + sleep 1 + done +} + +retry_until_fail() { + timeout=$(($(time_in_seconds) + $RETRY_TIMEOUT)) + until ! 
"$@" 2>/dev/null || test $(time_in_seconds) -gt $timeout + do + sleep 1 + done +} + kill_p4d() { pid=$(cat "$pidfile") - # it had better exist for the first kill - kill $pid && - for i in 1 2 3 4 5 ; do - kill $pid >/dev/null 2>&1 || break - sleep 1 - done && + retry_until_fail kill $pid + retry_until_fail kill -9 $pid # complain if it would not die test_must_fail kill $pid >/dev/null 2>&1 && rm -rf "$db" "$cli" "$pidfile" } cleanup_git() { - rm -rf "$git" && - mkdir "$git" + retry_until_success rm -r "$git" + test_must_fail test -d "$git" && + retry_until_success mkdir "$git" } marshal_dump() { From 842addef70e41f8bb8a16f4d9084432301c3f50f Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Thu, 19 Nov 2015 09:58:09 +0100 Subject: [PATCH 2/3] git-p4: add p4d timeout in tests In rare cases p4d seems to hang. This watchdog will kill the p4d process after 300s in any case. That means each individual git p4 test needs to finish before 300s or it will fail. Signed-off-by: Lars Schneider Acked-by: Luke Diamand Signed-off-by: Jeff King --- t/lib-git-p4.sh | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/t/lib-git-p4.sh b/t/lib-git-p4.sh index 3c9ad9adcf..acd5578eb5 100644 --- a/t/lib-git-p4.sh +++ b/t/lib-git-p4.sh @@ -10,6 +10,10 @@ TEST_NO_CREATE_REPO=NoThanks # here the maximal retry timeout in seconds. RETRY_TIMEOUT=60 +# Sometimes p4d seems to hang. Terminate the p4d process automatically after +# the defined timeout in seconds. +P4D_TIMEOUT=300 + . ./test-lib.sh if ! test_have_prereq PYTHON @@ -94,6 +98,19 @@ start_p4d() { # will be caught with the "kill -0" check below. i=${P4D_START_PATIENCE:-300} pid=$(cat "$pidfile") + + timeout=$(($(time_in_seconds) + $P4D_TIMEOUT)) + while true + do + if test $(time_in_seconds) -gt $timeout + then + kill -9 $pid + exit 1 + fi + sleep 1 + done & + watchdog_pid=$! 
+ ready= while test $i -gt 0 do @@ -156,7 +173,8 @@ kill_p4d() { retry_until_fail kill -9 $pid # complain if it would not die test_must_fail kill $pid >/dev/null 2>&1 && - rm -rf "$db" "$cli" "$pidfile" + rm -rf "$db" "$cli" "$pidfile" && + retry_until_fail kill -9 $watchdog_pid } cleanup_git() { From dfe90e8b528be4d9668b1bdc6329f8227cbf307a Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Thu, 19 Nov 2015 09:58:10 +0100 Subject: [PATCH 3/3] git-p4: add trap to kill p4d on test exit Sometimes the "prove" test runner hangs on test exit because p4d is still running. Add a trap to always kill "p4d" on test exit. You can reproduce the problem by commenting out "P4D_TIMEOUT" in "lib-git-p4.sh" and running "prove ./t9800-git-p4-basic.sh". Signed-off-by: Lars Schneider Signed-off-by: Jeff King --- t/lib-git-p4.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/t/lib-git-p4.sh b/t/lib-git-p4.sh index acd5578eb5..f9ae1d780d 100644 --- a/t/lib-git-p4.sh +++ b/t/lib-git-p4.sh @@ -74,6 +74,15 @@ cli="$TRASH_DIRECTORY/cli" git="$TRASH_DIRECTORY/git" pidfile="$TRASH_DIRECTORY/p4d.pid" +# Sometimes "prove" seems to hang on exit because p4d is still running +cleanup() { + if test -f "$pidfile" + then + kill -9 $(cat "$pidfile") 2>/dev/null && exit 255 + fi +} +trap cleanup EXIT + # git p4 submit generates a temp file, which will # not get cleaned up if the submission fails. Don't # clutter up /tmp on the test machine.