Skip to content
Snippets Groups Projects
Commit 54a16df0 authored by Heikki Linnakangas's avatar Heikki Linnakangas
Browse files

Make the pg_rewind regression tests more robust on slow systems.

There were a couple of hard-coded sleeps in the tests: to wait for standby
to catch up with master, and to wait for promotion with "pg_ctl promote"
to complete. Instead of a fixed, hard-coded sleep, poll the server with a
query once a second. This isn't ideal either, and I wish we had a better
solution for real-world applications too, but this should fix the
immediate problem.

Patch by Michael Paquier, with some editing by me.
parent cef939c3
No related branches found
No related tags found
No related merge requests found
......@@ -125,6 +125,37 @@ sub check_query
}
}
# Run a query through psql once a second, until it returns 't' (i.e. SQL
# boolean true) or the attempts are used up.
#
# Arguments:
#   $query        - SQL text handed to psql with -c
#   $connstr      - connection string handed to psql with -d
#   $max_attempts - optional number of tries before giving up (default 30)
#
# Returns 1 as soon as the query prints 't', and 0 if no attempt did. On
# failure, the stderr captured from the last psql run is reported with diag,
# hopefully that's useful for debugging.
sub poll_query_until
{
	my ($query, $connstr, $max_attempts) = @_;
	$max_attempts = 30 unless defined $max_attempts;

	my ($stdout, $stderr) = ('', '');
	for my $attempt (1 .. $max_attempts)
	{
		my $cmd = [ 'psql', '-At', '-c', $query, '-d', $connstr ];

		# The exit status of psql is deliberately ignored: a failing attempt
		# is just another "not yet", and only the printed value decides.
		run($cmd, '>', \$stdout, '2>', \$stderr);
		chomp($stdout);
		return 1 if $stdout eq "t";

		# Wait a second before retrying. There is nothing to wait for after
		# the final attempt, so don't delay the failure report.
		sleep 1 if $attempt < $max_attempts;
	}

	# The query never returned 't'. Give up, and print whatever the last
	# attempt wrote to stderr, if anything.
	diag $stderr if defined $stderr && $stderr ne '';
	return 0;
}
sub append_to_file
{
my($filename, $str) = @_;
......@@ -185,7 +216,7 @@ sub create_standby
# Base backup is taken with xlog files included
system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
append_to_file("$test_standby_datadir/recovery.conf", qq(
primary_conninfo='$connstr_master'
primary_conninfo='$connstr_master application_name=rewind_standby'
standby_mode=on
recovery_target_timeline='latest'
));
......@@ -193,8 +224,11 @@ recovery_target_timeline='latest'
# Start standby
system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1");
# sleep a bit to make sure the standby has caught up.
sleep 1;
# Wait until the standby has caught up with the primary, by polling
# pg_stat_replication.
my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
poll_query_until($caughtup_query, $connstr_master)
or die "Timed out while waiting for standby to catch up";
}
sub promote_standby
......@@ -203,9 +237,11 @@ sub promote_standby
# up standby
# Now promote slave and insert some new data on master, this will put
# the master out-of-sync with the standby.
# the master out-of-sync with the standby. Wait until the standby is
# out of recovery mode, and is ready to accept read-write connections.
system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
sleep 2;
poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
or die "Timed out while waiting for promotion of standby";
}
sub run_pg_rewind
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment