File SAPHanaSR-testDriver of Package SAPHanaSR.2773
#!/usr/bin/perl
#
# SAPHanaSR-testDriver
# (c) 2014 SUSE Linux Products GmbH, Nuremberg, Germany
# (c) 2015 SUSE Linux GmbH, Nuremberg, Germany
# Author: Fabian Herschel <fabian.herschel@suse.com>
# License: GPL v2+
my $Version="0.16.2016.02.05.1";
#
##################################################################
# THIS PROGRAM IS NOT INTENDED TO RUN IN PRODUCTIVE ENVIRONMENTS
# AS IT TESTS THE FUNCTIONALITY OF THE SAPHanaSR RESOURCE AGENTS.
# THEREFORE IT STOPS, KILLS AND OTHERWISE DISRUPTS SAP HANA INSTANCES
# AND ALSO SHUTS DOWN, FENCES OR BLOCKS CLUSTER NODES
##################################################################
use POSIX;
use strict;
use Sys::Syslog;
use Sys::Hostname;
use File::Path;
use Getopt::Long;
use lib '/usr/share/SAPHanaSR/tests';
use SAPHanaSRTools;
###################################
# --- Expected cluster layout (the node count can be overridden with --nodes) ---
my $ClusterNodes=2;
my $ClusterPrimaries=1;
my $ClusterSecondaries=1;
my %Name;
my $host = hostname();
my %Host;
my %Site;
my %Global;
my %HName;
my %SName;
my %GName;
# --- Status file holding the test counter across restarts of the driver ---
my $varlib='/var/lib/SAPHanaTD';
my $testfile='SAPHanaTD.status';
my $testcount=0;
# --- Test case selection ---
# "all" selects @testCaseSetALL; otherwise --case carries a comma separated list.
# NOTE(review): run_testCase also accepts "kpi2", which is not part of this
# list - confirm whether leaving it out of the default set is intentional.
my $testCase="all";
my @testCaseSetALL = ("cmp","ksi2","kpi11","ksi11","kpi9","ksi9","ssi","spi","ssns","spns","rms","sps","sss");
my @testCaseSet = @testCaseSetALL;
my $standbyHanas=0;
my $msl;
my $first_test=1;
my $sid;
my @sids;
my $ino;
my @primaryHanaList;
my @secondaryHanaList;
my $testName="";
my %testCaseCount;
my $help=0;
# --- Command line ---
my $result = GetOptions ("sid=s" => \@sids,
                         "nodes=s" => \$ClusterNodes,
                         "case=s" => \$testCase,
                         "help" => \$help,
                        );
# GetOptions returns false after reporting an unknown or malformed option on
# STDERR. This driver is destructive, so a mistyped option must not silently
# fall back to the defaults: print the usage and stop instead.
if ( $help || ! $result ) {
    my $failed = $help ? 0 : 1;
    my $usageFH = $failed ? \*STDERR : \*STDOUT;
    printf {$usageFH} "SAPHanaSR-testDriver [--help] [--sid=<sid[:instNr]>] [--nodes=<number of nodes>] [--case=<list of test cases>]\n";
    printf {$usageFH} " valid test cases are: all,%s\n", join (",", @testCaseSetALL);
    exit $failed;
}
if ( $testCase ne "all" ) {
    @testCaseSet = split("," , $testCase);
}
# An empty selection (e.g. --case=) would make the main loop pick an undefined
# test case forever, so reject it right away.
if ( 0 == @testCaseSet ) {
    printf STDERR "SAPHanaSR-testDriver: --case does not name any test case\n";
    exit 1;
}
printf "Testcases: %s\n", join (",", @testCaseSet);
# --- State shared between the main loop and run_testCase ---
my $message;
my $phost;
my $shost;
my $sSite="";
my $pSite="";
my $testActive=0;
#
# init - prepare the driver state before the test loop starts
#
# * creates the status directory $varlib (mode 0700) if needed
# * restores $testcount from the "testnr=<n>" line of the status file
# * sets $msl to the name of the master/slave resource reported by
#   "crm resource list" (default "msl_" if none is found)
#
# Always returns 0.
#
sub init()
{
    mkpath($varlib, { mode => 0700, });
    # A missing status file just means this is the first run; $testcount then
    # keeps its current value. Only a purely numeric counter is accepted so a
    # damaged file cannot turn $testcount into a non-numeric string.
    if ( open(my $statFH, "<", "$varlib/$testfile") ) {
        while ( my $line = <$statFH> ) {
            if ( $line =~ /^testnr=(\d+)/ ) {
                $testcount=$1;
            }
        }
        close $statFH;
    }
    $msl="msl_";
    #$msl="msl_SAPHana_SLE_HDB00";
    # Example output of "crm resource list":
    #  stonith_fscs98 (stonith:external/ipmi): Started
    #  stonith_fscs99 (stonith:external/ipmi): Started
    #  rsc_ip_SLE_HDB00 (ocf::heartbeat:IPaddr2): Started
    #  Master/Slave Set: msl_SAPHana_SLE_HDB00 [rsc_SAPHana_SLE_HDB00]
    # TODO: If we could pick up all cluster command output from a remote system, we would
    #       be able to run the testDriver on a cluster-external system. This would include
    #       the possibility to also test node reboots and re-joins.
    # List-form pipe open: no shell involved and a failing start is detectable.
    if ( open(my $crmFH, "-|", "crm", "resource", "list") ) {
        while ( my $line = <$crmFH> ) {
            if ( $line =~ /^\s*Master.Slave Set:\s+(\S+)\s+/ ) {
                printf "master-slave resource found: <%s>\n", $1;
                $msl=$1;
            }
        }
        close $crmFH;
    } else {
        # Keep going with the default resource name, but do not hide the problem.
        warn "SAPHanaSR-testDriver: cannot run 'crm resource list': $!\n";
    }
    return 0;
}
# Syslog identity and options used for all messages of the test driver.
my $ident = "fhTD";
my $logopt = "pid,perror";
my $facility = "LOCAL0";
my $priority = "info";
openlog $ident, $logopt, $facility; # don't forget this
# $sid is already declared with the other file-level variables; assign it here
# instead of declaring a second, masking variable of the same name.
$sid="";
my $INr="";
# Without --sid fall back to the SID list reported by the tools module,
# which is expected as a comma separated list of sid[:instNr] entries.
if ( 0 == @sids ) {
    my ( $sid_ino_list ) = get_sid_and_InstNr();
    @sids = split(",", $sid_ino_list) if defined $sid_ino_list;
}
# The main loop works on $sids[0]; running the (destructive) tests without
# any SID makes no sense, so stop with a clear message.
if ( 0 == @sids ) {
    die "SAPHanaSR-testDriver: no SAP HANA SID given with --sid and none could be detected\n";
}
#
# run_test_stop_hana - stop the HANA instance on one host using "HDB stop"
#
# Parameters:
#   $sid     - SAP system id; the command runs as user <sid>adm
#   $theHost - host to stop the instance on (reached via ssh)
#   $testNr  - current test number, only used in the log messages
#
# Returns 0 if the ssh command succeeded, 1 otherwise.
#
sub run_test_stop_hana( $ $ $ )
{
    my ($sid, $theHost, $testNr) = @_;
    my $rc=0;
    my $command="HDB stop";
    # Build the command once so the logged and the executed command cannot differ.
    my $sshCommand="ssh $theHost \"su - ${sid}adm -c '$command'\"";
    mysyslog $priority, "%s", "TEST Try to stop HDB at $theHost testnr=$testNr";
    mysyslog $priority, "%s", "TEST $sshCommand";
    my $status=system($sshCommand);
    if ( $status == 0 ) {
        mysyslog $priority, "%s", "TEST Stopped HDB at $theHost testnr=$testNr";
    } else {
        # Do not claim success when ssh or the remote command failed.
        $rc=1;
        mysyslog $priority, "%s", "TEST Stop of HDB at $theHost returned status $status testnr=$testNr";
    }
    return $rc;
}
#
# run_test_stop_hana_system - stop a complete HANA system using
# "sapcontrol -nr <instNr> -function StopSystem"
#
# Parameters:
#   $sid     - SAP system id; the command runs as user <sid>adm
#   $theHost - host on which sapcontrol is called (reached via ssh)
#   $testNr  - current test number, only used in the log messages
#
# The instance number is taken from the file-level $INr.
#
# Returns 0 if the ssh command succeeded, 1 otherwise (also when no instance
# number is known).
#
sub run_test_stop_hana_system( $ $ $ )
{
    my ($sid, $theHost, $testNr) = @_;
    my $rc=0;
    # The instance number is optional in --sid=<sid[:instNr]>; without it the
    # sapcontrol call would be malformed, so refuse to run it.
    if ( ! defined $INr || $INr eq "" ) {
        mysyslog $priority, "%s", "TEST Cannot stop HANA SYSTEM at $theHost - no instance number known for $sid testnr=$testNr";
        return 1;
    }
    my $command="sapcontrol -nr $INr -function StopSystem";
    # Build the command once so the logged and the executed command cannot differ.
    my $sshCommand="ssh $theHost \"su - ${sid}adm -c '$command'\"";
    mysyslog $priority, "%s", "TEST Try to stop HANA SYSTEM at $theHost testnr=$testNr";
    mysyslog $priority, "%s", "TEST $sshCommand";
    my $status=system($sshCommand);
    if ( $status == 0 ) {
        mysyslog $priority, "%s", "TEST Stopped HANA SYSTEM at $theHost testnr=$testNr";
    } else {
        # Do not claim success when ssh or the remote command failed.
        $rc=1;
        mysyslog $priority, "%s", "TEST Stop of HANA SYSTEM at $theHost returned status $status testnr=$testNr";
    }
    return $rc;
}
#
# run_test_kill_hana - kill a single HANA instance using "HDB kill-<signal>"
#
# Parameters:
#   $sid     - SAP system id; the command runs as user <sid>adm
#   $theHost - host to kill the instance on (reached via ssh)
#   $testNr  - current test number, only used in the log messages
#   $signal  - signal number appended to "HDB kill-" (e.g. 2, 9, 11)
#
# Returns 0 if the ssh command succeeded, 1 otherwise.
#
sub run_test_kill_hana( $ $ $ $ )
{
    my ($sid, $theHost, $testNr, $signal) = @_;
    my $rc=0;
    my $command="HDB kill-$signal";
    # Build the command once so the logged and the executed command cannot differ.
    my $sshCommand="ssh $theHost \"su - ${sid}adm -c '$command'\"";
    mysyslog $priority, "%s", "TEST Try to kill-$signal HDB at $theHost testnr=$testNr";
    mysyslog $priority, "%s", "TEST $sshCommand";
    my $status=system($sshCommand);
    if ( $status == 0 ) {
        mysyslog $priority, "%s", "TEST Killed HDB at $theHost testnr=$testNr";
    } else {
        # Do not claim success when ssh or the remote command failed.
        $rc=1;
        mysyslog $priority, "%s", "TEST kill-$signal of HDB at $theHost returned status $status testnr=$testNr";
    }
    return $rc;
}
#
# run_test_standby_node - put a cluster node into standby and bring it back
#
# Parameters:
#   $sid    - SAP system id (not used by this test)
#   $node   - cluster node to set standby / online
#   $testNr - current test number, only used in the log messages
#
# The test only runs while get_nodes_online() reports 2; otherwise nothing
# is done. Always returns 0.
#
sub run_test_standby_node( $ $ $ )
{
    my ($sid, $node, $testNr) = @_;
    my $rc=0;

    # Precondition not met: leave the cluster untouched.
    return $rc unless get_nodes_online() == 2;

    mysyslog $priority, "%s", "Try standby $node testnr=$testNr";
    system("crm node standby $node");
    # Poll until only one node is reported online.
    until ( get_nodes_online() == 1 ) {
        mysyslog $priority, "%s", "Wait for standby status for $node testnr=$testNr";
        sleep 10;
    }
    mysyslog $priority, "%s", "Set standby for $node testnr=$testNr";

    # TODO: We should wait till S_IDLE - for now we wait for 3 minutes
    mysyslog $priority, "%s", "sleeping 180s testnr=$testNr";
    sleep 180;

    mysyslog $priority, "%s", "Try set online for $node testnr=$testNr";
    system("crm node online $node");
    # Poll until both nodes are reported online again.
    until ( get_nodes_online() == 2 ) {
        mysyslog $priority, "%s", "Wait for online status for $node testnr=$testNr";
        sleep 10;
    }
    mysyslog $priority, "%s", "Set online for $node testnr=$testNr";

    return $rc;
}
#
# run_test_stop_masterslave - stop the master/slave resource, wait 180
# seconds and start it again
#
# Parameters:
#   first  - name of the master/slave resource handed to "crm resource"
#   second - current test number, only used in the log messages
#
# Always returns 0.
#
sub run_test_stop_masterslave( $ $ )
{
    my ($resource, $nr) = @_;

    mysyslog $priority, "%s", "Try stop msl $resource testnr=$nr";
    system("crm resource stop $resource");

    mysyslog $priority, "%s", "sleeping 180s testnr=$nr";
    sleep 180;

    mysyslog $priority, "%s", "Try start msl $resource testnr=$nr";
    system("crm resource start $resource");

    return 0;
}
#
# run_test_cluster_maintenance_procedure - restart the cluster stack on all
# nodes while the nodes are in maintenance
#
# Parameter: the current test number (not used inside this procedure).
#
# Manual equivalent of the procedure:
#   crm node maintenance fscs98; crm node maintenance fscs99;
#   for clN in fscs99 fscs98; do ssh $clN "hostname; rcopenais stop"; done;
#   sleep 60;
#   for clN in fscs98 fscs99; do ssh $clN "hostname; rcopenais start"; done;
#   sleep 60;
#   crm node ready fscs99; crm node ready fscs98
#
# Always returns 0.
#
sub run_test_cluster_maintenance_procedure( $ )
{
    my ($testNr) = @_;
    my @members = get_node_list();

    mysyslog $priority, "%s", "TEST Nodes for maintenance: " . join(",", @members);

    # Step 1: all nodes into maintenance
    mysyslog $priority, "TEST Set MAINTENANCE on all nodes","";
    foreach my $member ( @members ) {
        mysyslog $priority, "TEST Set MAINTENANCE on %s","$member";
        system ("crm node maintenance $member");
    }
    sleep 10;

    # Step 2: stop the cluster stack everywhere
    mysyslog $priority, "TEST Stopping cluster on all nodes","";
    system("ssh $_ \"/usr/sbin/rcopenais stop\"") foreach @members;
    sleep 60;

    # Step 3: start the cluster stack again, in reverse node order
    mysyslog $priority, "TEST Starting cluster on all nodes","";
    system("ssh $_ \"/usr/sbin/rcopenais start\"") foreach reverse @members;
    sleep 60;

    # Step 4: all nodes back to ready
    mysyslog $priority, "TEST Set READY on all nodes","";
    foreach my $member ( @members ) {
        mysyslog $priority, "TEST Set READY on %s","$member";
        system ("crm node ready $member");
    }
    sleep 10;

    return 0;
}
#
# run_testCase - run one test case selected by number (1..14) or short name
#
# Parameter: the test case number or name. Anything not matching an entry of
# the table below is logged as test "NOP" and only resets $testActive.
#
# Side effects: counts the run in %testCaseCount, sets $testName, logs a
# "Run testcase" line and, for the entries flagged with "done", sets
# $testActive back to 0 once the test routine has returned.
#
# The return value is not used by the caller in this file.
#
sub run_testCase($)
{
    my ($testCase) = @_;
    my $tc = ++$testCaseCount{$testCase};

    # Ordered catalogue of test cases; the first matching entry wins.
    # done => 1 marks tests whose routine only returns after the test finished.
    my @catalogue = (
        { nr =>  1, id => "kpi2",  title => "Kill-2 primary instances",     done => 0,
          run => sub { run_test_kill_hana($sid, $phost, $testcount, 2) } },
        { nr =>  2, id => "ksi2",  title => "Kill-2 secondary instances",   done => 0,
          run => sub { run_test_kill_hana($sid, $shost, $testcount, 2) } },
        { nr =>  3, id => "kpi11", title => "Kill-11 primary instances",    done => 0,
          run => sub { run_test_kill_hana($sid, $phost, $testcount, 11) } },
        { nr =>  4, id => "ksi11", title => "Kill-11 secondary instances",  done => 0,
          run => sub { run_test_kill_hana($sid, $shost, $testcount, 11) } },
        { nr =>  5, id => "kpi9",  title => "Kill-9 primary instances",     done => 0,
          run => sub { run_test_kill_hana($sid, $phost, $testcount, 9) } },
        { nr =>  6, id => "ksi9",  title => "Kill-9 secondary instances",   done => 0,
          run => sub { run_test_kill_hana($sid, $shost, $testcount, 9) } },
        { nr =>  7, id => "ssi",   title => "Stop secondary instance",      done => 1,
          run => sub { run_test_stop_hana($sid, $shost, $testcount) } },
        { nr =>  8, id => "spi",   title => "Stop primary instance",        done => 1,
          run => sub { run_test_stop_hana($sid, $phost, $testcount) } },
        { nr =>  9, id => "ssns",  title => "Set secondary node standby",   done => 1,
          run => sub { run_test_standby_node($sid, $shost, $testcount) } },
        { nr => 10, id => "spns",  title => "Set primary node standby",     done => 1,
          run => sub { run_test_standby_node($sid, $phost, $testcount) } },
        { nr => 11, id => "rms",   title => "Restart master/slave",         done => 0,
          run => sub { run_test_stop_masterslave($msl, $testcount) } },
        { nr => 12, id => "sps",   title => "Stop primary system",          done => 0,
          run => sub { run_test_stop_hana_system($sid, $phost, $testcount) } },
        { nr => 13, id => "sss",   title => "Stop secondary system",        done => 0,
          run => sub { run_test_stop_hana_system($sid, $shost, $testcount) } },
        { nr => 14, id => "cmp",   title => "Cluster maintenance procedure", done => 1,
          run => sub { run_test_cluster_maintenance_procedure($testcount) } },
    );

    # Same match rule as before: numeric comparison with the test number or
    # string comparison with the short name.
    my ($chosen) = grep { ( $testCase == $_->{nr} ) || ( $testCase eq $_->{id} ) } @catalogue;

    $testName = defined $chosen ? $chosen->{title} : "NOP";
    my $message="Run testcase $testCase - primary=$phost ($pSite) secondary=$shost ($sSite) test='$testName' count=$tc testnr=$testcount";

    # Unknown test case: nothing to run, just log it.
    unless ( defined $chosen ) {
        $testActive=0;
        return mysyslog $priority, "%s", $message;
    }

    mysyslog $priority, "%s", $message;
    return $chosen->{run}->() unless $chosen->{done};

    $chosen->{run}->();
    return ( $testActive=0 );
}
#
# Main program: endless test loop
#
# Every 10 seconds the cluster is checked with check_all_ok():
#  * all checks pass and no test is active: count a good loop; on the very
#    first pass or after 12 good loops persist the new test number and start
#    a randomly chosen test case from @testCaseSet
#  * all checks pass while a test is active: the cluster has not reacted to
#    the test yet, keep waiting
#  * otherwise: the cluster is (still) recovering, count a bad loop and mark
#    the test as no longer active
#
init();
mysyslog $priority, "fhTD: Tests running. Next TestNr=%i", $testcount;
my $goodloops=0;
my $badloops=0;
while ( 1 ) {
    # Only the first SID of the list is tested.
    ($sid, $INr) = split(":", $sids[0]);
    get_hana_attributes($sid);
    $phost=get_host_primary($sid, "1234");
    $shost=get_host_secondary($sid, "1234");
    $pSite=get_site_by_host($sid, $phost);
    $sSite=get_site_by_host($sid, $shost);
    $standbyHanas = get_number_HANA_standby ($sid, $pSite);
    #mysyslog $priority, "%s", "hanaStandby=$hanaStandby";
    @primaryHanaList = sort(get_HANA_nodes($sid, $pSite));
    @secondaryHanaList = sort(get_HANA_nodes($sid, $sSite));
    #$message="HANA nodes primary site $pSite: @primaryHanaList";
    #mysyslog $priority, "%s", $message;
    #$message="HANA nodes secondary site $sSite: @secondaryHanaList";
    #mysyslog $priority, "%s", $message;
    my ($checkOK, $failures ) = check_all_ok($sid, $ClusterNodes);
    if ( ( $checkOK == 0 ) && ( $testActive==0 ) ) {
        #
        # OK (again) and no test pending
        #
        $badloops=0;
        $goodloops++;
        $message="All checks passed - primary=$phost ($pSite) secondary=$shost ($sSite) test='$testName' loop=$goodloops testnr=$testcount";
        mysyslog $priority, "%s", $message;
        if ( ( $goodloops >= 12 ) || ( $first_test == 1 ) ) {
            $testcount++;
            $first_test=0;
            # TODO: Later add other values for writing the status file (now only testnr)
            $message="STATUS primary=$phost ($pSite) secondary=$shost ($sSite) testnr=$testcount";
            mysyslog $priority, "%s", $message;
            # Persist the test number; a failure is logged but must not stop the tests.
            if ( open(my $statFH, ">", "$varlib/$testfile") ) {
                printf {$statFH} "testnr=%i\n", $testcount;
                close($statFH)
                    or mysyslog $priority, "%s", "Could not close status file $varlib/$testfile: $! testnr=$testcount";
            } else {
                mysyslog $priority, "%s", "Could not write status file $varlib/$testfile: $! testnr=$testcount";
            }
            $testActive=1;
            # Pick the next test case at random from the selected set.
            my $testCaseMaxIndex = @testCaseSet;
            my $testCaseIndex = int(rand($testCaseMaxIndex));
            my $theTestCase = $testCaseSet[$testCaseIndex];
            $message="TestCase=$theTestCase testnr=$testcount";
            mysyslog $priority, "%s", $message;
            run_testCase($theTestCase);
            $goodloops=0;
        }
    } elsif ( ( $checkOK == 0 ) && ( $testActive==1 ) ) {
        #
        # STILL OK after a test, so the cluster has not caught the problem yet
        #
        $goodloops++;
        $message="Still all checks passed - primary=$phost ($pSite) secondary=$shost ($sSite) test=$testName loop=$goodloops testnr=$testcount";
        mysyslog $priority, "%s", $message;
    } else {
        #
        # At least one check failed: the cluster is reacting / recovering
        #
        $goodloops=0;
        $badloops++;
        $testActive=0;
        if ( $checkOK == 1 ) {
            $message="$checkOK check failed ($failures) - primary=$phost ($pSite) secondary=$shost ($sSite) test='$testName' loop=$badloops testnr=$testcount";
        } else {
            $message="$checkOK checks failed ($failures) - primary=$phost ($pSite) secondary=$shost ($sSite) test='$testName' loop=$badloops testnr=$testcount";
        }
        mysyslog $priority, "%s", $message;
    }
    sleep 10;
}
#$oldmask = setlogmask $mask_priority;
# Not reached while the loop above runs forever; kept for completeness.
closelog;