File s390-tools-sles15sp3-01-dbginfo.sh-add-kvm-data-collection.patch of Package s390-tools.27266

Subject: [PATCH] [BZ 195579] dbginfo.sh: Add KVM commands and rework data collection
From: Joern Siglen <siglen@de.ibm.com>

Description:   dbginfo.sh: stabilization of data collection
Symptom:       o script hangup possible on single commands
               o missing data collection on error
               o overwrite of buffers by diag commands
Problem:       hangup and loss of data collection
Solution:      rework code regarding
                o add timeout function
                o remove problematic commands
                o include handling improvements
Reproduction:  -
Upstream-ID:   b7807d019514cd6ddf790d047170bbc709d8815d
Problem-ID:    195579

Upstream-Description:

              dbginfo.sh: Add KVM commands and rework data collection

              Add virsh commands for KVM debug data collection and rework the domain
              data collection and step numbering. Update the man page accordingly.

              [hoeppner@linux.ibm.com: Reword commit message]
              Signed-off-by: Joern Siglen <siglen@de.ibm.com>
              Signed-off-by: Jan Hoeppner <hoeppner@linux.ibm.com>


Signed-off-by: Joern Siglen <siglen@de.ibm.com>
--- s390-tools-service.orig/scripts/dbginfo.sh
+++ s390-tools-service/scripts/dbginfo.sh
@@ -35,7 +35,7 @@ print_usage()
     cat <<EOF
 
 
-Usage: ${SCRIPTNAME} [OPTIONS]
+Usage: ${SCRIPTNAME} [OPTION]
 
 This script collects runtime, configuration and trace information on
 a Linux on IBM Z installation for debugging purposes.
@@ -65,16 +65,9 @@ Please report bugs to: linux390@de.ibm.c
 EOF
 }
 
-######################################
-# Verification to run as root
-#
-if test "$(/usr/bin/id -u 2>/dev/null)" -ne 0; then
-    echo "${SCRIPTNAME}: Error: You must be user root to run \"${SCRIPTNAME}\"!"
-    exit 1
-fi
 
 #######################################
-# Parsing the command line
+# Parsing the command line and pre checks
 #
 paramWORKDIR_BASE="/tmp/"
 
@@ -90,7 +83,14 @@ while [ ${#} -gt 0 ]; do
 	    ;;
 	--directory|-d)
 	    paramWORKDIR_BASE=${2}
-	    shift
+	    if test -z "${paramWORKDIR_BASE}"; then
+	        echo "${SCRIPTNAME}: Error: No directory specified for data collection!"
+		echo
+		exit 1
+	    else
+	        # jump to next param, if already last the final shift can do termination
+		shift
+	    fi
 	    ;;
 	-*|--*|*)
 	    echo
@@ -100,20 +100,23 @@ while [ ${#} -gt 0 ]; do
 	    exit 1
 	    ;;
     esac
+    # next parameter
     shift
 done
 
-if test -z "${paramWORKDIR_BASE}"; then
-    echo "${SCRIPTNAME}: Error: No directory specified for data collection!"
-    echo
-    exit 1
-fi
+# check for a valid path
 if test ! -d "${paramWORKDIR_BASE}"; then
     echo "${SCRIPTNAME}: Error: The specified directory \"${paramWORKDIR_BASE}\" does not exist!"
     echo
     exit 1
 fi
 
+# finally verification to run as root
+if test "$(/usr/bin/id -u 2>/dev/null)" -ne 0; then
+    echo "${SCRIPTNAME}: Error: You must be user root to run \"${SCRIPTNAME}\"!"
+    exit 1
+fi
+
 
 ########################################
 # Global used variables
@@ -188,21 +191,18 @@ readonly OUTPUT_FILE_JOURNALCTL="${WORKP
 # File that includes the output of OpenVSwitch
 readonly OUTPUT_FILE_OVS="${WORKPATH}openvswitch"
 
-# File that includes the KVM domain xml file
-readonly OUTPUT_FILE_XML="${WORKPATH}domain_xml"
-
 # File that includes the docker inspect output
 readonly OUTPUT_FILE_DOCKER="${WORKPATH}docker_inspect.out"
 
 # File that includes nvme related information
 readonly OUTPUT_FILE_NVME="${WORKPATH}nvme.out"
 
+# File that includes KVM related information
+readonly OUTPUT_FILE_KVM="${WORKPATH}kvm_runtime.out"
+
 # Mount point of the debug file system
 readonly MOUNT_POINT_DEBUGFS="/sys/kernel/debug"
 
-# The amount of steps running the whole collections
-readonly COLLECTION_COUNT=15
-
 # The kernel version (e.g. '2' from 2.6.32 or '3' from 3.2.1)
 readonly KERNEL_VERSION=$(uname -r 2>/dev/null | cut -d'.' -f1)
 
@@ -236,6 +236,29 @@ else
     readonly RUNTIME_ENVIRONMENT="LPAR"
 fi
 
+# define order of collection steps
+ALL_STEPS="\
+ collect_cmdsout\
+ collect_vmcmdsout\
+ collect_procfs\
+ collect_sysfs\
+ collect_logfiles\
+ collect_configfiles\
+ collect_osaoat\
+ collect_ethtool\
+ collect_tc\
+ collect_bridge\
+ collect_ovs\
+ collect_docker\
+ collect_nvme\
+ collect_kvm\
+ post_processing\
+ create_package\
+ environment_cleanup\
+ "
+
+# The amount of steps running the whole collections, without last cleanup
+readonly COLLECTION_COUNT=`expr $(echo ${ALL_STEPS} | wc -w) - 1`
 
 ########################################
 
@@ -319,6 +342,7 @@ LOGFILES="\
   /var/log/IBMtape.trace\
   /var/log/IBMtape.errorlog\
   /var/log/libvirt\
+  /sys/module/kvm/parameters\
   /var/log/lin_tape.trace\
   /var/log/lin_tape.errorlog\
   /var/log/messages*\
@@ -387,7 +411,6 @@ CONFIGFILES="\
   "
 
 ########################################
-
 CMDS="uname -a\
   :uptime\
   :runlevel\
@@ -442,7 +465,7 @@ CMDS="uname -a\
   :lsdasd -u\
   :ziorep_config -ADM\
   :lsmod\
-  :lsdev\
+  :lszdev\
   :lsscsi\
   :lstape\
   :lszfcp\
@@ -490,7 +513,6 @@ CMDS="uname -a\
   "
 
 ########################################
-
 VM_CMDS="q userid\
   :q users\
   :q privclass\
@@ -556,8 +578,21 @@ VM_CMDS="q userid\
   :ind load\
   :ind sp\
   :ind user\
+  :qemu-ga -V\
   "
 ###############################################################################
+KVM_CMDS="virsh version\
+  :virsh nodeinfo\
+  :virsh nodememstats\
+  :virsh nodecpustats\
+  :virsh list --all\
+  :virsh iface-list\
+  :virsh net-list\
+  :virsh nwfilter-list\
+  :virsh nodedev-list --tree\
+  :virsh pool-list\
+  :virt-host-validate\
+  "
 
 ########################################
 collect_cmdsout() {
@@ -565,7 +600,7 @@ collect_cmdsout() {
     local ifs_orig
 
     ifs_orig="${IFS}"
-    pr_syslog_stdout "1 of ${COLLECTION_COUNT}: Collecting command output"
+    pr_syslog_stdout "${step_num} Collecting command output"
 
     IFS=:
     for cmd in ${CMDS}; do
@@ -597,7 +632,7 @@ collect_vmcmdsout() {
     ifs_orig="${IFS}"
 
     if echo "${RUNTIME_ENVIRONMENT}" | grep -qi "z/VM" >/dev/null 2>&1; then
-	pr_syslog_stdout "2 of ${COLLECTION_COUNT}: Collecting z/VM command output"
+	pr_syslog_stdout "${step_num} Collecting z/VM command output"
 
 	if which vmcp >/dev/null 2>&1; then
 	    cp_command="vmcp"
@@ -642,7 +677,7 @@ collect_vmcmdsout() {
 	    rmmod vmcp
 	fi
     else
-	pr_syslog_stdout "2 of ${COLLECTION_COUNT}: Collecting z/VM command output skipped - no z/VM environment"
+	pr_syslog_stdout "${step_num} Collecting z/VM command output skipped - no z/VM environment"
     fi
 
     pr_log_stdout " "
@@ -653,7 +688,7 @@ collect_vmcmdsout() {
 collect_procfs() {
     local file_name
 
-    pr_syslog_stdout "3 of ${COLLECTION_COUNT}: Collecting procfs"
+    pr_syslog_stdout "${step_num} Collecting procfs"
 
     for file_name in ${PROCFILES}; do
 	call_collect_file "${file_name}"
@@ -672,7 +707,7 @@ collect_sysfs() {
     debugfs_mounted=0
     # Requires kernel version newer then 2.4
     if test "${LINUX_SUPPORT_SYSFS}" -eq 0; then
-	pr_syslog_stdout "4 of ${COLLECTION_COUNT}: Collecting sysfs"
+	pr_syslog_stdout "${step_num} Collecting sysfs"
 	# Requires kernel version of 2.6.13 or newer
 	if test "${LINUX_SUPPORT_SYSFSDBF}" -eq 0; then
 	    if ! grep -qE "${MOUNT_POINT_DEBUGFS}.*debugfs" /proc/mounts 2>/dev/null; then
@@ -713,7 +748,7 @@ collect_sysfs() {
 	    umount "${MOUNT_POINT_DEBUGFS}"
 	fi
     else
-	pr_syslog_stdout "4 of ${COLLECTION_COUNT}: Collecting sysfs skipped. Kernel $(uname -r) must be newer than 2.4"
+	pr_syslog_stdout "${step_num} Collecting sysfs skipped. Kernel $(uname -r) must be newer than 2.4"
     fi
 
     pr_log_stdout " "
@@ -724,7 +759,7 @@ collect_sysfs() {
 collect_logfiles() {
     local file_name
 
-    pr_syslog_stdout "5 of ${COLLECTION_COUNT}: Collecting log files"
+    pr_syslog_stdout "${step_num} Collecting log files"
 
     for file_name in ${LOGFILES}; do
 	call_collect_file "${file_name}"
@@ -738,7 +773,7 @@ collect_logfiles() {
 collect_configfiles() {
     local file_name
 
-    pr_syslog_stdout "6 of ${COLLECTION_COUNT}: Collecting config files"
+    pr_syslog_stdout "${step_num} Collecting config files"
 
     for file_name in ${CONFIGFILES}; do
 	call_collect_file "${file_name}"
@@ -757,16 +792,16 @@ collect_osaoat() {
                      | sed 's/.*:[[:space:]]\+\([^[:space:]]*\)[[:space:]]\+/\1/g')
     if which qethqoat >/dev/null 2>&1; then
 	if test -n "${network_devices}"; then
-	    pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output"
+	    pr_syslog_stdout "${step_num} Collecting osa oat output"
 	    for network_device in ${network_devices}; do
 		call_run_command "qethqoat ${network_device}" "${OUTPUT_FILE_OSAOAT}.out" &&
 		call_run_command "qethqoat -r ${network_device}" "${OUTPUT_FILE_OSAOAT}_${network_device}.raw"
 	    done
 	else
-	    pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output skipped - no devices"
+	    pr_syslog_stdout "${step_num} Collecting osa oat output skipped - no devices"
 	fi
     else
-	pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output skipped - not available"
+	pr_syslog_stdout "${step_num} Collecting osa oat output skipped - not available"
     fi
 
     pr_log_stdout " "
@@ -780,7 +815,7 @@ collect_ethtool() {
     network_devices=$(ls /sys/class/net 2>/dev/null)
     if which ethtool >/dev/null 2>&1; then
 	if test -n "${network_devices}"; then
-	    pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output"
+	    pr_syslog_stdout "${step_num} Collecting ethtool output"
 	    for network_device in ${network_devices}; do
 		call_run_command "ethtool ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
 		call_run_command "ethtool -k ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
@@ -795,10 +830,10 @@ collect_ethtool() {
 		call_run_command "ethtool -T ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
 	    done
 	else
-	    pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - no devices"
+	    pr_syslog_stdout "${step_num} Collecting ethtool output skipped - no devices"
 	fi
     else
-	pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - not available"
+	pr_syslog_stdout "${step_num} Collecting ethtool output skipped - not available"
     fi
 
     pr_log_stdout " "
@@ -812,15 +847,15 @@ collect_tc() {
     network_devices=$(ls /sys/class/net 2>/dev/null)
     if which tc >/dev/null 2>&1; then
 	if test -n "${network_devices}"; then
-	    pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output"
+	    pr_syslog_stdout "${step_num} Collecting tc output"
 	    for network_device in ${network_devices}; do
 		call_run_command "tc -s qdisc show dev ${network_device}" "${OUTPUT_FILE_TC}"
 	    done
 	else
-	    pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - no devices"
+	    pr_syslog_stdout "${step_num} Collecting tc output skipped - no devices"
 	fi
     else
-	pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - not available"
+	pr_syslog_stdout "${step_num} Collecting tc output skipped - not available"
     fi
 
     pr_log_stdout " "
@@ -834,17 +869,17 @@ collect_bridge() {
     network_devices=$(ls /sys/class/net 2>/dev/null)
     if which bridge >/dev/null 2>&1; then
 	if test -n "${network_devices}"; then
-	    pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output"
+	    pr_syslog_stdout "${step_num} Collecting bridge output"
 	    for network_device in ${network_devices}; do
 		call_run_command "bridge -d link show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
 		call_run_command "bridge -s fdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
 		call_run_command "bridge -d mdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
 	    done
 	else
-	    pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - no devices"
+	    pr_syslog_stdout "${step_num} Collecting bridge output skipped - no devices"
 	fi
     else
-	pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - not available"
+	pr_syslog_stdout "${step_num} Collecting bridge output skipped - not available"
     fi
 
     pr_log_stdout " "
@@ -866,7 +901,7 @@ collect_ovs() {
             :ovsdb-client dump\
             "
     if test -n "${br_list}"; then
-        pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
+        pr_syslog_stdout "${step_num} Collecting OpenVSwitch output"
         IFS=:
           for ovscmd in ${ovscmds}; do
             IFS=${ifs_orig} call_run_command "${ovscmd}" "${OUTPUT_FILE_OVS}.out"
@@ -885,25 +920,7 @@ collect_ovs() {
          IFS="${ifs_orig}"
         done
     else
-        pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
-    fi
-
-    pr_log_stdout " "
-}
-
-########################################
-collect_domain_xml() {
-    local domain_list
-    local domain
-
-    domain_list=$(virsh list --all --name)
-    if test -n "${domain_list}"; then
-        pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files"
-	  for domain in ${domain_list}; do
-	    call_run_command "virsh dumpxml ${domain}" "${OUTPUT_FILE_XML}_${domain}.xml"
-          done
-    else
-        pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
+        pr_syslog_stdout "${step_num} Collecting OpenVSwitch output skipped"
     fi
 
     pr_log_stdout " "
@@ -917,23 +934,23 @@ collect_docker() {
     # call docker inspect for all containers
     item_list=$(docker ps -qa)
     if test -n "${item_list}"; then
-        pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output"
+        pr_syslog_stdout "${current_step}a of ${COLLECTION_COUNT}: Collecting docker container output"
         for item in ${item_list}; do
             call_run_command "docker inspect ${item}" "${OUTPUT_FILE_DOCKER}"
         done
     else
-        pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
+        pr_syslog_stdout "${current_step}a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
     fi
 
     # call docker inspect for all networks
     item_list=$(docker network ls -q)
     if test -n "${item_list}"; then
-        pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output"
+        pr_syslog_stdout "${current_step}b of ${COLLECTION_COUNT}: Collecting docker network output"
         for item in ${item_list}; do
             call_run_command "docker network inspect ${item}" "${OUTPUT_FILE_DOCKER}"
         done
     else
-        pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
+        pr_syslog_stdout "${current_step}b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
     fi
 
     pr_log_stdout " "
@@ -943,7 +960,7 @@ collect_docker() {
 collect_nvme() {
     local NVME
 
-    pr_syslog_stdout "14 of ${COLLECTION_COUNT}: Collecting nvme output"
+    pr_syslog_stdout "${step_num} Collecting nvme output"
     call_run_command "nvme list" "${OUTPUT_FILE_NVME}"
 
     for NVME in /dev/nvme[0-9]*; do
@@ -959,13 +976,49 @@ collect_nvme() {
 }
 
 ########################################
+collect_kvm() {
+    local cmd
+    local ifs_orig
+    local domain_list
+    local domain
+
+    # check if KVM virsh command exists
+    if type virsh >/dev/null 2>&1;
+    then
+        pr_syslog_stdout "${step_num} Collecting KVM data"
+        ifs_orig="${IFS}"
+	IFS=:
+	for cmd in ${KVM_CMDS}; do
+            IFS=${ifs_orig} call_run_command "${cmd}" "${OUTPUT_FILE_KVM}"
+	done
+	IFS="${ifs_orig}"
+
+	# domain/guest specific commands
+        domain_list=$(virsh list --all --name)
+        if test -n "${domain_list}"; then
+	  for domain in ${domain_list}; do
+	    call_run_command "virsh dominfo ${domain}" "${OUTPUT_FILE_KVM}"
+	    call_run_command "virsh domblklist ${domain}" "${OUTPUT_FILE_KVM}"
+	    call_run_command "virsh domstats ${domain}" "${OUTPUT_FILE_KVM}"
+          done
+	else
+	  echo "no KVM doamins found" | tee -a ${OUTPUT_FILE_KVM}
+        fi
+    else
+        pr_syslog_stdout "${step_num} Skip KVM data - no virsh command"
+    fi
+
+    pr_log_stdout " "
+}
+
+########################################
 post_processing() {
     local file_mtime
     local file_mtime_epoche
     local tmp_file
     local file_name
 
-    pr_syslog_stdout "${COLLECTION_COUNT} of ${COLLECTION_COUNT}: Postprocessing"
+    pr_syslog_stdout "${step_num} Postprocessing"
 
     find "${WORKPATH}etc/libvirt/qemu/" -maxdepth 1 -name "*.xml" 2>/dev/null | while IFS= read -r file_name; do
 	file_mtime_epoche=$(stat --format=%Y "${file_name}")
@@ -1141,7 +1194,7 @@ environment_setup()
 create_package()
 {
     local rc_tar
-    pr_stdout "Finalizing: Creating archive with collected data"
+    pr_stdout "${step_num} Finalizing: Creating archive with collected data"
     cd "${WORKDIR_BASE}"
 
     touch "${WORKARCHIVE}"
@@ -1252,41 +1305,16 @@ pr_log_stdout ""
 
 logger -t "${SCRIPTNAME}" "Starting data collection"
 
-collect_cmdsout
-
-collect_vmcmdsout
-
-# Collecting the proc file system (content is specific based on kernel version)
-collect_procfs
-
-# Collecting sysfs in case we run on Kernel 2.4 or newer
-collect_sysfs
-
-collect_logfiles
-
-collect_configfiles
-
-collect_osaoat
-
-collect_ethtool
-
-collect_tc
-
-collect_bridge
-
-collect_ovs
-
-collect_domain_xml
-
-collect_docker
-
-collect_nvme
-
-post_processing
-
-create_package
-
-environment_cleanup
+# step counter
+current_step=1
+# run all collection steps
+for step in ${ALL_STEPS}; do
+  # generate step numbering
+  step_num="${current_step} of ${COLLECTION_COUNT}: "
+  # calling step procedure
+  ${step}
+  current_step=`expr ${current_step} + 1`
+done
 
 logger -t "${SCRIPTNAME}" "Data collection completed"
 
--- s390-tools-service.orig/scripts/dbginfo.sh.1
+++ s390-tools-service/scripts/dbginfo.sh.1
@@ -1,4 +1,4 @@
-.TH DBGINFO.SH  1 "February 2017" "s390-tools"
+.TH DBGINFO.SH  1 "April 2021" "s390-tools"
 
 .SH NAME
 dbginfo.sh \- collect runtime, configuration and trace information
@@ -56,46 +56,46 @@ Copyright IBM Corp. 2002, 2021
 .PP
 Hardware platform     = s390x
 .br
-Kernel version        = <kernel\-version>
+Kernel version        = 5.4.0 (5.4.0-70-generic)
 .br
 Runtime environment   = z/VM
 .PP
-1 of 15: Collecting command output
+1 of 16: Collecting command output
 .PP
-2 of 15: Collecting z/VM command output
+2 of 16: Collecting z/VM command output
 .PP
-3 of 15: Collecting procfs
+3 of 16: Collecting procfs
 .PP
-4 of 15: Collecting sysfs
+4 of 16: Collecting sysfs
 .PP
-5 of 15: Collecting log files
+5 of 16: Collecting log files
 .PP
-6 of 15: Collecting config files
+6 of 16: Collecting config files
 .PP
-7 of 15: Collecting osa oat output skipped \- not available
+7 of 16: Collecting osa oat output
 .PP
-8 of 15: Collecting ethtool output
+8 of 16: Collecting ethtool output
 .PP
-9 of 15: Collecting tc output
-.pp
-10 of 15: Collecting bridge output
-.pp
-11 of 15: Collecting OpenVSwitch output
+9 of 16: Collecting tc output
 .PP
-12 of 15: Collecting domain xml files
+10 of 16: Collecting bridge output
 .PP
-13a of 15: Collecting docker container output
-13b of 15: Collecting docker network output
+11 of 16: Collecting OpenVSwitch output
 .PP
-14 of 15: Collecting nvme output
+12a of 16: Collecting docker container
+12b of 16: Collecting docker network
 .PP
-15 of 15: Postprocessing
+13 of 16: Collecting nvme output
 .PP
-Finalizing: Creating archive with collected data
+14 of 16: Collecting KVM data
+.PP
+15 of 16: Postprocessing
+.PP
+16 of 16: Finalizing: Creating archive with collected data
 .PP
 Collected data was saved to:
 .br
- >>  /data\-collection/DBGINFO\-2019\-08\-19\-21\-39\-16\-host\-012345.tgz  <<
+ >>  /data\-collection/DBGINFO\-2021\-04\-20\-14\-00\-07\-host\-012345.tgz  <<
 .br
 Review the collected data before sending to your service organization.
 .SH HINTS
openSUSE Build Service is sponsored by