File s390-tools-sles15sp3-01-dbginfo.sh-add-kvm-data-collection.patch of Package s390-tools.27266
Subject: [PATCH] [BZ 195579] dbginfo.sh: Add KVM commands and rework data collection
From: Joern Siglen <siglen@de.ibm.com>
Description: dbginfo.sh: stabilization of data collection
Symptom: o script hangup possible on single commands
o missing data collection on error
o overwrite of buffers by diag commands
Problem: hangup and loss of data collection
Solution: rework code regarding
o add timeout function
o remove problematic commands
o include handling improvements
Reproduction: -
Upstream-ID: b7807d019514cd6ddf790d047170bbc709d8815d
Problem-ID: 195579
Upstream-Description:
dbginfo.sh: Add KVM commands and rework data collection
Add virsh commands for KVM debug data collection and rework the domain
data collection and step numbering. Update the man page accordingly.
[hoeppner@linux.ibm.com: Reword commit message]
Signed-off-by: Joern Siglen <siglen@de.ibm.com>
Signed-off-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Signed-off-by: Joern Siglen <siglen@de.ibm.com>
--- s390-tools-service.orig/scripts/dbginfo.sh
+++ s390-tools-service/scripts/dbginfo.sh
@@ -35,7 +35,7 @@ print_usage()
cat <<EOF
-Usage: ${SCRIPTNAME} [OPTIONS]
+Usage: ${SCRIPTNAME} [OPTION]
This script collects runtime, configuration and trace information on
a Linux on IBM Z installation for debugging purposes.
@@ -65,16 +65,9 @@ Please report bugs to: linux390@de.ibm.c
EOF
}
-######################################
-# Verification to run as root
-#
-if test "$(/usr/bin/id -u 2>/dev/null)" -ne 0; then
- echo "${SCRIPTNAME}: Error: You must be user root to run \"${SCRIPTNAME}\"!"
- exit 1
-fi
#######################################
-# Parsing the command line
+# Parsing the command line and pre checks
#
paramWORKDIR_BASE="/tmp/"
@@ -90,7 +83,14 @@ while [ ${#} -gt 0 ]; do
;;
--directory|-d)
paramWORKDIR_BASE=${2}
- shift
+ if test -z "${paramWORKDIR_BASE}"; then
+ echo "${SCRIPTNAME}: Error: No directory specified for data collection!"
+ echo
+ exit 1
+ else
+ # jump to next param, if already last the final shift can do termination
+ shift
+ fi
;;
-*|--*|*)
echo
@@ -100,20 +100,23 @@ while [ ${#} -gt 0 ]; do
exit 1
;;
esac
+ # next parameter
shift
done
-if test -z "${paramWORKDIR_BASE}"; then
- echo "${SCRIPTNAME}: Error: No directory specified for data collection!"
- echo
- exit 1
-fi
+# check for a valid path
if test ! -d "${paramWORKDIR_BASE}"; then
echo "${SCRIPTNAME}: Error: The specified directory \"${paramWORKDIR_BASE}\" does not exist!"
echo
exit 1
fi
+# finally verification to run as root
+if test "$(/usr/bin/id -u 2>/dev/null)" -ne 0; then
+ echo "${SCRIPTNAME}: Error: You must be user root to run \"${SCRIPTNAME}\"!"
+ exit 1
+fi
+
########################################
# Global used variables
@@ -188,21 +191,18 @@ readonly OUTPUT_FILE_JOURNALCTL="${WORKP
# File that includes the output of OpenVSwitch
readonly OUTPUT_FILE_OVS="${WORKPATH}openvswitch"
-# File that includes the KVM domain xml file
-readonly OUTPUT_FILE_XML="${WORKPATH}domain_xml"
-
# File that includes the docker inspect output
readonly OUTPUT_FILE_DOCKER="${WORKPATH}docker_inspect.out"
# File that includes nvme related information
readonly OUTPUT_FILE_NVME="${WORKPATH}nvme.out"
+# File that includes KVM related information
+readonly OUTPUT_FILE_KVM="${WORKPATH}kvm_runtime.out"
+
# Mount point of the debug file system
readonly MOUNT_POINT_DEBUGFS="/sys/kernel/debug"
-# The amount of steps running the whole collections
-readonly COLLECTION_COUNT=15
-
# The kernel version (e.g. '2' from 2.6.32 or '3' from 3.2.1)
readonly KERNEL_VERSION=$(uname -r 2>/dev/null | cut -d'.' -f1)
@@ -236,6 +236,29 @@ else
readonly RUNTIME_ENVIRONMENT="LPAR"
fi
+# define order of collection steps
+ALL_STEPS="\
+ collect_cmdsout\
+ collect_vmcmdsout\
+ collect_procfs\
+ collect_sysfs\
+ collect_logfiles\
+ collect_configfiles\
+ collect_osaoat\
+ collect_ethtool\
+ collect_tc\
+ collect_bridge\
+ collect_ovs\
+ collect_docker\
+ collect_nvme\
+ collect_kvm\
+ post_processing\
+ create_package\
+ environment_cleanup\
+ "
+
+# The amount of steps running the whole collections, without last cleanup
+readonly COLLECTION_COUNT=`expr $(echo ${ALL_STEPS} | wc -w) - 1`
########################################
@@ -319,6 +342,7 @@ LOGFILES="\
/var/log/IBMtape.trace\
/var/log/IBMtape.errorlog\
/var/log/libvirt\
+ /sys/module/kvm/parameters\
/var/log/lin_tape.trace\
/var/log/lin_tape.errorlog\
/var/log/messages*\
@@ -387,7 +411,6 @@ CONFIGFILES="\
"
########################################
-
CMDS="uname -a\
:uptime\
:runlevel\
@@ -442,7 +465,7 @@ CMDS="uname -a\
:lsdasd -u\
:ziorep_config -ADM\
:lsmod\
- :lsdev\
+ :lszdev\
:lsscsi\
:lstape\
:lszfcp\
@@ -490,7 +513,6 @@ CMDS="uname -a\
"
########################################
-
VM_CMDS="q userid\
:q users\
:q privclass\
@@ -556,8 +578,21 @@ VM_CMDS="q userid\
:ind load\
:ind sp\
:ind user\
+ :qemu-ga -V\
"
###############################################################################
+KVM_CMDS="virsh version\
+ :virsh nodeinfo\
+ :virsh nodememstats\
+ :virsh nodecpustats\
+ :virsh list --all\
+ :virsh iface-list\
+ :virsh net-list\
+ :virsh nwfilter-list\
+ :virsh nodedev-list --tree\
+ :virsh pool-list\
+ :virt-host-validate\
+ "
########################################
collect_cmdsout() {
@@ -565,7 +600,7 @@ collect_cmdsout() {
local ifs_orig
ifs_orig="${IFS}"
- pr_syslog_stdout "1 of ${COLLECTION_COUNT}: Collecting command output"
+ pr_syslog_stdout "${step_num} Collecting command output"
IFS=:
for cmd in ${CMDS}; do
@@ -597,7 +632,7 @@ collect_vmcmdsout() {
ifs_orig="${IFS}"
if echo "${RUNTIME_ENVIRONMENT}" | grep -qi "z/VM" >/dev/null 2>&1; then
- pr_syslog_stdout "2 of ${COLLECTION_COUNT}: Collecting z/VM command output"
+ pr_syslog_stdout "${step_num} Collecting z/VM command output"
if which vmcp >/dev/null 2>&1; then
cp_command="vmcp"
@@ -642,7 +677,7 @@ collect_vmcmdsout() {
rmmod vmcp
fi
else
- pr_syslog_stdout "2 of ${COLLECTION_COUNT}: Collecting z/VM command output skipped - no z/VM environment"
+ pr_syslog_stdout "${step_num} Collecting z/VM command output skipped - no z/VM environment"
fi
pr_log_stdout " "
@@ -653,7 +688,7 @@ collect_vmcmdsout() {
collect_procfs() {
local file_name
- pr_syslog_stdout "3 of ${COLLECTION_COUNT}: Collecting procfs"
+ pr_syslog_stdout "${step_num} Collecting procfs"
for file_name in ${PROCFILES}; do
call_collect_file "${file_name}"
@@ -672,7 +707,7 @@ collect_sysfs() {
debugfs_mounted=0
# Requires kernel version newer then 2.4
if test "${LINUX_SUPPORT_SYSFS}" -eq 0; then
- pr_syslog_stdout "4 of ${COLLECTION_COUNT}: Collecting sysfs"
+ pr_syslog_stdout "${step_num} Collecting sysfs"
# Requires kernel version of 2.6.13 or newer
if test "${LINUX_SUPPORT_SYSFSDBF}" -eq 0; then
if ! grep -qE "${MOUNT_POINT_DEBUGFS}.*debugfs" /proc/mounts 2>/dev/null; then
@@ -713,7 +748,7 @@ collect_sysfs() {
umount "${MOUNT_POINT_DEBUGFS}"
fi
else
- pr_syslog_stdout "4 of ${COLLECTION_COUNT}: Collecting sysfs skipped. Kernel $(uname -r) must be newer than 2.4"
+ pr_syslog_stdout "${step_num} Collecting sysfs skipped. Kernel $(uname -r) must be newer than 2.4"
fi
pr_log_stdout " "
@@ -724,7 +759,7 @@ collect_sysfs() {
collect_logfiles() {
local file_name
- pr_syslog_stdout "5 of ${COLLECTION_COUNT}: Collecting log files"
+ pr_syslog_stdout "${step_num} Collecting log files"
for file_name in ${LOGFILES}; do
call_collect_file "${file_name}"
@@ -738,7 +773,7 @@ collect_logfiles() {
collect_configfiles() {
local file_name
- pr_syslog_stdout "6 of ${COLLECTION_COUNT}: Collecting config files"
+ pr_syslog_stdout "${step_num} Collecting config files"
for file_name in ${CONFIGFILES}; do
call_collect_file "${file_name}"
@@ -757,16 +792,16 @@ collect_osaoat() {
| sed 's/.*:[[:space:]]\+\([^[:space:]]*\)[[:space:]]\+/\1/g')
if which qethqoat >/dev/null 2>&1; then
if test -n "${network_devices}"; then
- pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output"
+ pr_syslog_stdout "${step_num} Collecting osa oat output"
for network_device in ${network_devices}; do
call_run_command "qethqoat ${network_device}" "${OUTPUT_FILE_OSAOAT}.out" &&
call_run_command "qethqoat -r ${network_device}" "${OUTPUT_FILE_OSAOAT}_${network_device}.raw"
done
else
- pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output skipped - no devices"
+ pr_syslog_stdout "${step_num} Collecting osa oat output skipped - no devices"
fi
else
- pr_syslog_stdout "7 of ${COLLECTION_COUNT}: Collecting osa oat output skipped - not available"
+ pr_syslog_stdout "${step_num} Collecting osa oat output skipped - not available"
fi
pr_log_stdout " "
@@ -780,7 +815,7 @@ collect_ethtool() {
network_devices=$(ls /sys/class/net 2>/dev/null)
if which ethtool >/dev/null 2>&1; then
if test -n "${network_devices}"; then
- pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output"
+ pr_syslog_stdout "${step_num} Collecting ethtool output"
for network_device in ${network_devices}; do
call_run_command "ethtool ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
call_run_command "ethtool -k ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
@@ -795,10 +830,10 @@ collect_ethtool() {
call_run_command "ethtool -T ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
done
else
- pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - no devices"
+ pr_syslog_stdout "${step_num} Collecting ethtool output skipped - no devices"
fi
else
- pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - not available"
+ pr_syslog_stdout "${step_num} Collecting ethtool output skipped - not available"
fi
pr_log_stdout " "
@@ -812,15 +847,15 @@ collect_tc() {
network_devices=$(ls /sys/class/net 2>/dev/null)
if which tc >/dev/null 2>&1; then
if test -n "${network_devices}"; then
- pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output"
+ pr_syslog_stdout "${step_num} Collecting tc output"
for network_device in ${network_devices}; do
call_run_command "tc -s qdisc show dev ${network_device}" "${OUTPUT_FILE_TC}"
done
else
- pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - no devices"
+ pr_syslog_stdout "${step_num} Collecting tc output skipped - no devices"
fi
else
- pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - not available"
+ pr_syslog_stdout "${step_num} Collecting tc output skipped - not available"
fi
pr_log_stdout " "
@@ -834,17 +869,17 @@ collect_bridge() {
network_devices=$(ls /sys/class/net 2>/dev/null)
if which bridge >/dev/null 2>&1; then
if test -n "${network_devices}"; then
- pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output"
+ pr_syslog_stdout "${step_num} Collecting bridge output"
for network_device in ${network_devices}; do
call_run_command "bridge -d link show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
call_run_command "bridge -s fdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
call_run_command "bridge -d mdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
done
else
- pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - no devices"
+ pr_syslog_stdout "${step_num} Collecting bridge output skipped - no devices"
fi
else
- pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - not available"
+ pr_syslog_stdout "${step_num} Collecting bridge output skipped - not available"
fi
pr_log_stdout " "
@@ -866,7 +901,7 @@ collect_ovs() {
:ovsdb-client dump\
"
if test -n "${br_list}"; then
- pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
+ pr_syslog_stdout "${step_num} Collecting OpenVSwitch output"
IFS=:
for ovscmd in ${ovscmds}; do
IFS=${ifs_orig} call_run_command "${ovscmd}" "${OUTPUT_FILE_OVS}.out"
@@ -885,25 +920,7 @@ collect_ovs() {
IFS="${ifs_orig}"
done
else
- pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
- fi
-
- pr_log_stdout " "
-}
-
-########################################
-collect_domain_xml() {
- local domain_list
- local domain
-
- domain_list=$(virsh list --all --name)
- if test -n "${domain_list}"; then
- pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files"
- for domain in ${domain_list}; do
- call_run_command "virsh dumpxml ${domain}" "${OUTPUT_FILE_XML}_${domain}.xml"
- done
- else
- pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
+ pr_syslog_stdout "${step_num} Collecting OpenVSwitch output skipped"
fi
pr_log_stdout " "
@@ -917,23 +934,23 @@ collect_docker() {
# call docker inspect for all containers
item_list=$(docker ps -qa)
if test -n "${item_list}"; then
- pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output"
+ pr_syslog_stdout "${current_step}a of ${COLLECTION_COUNT}: Collecting docker container output"
for item in ${item_list}; do
call_run_command "docker inspect ${item}" "${OUTPUT_FILE_DOCKER}"
done
else
- pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
+ pr_syslog_stdout "${current_step}a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
fi
# call docker inspect for all networks
item_list=$(docker network ls -q)
if test -n "${item_list}"; then
- pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output"
+ pr_syslog_stdout "${current_step}b of ${COLLECTION_COUNT}: Collecting docker network output"
for item in ${item_list}; do
call_run_command "docker network inspect ${item}" "${OUTPUT_FILE_DOCKER}"
done
else
- pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
+ pr_syslog_stdout "${current_step}b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
fi
pr_log_stdout " "
@@ -943,7 +960,7 @@ collect_docker() {
collect_nvme() {
local NVME
- pr_syslog_stdout "14 of ${COLLECTION_COUNT}: Collecting nvme output"
+ pr_syslog_stdout "${step_num} Collecting nvme output"
call_run_command "nvme list" "${OUTPUT_FILE_NVME}"
for NVME in /dev/nvme[0-9]*; do
@@ -959,13 +976,49 @@ collect_nvme() {
}
########################################
+collect_kvm() {
+ local cmd
+ local ifs_orig
+ local domain_list
+ local domain
+
+ # check if KVM virsh command exists
+ if type virsh >/dev/null 2>&1;
+ then
+ pr_syslog_stdout "${step_num} Collecting KVM data"
+ ifs_orig="${IFS}"
+ IFS=:
+ for cmd in ${KVM_CMDS}; do
+ IFS=${ifs_orig} call_run_command "${cmd}" "${OUTPUT_FILE_KVM}"
+ done
+ IFS="${ifs_orig}"
+
+ # domain/guest specific commands
+ domain_list=$(virsh list --all --name)
+ if test -n "${domain_list}"; then
+ for domain in ${domain_list}; do
+ call_run_command "virsh dominfo ${domain}" "${OUTPUT_FILE_KVM}"
+ call_run_command "virsh domblklist ${domain}" "${OUTPUT_FILE_KVM}"
+ call_run_command "virsh domstats ${domain}" "${OUTPUT_FILE_KVM}"
+ done
+ else
+ echo "no KVM doamins found" | tee -a ${OUTPUT_FILE_KVM}
+ fi
+ else
+ pr_syslog_stdout "${step_num} Skip KVM data - no virsh command"
+ fi
+
+ pr_log_stdout " "
+}
+
+########################################
post_processing() {
local file_mtime
local file_mtime_epoche
local tmp_file
local file_name
- pr_syslog_stdout "${COLLECTION_COUNT} of ${COLLECTION_COUNT}: Postprocessing"
+ pr_syslog_stdout "${step_num} Postprocessing"
find "${WORKPATH}etc/libvirt/qemu/" -maxdepth 1 -name "*.xml" 2>/dev/null | while IFS= read -r file_name; do
file_mtime_epoche=$(stat --format=%Y "${file_name}")
@@ -1141,7 +1194,7 @@ environment_setup()
create_package()
{
local rc_tar
- pr_stdout "Finalizing: Creating archive with collected data"
+ pr_stdout "${step_num} Finalizing: Creating archive with collected data"
cd "${WORKDIR_BASE}"
touch "${WORKARCHIVE}"
@@ -1252,41 +1305,16 @@ pr_log_stdout ""
logger -t "${SCRIPTNAME}" "Starting data collection"
-collect_cmdsout
-
-collect_vmcmdsout
-
-# Collecting the proc file system (content is specific based on kernel version)
-collect_procfs
-
-# Collecting sysfs in case we run on Kernel 2.4 or newer
-collect_sysfs
-
-collect_logfiles
-
-collect_configfiles
-
-collect_osaoat
-
-collect_ethtool
-
-collect_tc
-
-collect_bridge
-
-collect_ovs
-
-collect_domain_xml
-
-collect_docker
-
-collect_nvme
-
-post_processing
-
-create_package
-
-environment_cleanup
+# step counter
+current_step=1
+# run all collection steps
+for step in ${ALL_STEPS}; do
+ # generate step numbering
+ step_num="${current_step} of ${COLLECTION_COUNT}: "
+ # calling step procedure
+ ${step}
+ current_step=`expr ${current_step} + 1`
+done
logger -t "${SCRIPTNAME}" "Data collection completed"
--- s390-tools-service.orig/scripts/dbginfo.sh.1
+++ s390-tools-service/scripts/dbginfo.sh.1
@@ -1,4 +1,4 @@
-.TH DBGINFO.SH 1 "February 2017" "s390-tools"
+.TH DBGINFO.SH 1 "April 2021" "s390-tools"
.SH NAME
dbginfo.sh \- collect runtime, configuration and trace information
@@ -56,46 +56,46 @@ Copyright IBM Corp. 2002, 2021
.PP
Hardware platform = s390x
.br
-Kernel version = <kernel\-version>
+Kernel version = 5.4.0 (5.4.0-70-generic)
.br
Runtime environment = z/VM
.PP
-1 of 15: Collecting command output
+1 of 16: Collecting command output
.PP
-2 of 15: Collecting z/VM command output
+2 of 16: Collecting z/VM command output
.PP
-3 of 15: Collecting procfs
+3 of 16: Collecting procfs
.PP
-4 of 15: Collecting sysfs
+4 of 16: Collecting sysfs
.PP
-5 of 15: Collecting log files
+5 of 16: Collecting log files
.PP
-6 of 15: Collecting config files
+6 of 16: Collecting config files
.PP
-7 of 15: Collecting osa oat output skipped \- not available
+7 of 16: Collecting osa oat output
.PP
-8 of 15: Collecting ethtool output
+8 of 16: Collecting ethtool output
.PP
-9 of 15: Collecting tc output
-.pp
-10 of 15: Collecting bridge output
-.pp
-11 of 15: Collecting OpenVSwitch output
+9 of 16: Collecting tc output
.PP
-12 of 15: Collecting domain xml files
+10 of 16: Collecting bridge output
.PP
-13a of 15: Collecting docker container output
-13b of 15: Collecting docker network output
+11 of 16: Collecting OpenVSwitch output
.PP
-14 of 15: Collecting nvme output
+12a of 16: Collecting docker container output
+12b of 16: Collecting docker network output
.PP
-15 of 15: Postprocessing
+13 of 16: Collecting nvme output
.PP
-Finalizing: Creating archive with collected data
+14 of 16: Collecting KVM data
+.PP
+15 of 16: Postprocessing
+.PP
+16 of 16: Finalizing: Creating archive with collected data
.PP
Collected data was saved to:
.br
- >> /data\-collection/DBGINFO\-2019\-08\-19\-21\-39\-16\-host\-012345.tgz <<
+ >> /data\-collection/DBGINFO\-2021\-04\-20\-14\-00\-07\-host\-012345.tgz <<
.br
Review the collected data before sending to your service organization.
.SH HINTS