File s390-tools-sles15sp1-dbginfo-extend-data-collection.patch of Package s390-tools.14411
Subject: [PATCH] [BZ 184025] dbginfo.sh: Extend data collection
From: Sa Liu <saliu@de.ibm.com>
Description: dbginfo.sh: Extend data collection
Symptom: This update addresses several shortcomings in the dbginfo.sh
data collection:
- There is no data collection for ethtool, tc and bridge
output for debugging network issues
- There is no data collection for hyptop output
- There is no data collection for nvme devices
- There is no data collection for smc devices
- Sometimes the lsof output is so long that it makes
runtime.out hard to open
- The journalctl output was limited to 50000 lines
Problem: Following problems exist:
- Missing data collection for ethtool, tc and bridge
- Missing data collection for hyptop output
- Missing data collection for nvme devices
- Missing data collection for smc devices
- Sometimes the lsof output is so long that it makes
runtime.out hard to open
- The journalctl output was limited to 50000 lines, so the
beginning of a very long journal is cut off
Solution: - Extend the data collection to include the output of
ethtool, tc, bridge and hyptop
- Extend the data collection to include nvme and smc
device information
- Write the output of lsof to a separate file
- Raise the journalctl limit to 100000 lines
Reproduction: Run this script and verify the output
Upstream-ID: -
Problem-ID: 184025
Signed-off-by: Sa Liu <saliu@de.ibm.com>
---
scripts/dbginfo.sh | 173 +++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 143 insertions(+), 30 deletions(-)
--- a/scripts/dbginfo.sh
+++ b/scripts/dbginfo.sh
@@ -2,7 +2,7 @@
#
# dbginfo.sh - Tool to collect runtime, configuration, and trace information
#
-# Copyright IBM Corp. 2002, 2017
+# Copyright IBM Corp. 2002, 2020
#
# s390-tools is free software; you can redistribute it and/or modify
# it under the terms of the MIT license. See LICENSE for details.
@@ -21,7 +21,7 @@ readonly SCRIPTNAME="${0##*/}"
print_version() {
cat <<EOF
${SCRIPTNAME}: Debug information script version %S390_TOOLS_VERSION%
-Copyright IBM Corp. 2002, 2017
+Copyright IBM Corp. 2002, 2020
EOF
}
@@ -167,9 +167,21 @@ readonly OUTPUT_FILE_VMCMD="${WORKPATH}z
# File that includes content of files from sysfs
readonly OUTPUT_FILE_SYSFS="${WORKPATH}sysfsfiles.out"
+# File that includes the output of lsof
+readonly OUTPUT_FILE_LSOF="${WORKPATH}open_files.out"
+
# File that includes content of OSA OAT
readonly OUTPUT_FILE_OSAOAT="${WORKPATH}osa_oat"
+# File that includes content of Ethtool commands
+readonly OUTPUT_FILE_ETHTOOL="${WORKPATH}ethtool.out"
+
+# File that includes content of tc commands
+readonly OUTPUT_FILE_TC="${WORKPATH}tc.out"
+
+# File that includes content of bridge commands
+readonly OUTPUT_FILE_BRIDGE="${WORKPATH}bridge.out"
+
# File that includes the output of journalctl
readonly OUTPUT_FILE_JOURNALCTL="${WORKPATH}journalctl.out"
@@ -189,7 +201,7 @@ readonly OUTPUT_FILE_NVME="${WORKPATH}nv
readonly MOUNT_POINT_DEBUGFS="/sys/kernel/debug"
# The amount of steps running the whole collections
-readonly COLLECTION_COUNT=12
+readonly COLLECTION_COUNT=15
# The kernel version (e.g. '2' from 2.6.32 or '3' from 3.2.1)
readonly KERNEL_VERSION=$(uname -r 2>/dev/null | cut -d'.' -f1)
@@ -312,7 +324,7 @@ LOGFILES="\
/var/log/yum.log\
/var/log/openvswitch/ovs-vswitchd.log\
/var/log/openvswitch/ovsdb-server.log\
- /var/run/docker/libcontainerd/containerd/events.log\
+ /run/docker/libcontainerd/containerd/events.log\
/run/containerd/events.log\
"
@@ -367,7 +379,7 @@ CONFIGFILES="\
$(find /lib/modules -name modules.dep 2>/dev/null)\
/etc/docker\
/lib/systemd/system/docker.service\
- /usr/lib/systemd/system/docker.service\
+ /usr/lib/systemd/system\
/etc/apparmor.d\
"
@@ -394,6 +406,7 @@ CMDS="uname -a\
:ip link show\
:ip ntable\
:ip a sh\
+ :ip -s -s link\
:firewall-cmd --list-all\
:ipcs -a\
:netstat -pantu\
@@ -441,7 +454,8 @@ CMDS="uname -a\
:SPident\
:rpm -qa | sort\
:sysctl -a\
- :lsof\
+ :lsof \
+ > '${OUTPUT_FILE_LSOF}'\
:mount\
:df -h\
:df -i\
@@ -451,7 +465,7 @@ CMDS="uname -a\
:java -version\
:cat /root/.bash_history\
:env\
- :journalctl --all --no-pager --since=$(date -d '5 day ago' +%Y-%m-%d) --until=now --lines=50000 \
+ :journalctl --all --no-pager --lines=100000 --output=short-precise\
> '${OUTPUT_FILE_JOURNALCTL}'\
:openssl engine\
:systemd-delta\
@@ -465,6 +479,10 @@ CMDS="uname -a\
:docker version\
:docker stats --no-stream\
:systemctl status docker.service\
+ :blockdev --report\
+ :lvdisplay\
+ :lspci -vv\
+ :smc_dbg\
"
########################################
@@ -480,6 +498,7 @@ VM_CMDS="q userid\
:q cpus\
:q srm\
:q vtod\
+ :q time full\
:q timezone\
:q loaddev\
:q v osa\
@@ -548,6 +567,11 @@ collect_cmdsout() {
done
IFS="${ifs_orig}"
+ if echo "${RUNTIME_ENVIRONMENT}" | grep -qi "z/VM" >/dev/null 2>&1; then
+ call_run_command "hyptop -b -d 1 -n 5 -f \#,c,m,C:s,M:s,o -S c" "${OUTPUT_FILE_CMD}"
+ else call_run_command "hyptop -b -d 1 -n 5 -f \#,T,c,e,m,C:s,E:s,M:s,o -S c" "${OUTPUT_FILE_CMD}"
+ fi
+
pr_log_stdout " "
}
@@ -743,6 +767,83 @@ collect_osaoat() {
}
########################################
+collect_ethtool() {
+ local network_devices
+ local network_device
+
+ network_devices=$(ls /sys/class/net 2>/dev/null)
+ if which ethtool >/dev/null 2>&1; then
+ if test -n "${network_devices}"; then
+ pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output"
+ for network_device in ${network_devices}; do
+ call_run_command "ethtool ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -k ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -a ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -c ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -g ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -i ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -l ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -P ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -S ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ call_run_command "ethtool -T ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+ done
+ else
+ pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - no devices"
+ fi
+ else
+ pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - not available"
+ fi
+
+ pr_log_stdout " "
+}
+
+########################################
+collect_tc() {
+ local network_devices
+ local network_device
+
+ network_devices=$(ls /sys/class/net 2>/dev/null)
+ if which tc >/dev/null 2>&1; then
+ if test -n "${network_devices}"; then
+ pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output"
+ for network_device in ${network_devices}; do
+ call_run_command "tc -s qdisc show dev ${network_device}" "${OUTPUT_FILE_TC}"
+ done
+ else
+ pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - no devices"
+ fi
+ else
+ pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - not available"
+ fi
+
+ pr_log_stdout " "
+}
+
+########################################
+collect_bridge() {
+ local network_devices
+ local network_device
+
+ network_devices=$(ls /sys/class/net 2>/dev/null)
+ if which bridge >/dev/null 2>&1; then
+ if test -n "${network_devices}"; then
+ pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output"
+ for network_device in ${network_devices}; do
+ call_run_command "bridge -d link show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+ call_run_command "bridge -s fdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+ call_run_command "bridge -d mdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+ done
+ else
+ pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - no devices"
+ fi
+ else
+ pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - not available"
+ fi
+
+ pr_log_stdout " "
+}
+
+########################################
# OpenVSwitch
collect_ovs() {
local br_list
@@ -758,7 +859,7 @@ collect_ovs() {
:ovsdb-client dump\
"
if test -n "${br_list}"; then
- pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
+ pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
IFS=:
for ovscmd in ${ovscmds}; do
IFS=${ifs_orig} call_run_command "${ovscmd}" "${OUTPUT_FILE_OVS}.out"
@@ -777,7 +878,7 @@ collect_ovs() {
IFS="${ifs_orig}"
done
else
- pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
+ pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
fi
pr_log_stdout " "
@@ -790,12 +891,12 @@ collect_domain_xml() {
domain_list=$(virsh list --all --name)
if test -n "${domain_list}"; then
- pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting domain xml files"
+ pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files"
for domain in ${domain_list}; do
call_run_command "virsh dumpxml ${domain}" "${OUTPUT_FILE_XML}_${domain}.xml"
done
else
- pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
+ pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
fi
pr_log_stdout " "
@@ -809,23 +910,23 @@ collect_docker() {
# call docker inspect for all containers
item_list=$(docker ps -qa)
if test -n "${item_list}"; then
- pr_syslog_stdout "10a of ${COLLECTION_COUNT}: Collecting docker container output"
+ pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output"
for item in ${item_list}; do
call_run_command "docker inspect ${item}" "${OUTPUT_FILE_DOCKER}"
done
else
- pr_syslog_stdout "10a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
+ pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
fi
# call docker inspect for all networks
item_list=$(docker network ls -q)
if test -n "${item_list}"; then
- pr_syslog_stdout "10b of ${COLLECTION_COUNT}: Collecting docker network output"
+ pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output"
for item in ${item_list}; do
call_run_command "docker network inspect ${item}" "${OUTPUT_FILE_DOCKER}"
done
else
- pr_syslog_stdout "10b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
+ pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
fi
pr_log_stdout " "
@@ -835,7 +936,7 @@ collect_docker() {
collect_nvme() {
local NVME
- pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting nvme output"
+ pr_syslog_stdout "14 of ${COLLECTION_COUNT}: Collecting nvme output"
call_run_command "nvme list" "${OUTPUT_FILE_NVME}"
for NVME in /dev/nvme[0-9]*; do
@@ -1016,25 +1117,31 @@ environment_setup()
# create gzip-ped tar file
create_package()
{
+ local rc_tar
pr_stdout "Finalizing: Creating archive with collected data"
cd "${WORKDIR_BASE}"
- if ! tar -czf "${WORKARCHIVE}" "${WORKDIR_CURRENT}"; then
- pr_stdout " "
- pr_stdout "${SCRIPTNAME}: Error: Collection of data failed!"
- pr_stdout " The creation of \"${WORKARCHIVE}\" was not successful."
- pr_stdout " Please check the directory \"${WORKDIR_BASE}\""
- pr_stdout " to provide enough free available space."
+ tar -czf "${WORKARCHIVE}" "${WORKDIR_CURRENT}"
+ rc_tar=$?
+ if [ $rc_tar -eq 0 ]; then
+ chmod 0600 "${WORKARCHIVE}"
+ pr_stdout " "
+ pr_stdout "Collected data was saved to:"
+ pr_stdout " >> ${WORKARCHIVE} <<"
+ pr_stdout " "
+ pr_stdout "Review the collected data before sending to your service organization. "
+ pr_stdout " "
+ elif [ $rc_tar -eq 127 ]; then
+ pr_stdout " "
+ pr_stdout "${SCRIPTNAME}: Error: tar command is not available!"
+ pr_stdout " Please install the corresponding package!"
else
- chmod 0600 "${WORKARCHIVE}"
- pr_stdout " "
- pr_stdout "Collected data was saved to:"
- pr_stdout " >> ${WORKARCHIVE} <<"
+ pr_stdout " "
+ pr_stdout "${SCRIPTNAME}: Error: Collection of data failed!"
+ pr_stdout " The creation of \"${WORKARCHIVE}\" was not successful."
+ pr_stdout " Please check the directory \"${WORKDIR_BASE}\""
+ pr_stdout " to provide enough free available space."
fi
-
- pr_stdout " "
- pr_stdout "Review the collected data before sending to your service organization. "
- pr_stdout " "
}
@@ -1136,6 +1243,12 @@ collect_configfiles
collect_osaoat
+collect_ethtool
+
+collect_tc
+
+collect_bridge
+
collect_ovs
collect_domain_xml