File s390-tools-sles15sp1-dbginfo-extend-data-collection.patch of Package s390-tools.14411

Subject: [PATCH] [BZ 184025] dbginfo.sh: Extend data collection
From: Sa Liu <saliu@de.ibm.com>

Description:  dbginfo.sh: Extend data collection 
Symptom:      This update addresses several gaps in the dbginfo.sh data
              collection:
              - There is no data collection of ethtool, tc and bridge
                output for debugging network issues
              - There is no data collection for hyptop output
              - There is no data collection for nvme devices
              - There is no data collection for smc devices
              - Sometimes the lsof output is so long that runtime.out
                becomes hard to open
              - The journalctl output was limited to 50000 lines
Problem:      The following problems exist:
              - Missing data collection for ethtool, tc and bridge
              - Missing data collection for hyptop output
              - Missing data collection for nvme devices
              - Missing data collection for smc devices
              - Sometimes the lsof output is so long that runtime.out
                becomes hard to open
              - The journalctl output was limited to 50000 lines, so for
                a very long journal the beginning is cut off
Solution:     - Extend the data collection to include ethtool, tc,
                bridge and hyptop output
              - Extend the data collection to include nvme and smc
                device information
              - Write the output of lsof to a separate file
              - Raise the journalctl limit to 100000 lines
Reproduction: Run this script and verify the collected output.
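              A minimal verification sketch; the archive name and the
              /tmp location are illustrative placeholders (the real
              name depends on date, time and hostname of the run), and
              the script is assumed to be started from its install or
              source directory:

                # run the collection as root
                ./dbginfo.sh

                # check that the new output files made it into the archive
                tar -tzf /tmp/DBGINFO-<timestamp>-<hostname>.tgz | \
                    grep -E 'ethtool\.out|tc\.out|bridge\.out|open_files\.out|journalctl\.out'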
Upstream-ID:   -
Problem-ID:    184025

Signed-off-by: Sa Liu <saliu@de.ibm.com>
---
 scripts/dbginfo.sh |  173 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 143 insertions(+), 30 deletions(-)

--- a/scripts/dbginfo.sh
+++ b/scripts/dbginfo.sh
@@ -2,7 +2,7 @@
 #
 # dbginfo.sh - Tool to collect runtime, configuration, and trace information
 #
-# Copyright IBM Corp. 2002, 2017
+# Copyright IBM Corp. 2002, 2020
 #
 # s390-tools is free software; you can redistribute it and/or modify
 # it under the terms of the MIT license. See LICENSE for details.
@@ -21,7 +21,7 @@ readonly SCRIPTNAME="${0##*/}"
 print_version() {
     cat <<EOF
 ${SCRIPTNAME}: Debug information script version %S390_TOOLS_VERSION%
-Copyright IBM Corp. 2002, 2017
+Copyright IBM Corp. 2002, 2020
 EOF
 }
 
@@ -167,9 +167,21 @@ readonly OUTPUT_FILE_VMCMD="${WORKPATH}z
 # File that includes content of files from sysfs
 readonly OUTPUT_FILE_SYSFS="${WORKPATH}sysfsfiles.out"
 
+# File that includes the output of lsof
+readonly OUTPUT_FILE_LSOF="${WORKPATH}open_files.out"
+
 # File that includes content of OSA OAT
 readonly OUTPUT_FILE_OSAOAT="${WORKPATH}osa_oat"
 
+# File that includes content of Ethtool commands
+readonly OUTPUT_FILE_ETHTOOL="${WORKPATH}ethtool.out"
+
+# File that includes content of tc commands
+readonly OUTPUT_FILE_TC="${WORKPATH}tc.out"
+
+# File that includes content of bridge commands
+readonly OUTPUT_FILE_BRIDGE="${WORKPATH}bridge.out"
+
 # File that includes the output of journalctl
 readonly OUTPUT_FILE_JOURNALCTL="${WORKPATH}journalctl.out"
 
@@ -189,7 +201,7 @@ readonly OUTPUT_FILE_NVME="${WORKPATH}nv
 readonly MOUNT_POINT_DEBUGFS="/sys/kernel/debug"
 
 # The amount of steps running the whole collections
-readonly COLLECTION_COUNT=12
+readonly COLLECTION_COUNT=15
 
 # The kernel version (e.g. '2' from 2.6.32 or '3' from 3.2.1)
 readonly KERNEL_VERSION=$(uname -r 2>/dev/null | cut -d'.' -f1)
@@ -312,7 +324,7 @@ LOGFILES="\
   /var/log/yum.log\
   /var/log/openvswitch/ovs-vswitchd.log\
   /var/log/openvswitch/ovsdb-server.log\
-  /var/run/docker/libcontainerd/containerd/events.log\
+  /run/docker/libcontainerd/containerd/events.log\
   /run/containerd/events.log\
   "
 
@@ -367,7 +379,7 @@ CONFIGFILES="\
   $(find /lib/modules -name modules.dep 2>/dev/null)\
   /etc/docker\
   /lib/systemd/system/docker.service\
-  /usr/lib/systemd/system/docker.service\
+  /usr/lib/systemd/system\
   /etc/apparmor.d\
   "
 
@@ -394,6 +406,7 @@ CMDS="uname -a\
   :ip link show\
   :ip ntable\
   :ip a sh\
+  :ip -s -s link\
   :firewall-cmd --list-all\
   :ipcs -a\
   :netstat -pantu\
@@ -441,7 +454,8 @@ CMDS="uname -a\
   :SPident\
   :rpm -qa | sort\
   :sysctl -a\
-  :lsof\
+  :lsof \
+   > '${OUTPUT_FILE_LSOF}'\
   :mount\
   :df -h\
   :df -i\
@@ -451,7 +465,7 @@ CMDS="uname -a\
   :java -version\
   :cat /root/.bash_history\
   :env\
-  :journalctl --all --no-pager --since=$(date -d '5 day ago' +%Y-%m-%d) --until=now --lines=50000 \
+  :journalctl --all --no-pager --lines=100000 --output=short-precise\
    > '${OUTPUT_FILE_JOURNALCTL}'\
   :openssl engine\
   :systemd-delta\
@@ -465,6 +479,10 @@ CMDS="uname -a\
   :docker version\
   :docker stats --no-stream\
   :systemctl status docker.service\
+  :blockdev --report\
+  :lvdisplay\
+  :lspci -vv\
+  :smc_dbg\
   "
 
 ########################################
@@ -480,6 +498,7 @@ VM_CMDS="q userid\
   :q cpus\
   :q srm\
   :q vtod\
+  :q time full\
   :q timezone\
   :q loaddev\
   :q v osa\
@@ -548,6 +567,11 @@ collect_cmdsout() {
     done
     IFS="${ifs_orig}"
 
+    if echo "${RUNTIME_ENVIRONMENT}" | grep -qi "z/VM" >/dev/null 2>&1; then
+        call_run_command "hyptop -b -d 1 -n 5 -f \#,c,m,C:s,M:s,o -S c" "${OUTPUT_FILE_CMD}"
+    else call_run_command "hyptop -b -d 1 -n 5 -f \#,T,c,e,m,C:s,E:s,M:s,o -S c" "${OUTPUT_FILE_CMD}"
+    fi
+
     pr_log_stdout " "
 }
 
@@ -743,6 +767,83 @@ collect_osaoat() {
 }
 
 ########################################
+collect_ethtool() {
+    local network_devices
+    local network_device
+
+    network_devices=$(ls /sys/class/net 2>/dev/null)
+    if which ethtool >/dev/null 2>&1; then
+	if test -n "${network_devices}"; then
+	    pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output"
+	    for network_device in ${network_devices}; do
+		call_run_command "ethtool ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -k ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -a ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -c ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -g ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -i ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -l ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -P ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -S ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+		call_run_command "ethtool -T ${network_device}" "${OUTPUT_FILE_ETHTOOL}"
+	    done
+	else
+	    pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - no devices"
+	fi
+    else
+	pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting ethtool output skipped - not available"
+    fi
+
+    pr_log_stdout " "
+}
+
+########################################
+collect_tc() {
+    local network_devices
+    local network_device
+
+    network_devices=$(ls /sys/class/net 2>/dev/null)
+    if which tc >/dev/null 2>&1; then
+	if test -n "${network_devices}"; then
+	    pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output"
+	    for network_device in ${network_devices}; do
+		call_run_command "tc -s qdisc show dev ${network_device}" "${OUTPUT_FILE_TC}"
+	    done
+	else
+	    pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - no devices"
+	fi
+    else
+	pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting tc output skipped - not available"
+    fi
+
+    pr_log_stdout " "
+}
+
+########################################
+collect_bridge() {
+    local network_devices
+    local network_device
+
+    network_devices=$(ls /sys/class/net 2>/dev/null)
+    if which bridge >/dev/null 2>&1; then
+	if test -n "${network_devices}"; then
+	    pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output"
+	    for network_device in ${network_devices}; do
+		call_run_command "bridge -d link show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+		call_run_command "bridge -s fdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+		call_run_command "bridge -d mdb show dev ${network_device}" "${OUTPUT_FILE_BRIDGE}"
+	    done
+	else
+	    pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - no devices"
+	fi
+    else
+	pr_syslog_stdout "10 of ${COLLECTION_COUNT}: Collecting bridge output skipped - not available"
+    fi
+
+    pr_log_stdout " "
+}
+
+########################################
 # OpenVSwitch
 collect_ovs() {
     local br_list
@@ -758,7 +859,7 @@ collect_ovs() {
             :ovsdb-client dump\
             "
     if test -n "${br_list}"; then
-        pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
+        pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output"
         IFS=:
           for ovscmd in ${ovscmds}; do
             IFS=${ifs_orig} call_run_command "${ovscmd}" "${OUTPUT_FILE_OVS}.out"
@@ -777,7 +878,7 @@ collect_ovs() {
          IFS="${ifs_orig}"
         done
     else
-        pr_syslog_stdout "8 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
+        pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting OpenVSwitch output skipped"
     fi
 
     pr_log_stdout " "
@@ -790,12 +891,12 @@ collect_domain_xml() {
 
     domain_list=$(virsh list --all --name)
     if test -n "${domain_list}"; then
-        pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting domain xml files"
+        pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files"
 	  for domain in ${domain_list}; do
 	    call_run_command "virsh dumpxml ${domain}" "${OUTPUT_FILE_XML}_${domain}.xml"
           done
     else
-        pr_syslog_stdout "9 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
+        pr_syslog_stdout "12 of ${COLLECTION_COUNT}: Collecting domain xml files skipped"
     fi
 
     pr_log_stdout " "
@@ -809,23 +910,23 @@ collect_docker() {
     # call docker inspect for all containers
     item_list=$(docker ps -qa)
     if test -n "${item_list}"; then
-        pr_syslog_stdout "10a of ${COLLECTION_COUNT}: Collecting docker container output"
+        pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output"
         for item in ${item_list}; do
             call_run_command "docker inspect ${item}" "${OUTPUT_FILE_DOCKER}"
         done
     else
-        pr_syslog_stdout "10a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
+        pr_syslog_stdout "13a of ${COLLECTION_COUNT}: Collecting docker container output skipped"
     fi
 
     # call docker inspect for all networks
     item_list=$(docker network ls -q)
     if test -n "${item_list}"; then
-        pr_syslog_stdout "10b of ${COLLECTION_COUNT}: Collecting docker network output"
+        pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output"
         for item in ${item_list}; do
             call_run_command "docker network inspect ${item}" "${OUTPUT_FILE_DOCKER}"
         done
     else
-        pr_syslog_stdout "10b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
+        pr_syslog_stdout "13b of ${COLLECTION_COUNT}: Collecting docker network output skipped"
     fi
 
     pr_log_stdout " "
@@ -835,7 +936,7 @@ collect_docker() {
 collect_nvme() {
     local NVME
 
-    pr_syslog_stdout "11 of ${COLLECTION_COUNT}: Collecting nvme output"
+    pr_syslog_stdout "14 of ${COLLECTION_COUNT}: Collecting nvme output"
     call_run_command "nvme list" "${OUTPUT_FILE_NVME}"
 
     for NVME in /dev/nvme[0-9]*; do
@@ -1016,25 +1117,31 @@ environment_setup()
 # create gzip-ped tar file
 create_package()
 {
+    local rc_tar
     pr_stdout "Finalizing: Creating archive with collected data"
     cd "${WORKDIR_BASE}"
 
-    if ! tar -czf "${WORKARCHIVE}" "${WORKDIR_CURRENT}"; then
-	pr_stdout " "
-	pr_stdout "${SCRIPTNAME}: Error: Collection of data failed!"
-	pr_stdout "       The creation of \"${WORKARCHIVE}\" was not successful."
-	pr_stdout "       Please check the directory \"${WORKDIR_BASE}\""
-	pr_stdout "       to provide enough free available space."
+    tar -czf "${WORKARCHIVE}" "${WORKDIR_CURRENT}"
+    rc_tar=$?
+    if [ $rc_tar -eq 0 ]; then
+        chmod 0600 "${WORKARCHIVE}"
+        pr_stdout " "
+        pr_stdout "Collected data was saved to:"
+        pr_stdout " >>  ${WORKARCHIVE}  <<"
+        pr_stdout " "
+        pr_stdout "Review the collected data before sending to your service organization. "
+        pr_stdout " "
+    elif [ $rc_tar -eq 127 ]; then
+        pr_stdout " "
+        pr_stdout "${SCRIPTNAME}: Error: tar command is not available!"
+        pr_stdout "     Please install the corresponding package!"
     else
-	chmod 0600 "${WORKARCHIVE}"
-	pr_stdout " "
-	pr_stdout "Collected data was saved to:"
-	pr_stdout " >>  ${WORKARCHIVE}  <<"
+        pr_stdout " "
+        pr_stdout "${SCRIPTNAME}: Error: Collection of data failed!"
+        pr_stdout "       The creation of \"${WORKARCHIVE}\" was not successful."
+        pr_stdout "       Please check the directory \"${WORKDIR_BASE}\""
+        pr_stdout "       to provide enough free available space."
     fi
-
-    pr_stdout " "
-    pr_stdout "Review the collected data before sending to your service organization. "
-    pr_stdout " "
 }
 
 
@@ -1136,6 +1243,12 @@ collect_configfiles
 
 collect_osaoat
 
+collect_ethtool
+
+collect_tc
+
+collect_bridge
+
 collect_ovs
 
 collect_domain_xml