File nvidia-driver of Package nv-driver-container

#! /bin/bash
#
# Global settings shared by every function of this script.
#
# Required environment:
#   DRIVER_VERSION  version of the NVIDIA driver to install
#   DRIVER_PACKAGE  base name of the driver package; the kernel flavor is
#                   appended to it as "-<flavor>" when packages are selected
# Optional environment:
#   RUN_DIR         runtime directory (default: /run/nvidia)

set -eu
#set -x

RUN_DIR=${RUN_DIR:-/run/nvidia}
# PID/lock file, named after the basename of this script.
PID_FILE=${RUN_DIR}/${0##*/}.pid
DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
# Everything before the first '.' of the driver version.
DRIVER_BRANCH=${DRIVER_VERSION%%.*}
DRIVER_PACKAGE=${DRIVER_PACKAGE:?"Missing driver package"}
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
# Extra arguments handed to `modprobe` for the respective kernel module.
NVIDIA_MODULE_PARAMS=()
NVIDIA_UVM_MODULE_PARAMS=()
NVIDIA_MODESET_MODULE_PARAMS=()

# Prepare zypper for installing the driver: import the CUDA repository signing
# key after verifying its fingerprint, remove BCI repositories when
# container-suseconnect repositories are present, and refresh all repositories.
#
# Globals:
#   FLAVOR                 (read) "azure" enables the public cloud module
#   CUDA_REPO_FINGERPRINT  (read) expected fingerprint of the CUDA repo key
#   ADDITIONAL_MODULES     (exported when FLAVOR is "azure")
# Exits the script with status 1 on any failure.
_update_package_cache() {
    local bci filename fpr repos
    local baseurl=""
    local -a import_cmd

    echo "Updating the package cache..."
    if [ "$FLAVOR" == "azure" ]; then
        # consumed by container-suseconnect when calling `zypper refresh`
        export ADDITIONAL_MODULES="sle-module-public-cloud"
    fi

    # `:-` keeps `set -u` from aborting with "unbound variable" before the
    # intended diagnostic can be printed.
    if [ -z "${CUDA_REPO_FINGERPRINT:-}" ]; then
        echo "FATAL: Fingerprint of CUDA Repo Key unknown!"
        exit 1
    fi

    # The key file is named after the last 8 characters of the fingerprint.
    filename=${CUDA_REPO_FINGERPRINT: -8:8}.pub
    # Evaluates the `baseurl=...` line of the repository definition, which
    # assigns the (local) variable `baseurl`.
    eval "$(grep baseurl /etc/zypp/repos.d/CUDA.repo)"
    if ! curl -fsS "${baseurl}/${filename}" > "/tmp/${filename}"; then
        echo "FATAL: CUDA Repo Key not found!"
        exit 1
    fi
    fpr=$(gpg --with-colons --import-options show-only --import --fingerprint < "/tmp/${filename}" | grep "^fpr:" | cut -d: -f 10)
    if [ "$fpr" != "$CUDA_REPO_FINGERPRINT" ]; then
        echo "FATAL: CUDA Repo Key Fingerprint does not match known one!"
        exit 1
    fi

    if [ -x /usr/bin/rpmkeys ]; then
        import_cmd=(/usr/bin/rpmkeys --import)
    else
        import_cmd=(rpm --import)
    fi
    # The import runs as an `if` condition: under `set -e` a plain failing
    # command would terminate the script before a later `$?` check is reached.
    if ! "${import_cmd[@]}" "/tmp/${filename}"; then
        echo "FATAL: Import of CUDA Repo Key failed!"
        exit 1
    fi

    # Make sure to not mix native repos with BCI repos - if both exist
    repos=$(zypper lr | cut -d'|' -f2)
    if grep -q container-suseconnect-zypp <<< "$repos"; then
        # $bci is left unquoted on purpose: it holds one blank-padded alias
        # per line and must be split into separate arguments.
        bci=$(grep BCI <<< "$repos") && zypper rr $bci
    fi
    if ! zypper --non-interactive refresh CUDA || ! zypper refresh; then
        echo "FATAL: failed to reach SUSE package repositories. "\
             "Ensure that the cluster can access the proper networks."
        exit 1
    fi
}

# Announce the cleanup and run `zypper clean`.
_cleanup_package_cache() {
    printf '%s\n' "Cleaning up the package cache..."
    zypper clean
}

# Derive FLAVOR from the third '-'-separated field of KERNEL_VERSION.
# When that field is empty, FLAVOR falls back to "default" and the flavor is
# appended to KERNEL_VERSION.
#
# Globals: KERNEL_VERSION (read, possibly extended), FLAVOR (written)
_get_kernel_flavor() {
    FLAVOR=$(cut -d- -f3 <<< "${KERNEL_VERSION}")
    if [ -z "$FLAVOR" ]; then
        FLAVOR=default
        KERNEL_VERSION="${KERNEL_VERSION}-${FLAVOR}"
    fi
}

# Install the kernel package matching KERNEL_VERSION and a driver package
# "${DRIVER_PACKAGE}-${FLAVOR}" of version DRIVER_VERSION that can be installed
# without pulling in another kernel, followed by the user space packages
# (compute utils, NSCQ library, fabric manager).
#
# Globals (read): FLAVOR, KERNEL_VERSION, DRIVER_VERSION, DRIVER_PACKAGE,
#                 DRIVER_REPO (optional extra repository, added as "driver")
# Exits the script with status 1 when no suitable package can be installed.
_install_driver_package() {
    # Since we don't have a suitable flavored provides we need to dig out the version string by hand
    local f_kernel v_string dry_run rc
    local found=false
    # DRIVER_REPO is optional; `:-` avoids an "unbound variable" abort under `set -u`.
    local driver_repo=${DRIVER_REPO:-}
    local -a v_strings=()

    f_kernel=$(uname -r | cut -d- -f3)
    [ "$FLAVOR" == "$f_kernel" ] || \
        { echo "[FATAL] Running kernel flavor ${f_kernel} doesn't match specified"; exit 1; }
    if [ -n "$driver_repo" ]; then
        zypper --gpg-auto-import-keys -n ar "$driver_repo" driver
        zypper --gpg-auto-import-keys -n refresh driver
    fi
    # Candidate versions: edition attribute of every matching solvable with the
    # release part (everything from the first '-') stripped.
    # DRIVER_VERSION is matched literally (-F) so its dots are not wildcards.
    # NOTE(review): `sort -r` orders lexically, not by version - confirm whether
    # `sort -rV` is wanted here.
    v_strings=($(zypper -x se ${driver_repo:+-r driver} -s -t package --match-exact "${DRIVER_PACKAGE}-${FLAVOR}" | \
        grep "solvable " | \
        grep -F -- "${DRIVER_VERSION}" | \
        sed -e 's/.*edition="\([^"]*\).*/\1/g;s/-.*//' | sort -r))
    [ ${#v_strings[@]} -ne 0 ] || \
        { echo "[FATAL] no driver package version ${DRIVER_VERSION} found."; exit 1; }
    zypper --non-interactive refresh
    # defeat package supplements: make sure driver is not pulled in implicitly by HW dependencies
    touch /tmp/modaliases
    # should we allow downgrade as well - ie. set --oldpackage?
    ZYPP_MODALIAS_SYSFS=/tmp/modaliases zypper --non-interactive install --no-recommends -y \
                       "kernel-${FLAVOR}" = "${KERNEL_VERSION%-*}" || \
        { echo "[FATAL] No kernel package \"${FLAVOR}\" version ${KERNEL_VERSION%-*} found" >&2; exit 1; }
    for v_string in "${v_strings[@]}"; do
        # Dry run. The exit status is captured right at the command; checking
        # `$?` after any later statement would only see that statement's status.
        rc=0
        dry_run=$(zypper -xn in -D ${driver_repo:+--from driver} -y "${DRIVER_PACKAGE}-${FLAVOR}" = "${v_string}") || rc=$?
        # 104: zypper could not find the requested capability - try the next version.
        if [ "$rc" -eq 104 ]; then
            continue
        fi
        if [ "$rc" -ne 0 ]; then
            echo "[FATAL] Dry run for ${DRIVER_PACKAGE}-${FLAVOR} = ${v_string} failed with status ${rc}" >&2
            exit 1
        fi
        # Find if this package attempts to install a kernel. If it does, the running kernel does not
        # satisfy the driver API. Thus, skip to the next version.
        if grep "solvable " <<< "$dry_run" | grep -q "name=\"kernel-${FLAVOR}\""; then
            continue
        fi
        found=true
        break
    done
    $found || { echo "[FATAL]: No matching driver version ${DRIVER_VERSION} found for kernel $(uname -r)"; exit 1; }
    zypper --non-interactive install ${driver_repo:+--from driver} -y "${DRIVER_PACKAGE}-${FLAVOR}" = "${v_string}"
    zypper --non-interactive install -y nvidia-compute-utils-G06 = "${DRIVER_VERSION}" \
           libnvidia-nscq = "${DRIVER_VERSION}" \
           nvidia-fabric-manager = "${DRIVER_VERSION}"
}

# Succeed (0) only when /proc/driver/nvidia-nvswitch/devices is a directory
# that contains at least one entry; return 1 otherwise.
_assert_nvswitch_system() {
    local devices_dir=/proc/driver/nvidia-nvswitch/devices

    if [ ! -d "$devices_dir" ]; then
        return 1
    fi
    [ -n "$(ls -A "$devices_dir")" ] || return 1
    return 0
}

# Read every line of the file $1 into the caller's `conf_lines` array.
# Returns 1 (leaving `conf_lines` empty) when the file does not exist.
_read_module_conf() {
    local line
    conf_lines=()
    [ -f "$1" ] || return 1
    # `|| [ -n "$line" ]` keeps a last line that lacks a trailing newline.
    while IFS="" read -r line || [ -n "$line" ]; do
        conf_lines+=("$line")
    done < "$1"
    return 0
}

# For each kernel module configuration file mounted into the container,
# parse the file contents and extract the custom module parameters that
# are to be passed as input to 'modprobe'.
#
# Assumptions:
# - Configuration files are named <module-name>.conf (i.e. nvidia.conf, nvidia-uvm.conf).
# - Configuration files are mounted inside the container at /drivers.
# - Each line in the file contains at least one parameter, where parameters on the same line
#   are space delimited. It is up to the user to properly format the file to ensure
#   the correct set of parameters are passed to 'modprobe'.
#
# Arguments:
#   $1 - directory holding the configuration files (optional, default: /drivers)
# Globals (appended to): NVIDIA_MODULE_PARAMS, NVIDIA_UVM_MODULE_PARAMS,
#                        NVIDIA_MODESET_MODULE_PARAMS
_get_module_params() {
    local base_path="${1:-/drivers}"
    # Filled by _read_module_conf; declared local so nothing leaks to callers.
    local -a conf_lines=()

    # nvidia
    if _read_module_conf "${base_path}/nvidia.conf"; then
        NVIDIA_MODULE_PARAMS+=("${conf_lines[@]}")
        echo "Module parameters provided for nvidia: ${NVIDIA_MODULE_PARAMS[*]}"
    fi
    # nvidia-uvm
    if _read_module_conf "${base_path}/nvidia-uvm.conf"; then
        NVIDIA_UVM_MODULE_PARAMS+=("${conf_lines[@]}")
        echo "Module parameters provided for nvidia-uvm: ${NVIDIA_UVM_MODULE_PARAMS[*]}"
    fi
    # nvidia-modeset
    if _read_module_conf "${base_path}/nvidia-modeset.conf"; then
        NVIDIA_MODESET_MODULE_PARAMS+=("${conf_lines[@]}")
        echo "Module parameters provided for nvidia-modeset: ${NVIDIA_MODESET_MODULE_PARAMS[*]}"
    fi
}

# Load the kernel modules and start persistenced.
#
# Steps: parse the module parameter files, configure the kernel firmware search
# path (unless GPU firmware is disabled via NVreg_EnableGpuFirmware=0), load the
# nvidia, nvidia-uvm and nvidia-modeset modules, then start the persistence
# daemon and - on NVSwitch systems - the fabric manager when they are installed.
#
# Returns 1 when a kernel module cannot be loaded, 0 otherwise.
_load_driver() {
    local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
    local set_fw_path="true"
    local fw_path_config_file="/sys/module/firmware_class/parameters/path"
    local param module

    # Parse first: the firmware decision below depends on the parsed parameters.
    echo "Parsing kernel module parameters..."
    _get_module_params

    for param in "${NVIDIA_MODULE_PARAMS[@]}"; do
        # An entry may hold several space delimited parameters, so match the
        # setting as a whole word anywhere within it.
        if [[ " $param " == *" NVreg_EnableGpuFirmware=0 "* ]]; then
          set_fw_path="false"
        fi
    done

    if [[ "$set_fw_path" == "true" ]]; then
        echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
        if [[ -n $(grep '[^[:space:]]' "$fw_path_config_file") ]]; then
            echo "WARNING: A search path is already configured in $fw_path_config_file"
            echo "         Retaining the current configuration"
        else
            echo -n "$nv_fw_search_path" > "$fw_path_config_file" || echo "WARNING: Failed to configure the firmware search path"
        fi
    fi

    echo "Loading NVIDIA driver kernel modules..."
    # Failures are reported explicitly: this function is called as
    # `_load_driver || exit 1`, a context in which `set -e` is not in effect.
    # set -o xtrace +o nounset
    modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}" || module=nvidia
    [ -n "${module:-}" ] || modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}" || module=nvidia-uvm
    [ -n "${module:-}" ] || modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}" || module=nvidia-modeset
    # set +o xtrace -o nounset
    if [ -n "${module:-}" ]; then
        echo "ERROR: Failed to load kernel module ${module}" >&2
        return 1
    fi

    echo "Starting NVIDIA persistence daemon..."
    # `command -v` only tests for the program; plain `command <name>` would run it.
    if command -v nvidia-persistenced > /dev/null; then
        nvidia-persistenced --persistence-mode || \
            echo "WARNING: Failed to start the NVIDIA persistence daemon" >&2
    fi

    # TODO: handle vgpu driver

    if command -v nv-fabricmanager > /dev/null && _assert_nvswitch_system; then
        echo "Starting NVIDIA fabric manager daemon..."
        nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg
    fi
    return 0
}

# Send SIGTERM to the process recorded in a PID file and wait for it to exit.
#
# Arguments:
#   $1 - PID file; when it does not exist nothing is done
#   $2 - number of 0.1 second polls before giving up
#   $3 - message written to stderr when the process is still alive afterwards
# Returns: 0 when the process is gone (or no PID file exists), 1 otherwise.
_stop_daemon() {
    local pid_file=$1 tries=$2 failure_msg=$3
    local pid attempt

    [ -f "$pid_file" ] || return 0
    pid=$(< "$pid_file")

    kill -0 "${pid}" 2> /dev/null && kill -SIGTERM "${pid}"
    for ((attempt = 0; attempt < tries; attempt++)); do
        kill -0 "${pid}" 2> /dev/null || return 0
        sleep 0.1
    done
    # Decide on the actual process state rather than on the loop counter, so a
    # process that exits during the last poll interval is not reported as stuck.
    kill -0 "${pid}" 2> /dev/null || return 0
    echo "${failure_msg}" >&2
    return 1
}

# Stop persistenced and unload the kernel modules if they are currently loaded.
#
# Also stops the grid daemon and the fabric manager when their PID files exist.
# Returns 1 when a daemon cannot be stopped or when the module reference counts
# show the driver is still in use; 0 otherwise.
_unload_driver() {
    local rmmod_args=()
    local nvidia_deps=0
    local nvidia_modeset_deps=0
    local nvidia_refs=0
    local nvidia_uvm_refs=0
    local nvidia_modeset_refs=0
    local nvidia_drm_refs=0

    echo "Stopping NVIDIA persistence daemon..."
    _stop_daemon /var/run/nvidia-persistenced/nvidia-persistenced.pid 50 \
        "Could not stop NVIDIA persistence daemon" || return 1

    if [ -f /var/run/nvidia-gridd/nvidia-gridd.pid ]; then
        echo "Stopping NVIDIA grid daemon..."
        _stop_daemon /var/run/nvidia-gridd/nvidia-gridd.pid 10 \
            "Could not stop NVIDIA Grid daemon" || return 1
    fi

    if [ -f /var/run/nvidia-fabricmanager/nv-fabricmanager.pid ]; then
        echo "Stopping NVIDIA fabric manager daemon..."
        _stop_daemon /var/run/nvidia-fabricmanager/nv-fabricmanager.pid 50 \
            "Could not stop NVIDIA fabric manager daemon" || return 1
    fi

    echo "Unloading NVIDIA driver kernel modules..."
    # For every loaded module record its reference count and queue it for
    # removal. The *_deps counters hold how many references to a module are
    # accounted for by other modules queued here.
    if [ -f /sys/module/nvidia_drm/refcnt ]; then
        nvidia_drm_refs=$(< /sys/module/nvidia_drm/refcnt)
        rmmod_args+=("nvidia-drm")
        ((++nvidia_modeset_deps))
    fi
    if [ -f /sys/module/nvidia_modeset/refcnt ]; then
        nvidia_modeset_refs=$(< /sys/module/nvidia_modeset/refcnt)
        rmmod_args+=("nvidia-modeset")
        ((++nvidia_deps))
    fi
    if [ -f /sys/module/nvidia_uvm/refcnt ]; then
        nvidia_uvm_refs=$(< /sys/module/nvidia_uvm/refcnt)
        rmmod_args+=("nvidia-uvm")
        ((++nvidia_deps))
    fi
    if [ -f /sys/module/nvidia/refcnt ]; then
        nvidia_refs=$(< /sys/module/nvidia/refcnt)
        rmmod_args+=("nvidia")
    fi
    # Any reference beyond the accounted ones means something else uses the driver.
    if [ "${nvidia_refs}" -gt "${nvidia_deps}" ] || [ "${nvidia_uvm_refs}" -gt 0 ] || [ "${nvidia_modeset_refs}" -gt "${nvidia_modeset_deps}" ] || [ "${nvidia_drm_refs}" -gt 0 ]; then
        echo "Could not unload NVIDIA driver kernel modules, driver is in use" >&2
        return 1
    fi

    if [ ${#rmmod_args[@]} -gt 0 ]; then
        rmmod "${rmmod_args[@]}"
    fi
    return 0
}

# Mount the driver rootfs into the run directory with the exception of sysfs.
#
# /sys is first marked unbindable and private, then / is recursively bind
# mounted at ${RUN_DIR}/driver. When /sys/fs/selinux exists, the device files
# below the mount are relabelled as container_file_t.
_mount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Mounting NVIDIA driver rootfs..."
    mount --make-runbindable /sys
    mount --make-private /sys
    mkdir -p "${driver_root}"
    mount --rbind / "${driver_root}"

    echo "Check SELinux status"
    if [ ! -e /sys/fs/selinux ]; then
        echo "SELinux is disabled, skipping..."
        return
    fi
    echo "SELinux is enabled"
    echo "Change device files security context for selinux compatibility"
    chcon -R -t container_file_t "${driver_root}/dev"
}

# Unmount the driver rootfs from the run directory.
#
# The lazy, recursive unmount is only attempted when findmnt lists at least one
# mount target below ${RUN_DIR}/driver/.
_unmount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Unmounting NVIDIA driver rootfs..."
    findmnt -r -o TARGET | grep -q "${driver_root}/" || return 0
    umount -l -R "${driver_root}"
}

# Write a kernel postinst.d script to automatically precompile packages on kernel update (similar to DKMS).
#
# Nothing is written when the directory of KERNEL_UPDATE_HOOK does not exist.
# The generated script reads the PID of the driver process from
# ${RUN_DIR}/nvidia-driver.pid, exports its DRIVER_VERSION and runs
# `nvidia-driver update --kernel <arg>` in that process' mount namespace.
#
# Globals (read): KERNEL_UPDATE_HOOK, RUN_DIR
_write_kernel_update_hook() {
    local hook_dir=${KERNEL_UPDATE_HOOK%/*}
    local pid_file_quoted

    if [ ! -d "${hook_dir}" ]; then
        return 0
    fi

    echo "Writing kernel update hook..."
    # The hook runs as a separate script under `set -u` and does not know
    # RUN_DIR, so the PID file path is resolved now and embedded shell-quoted.
    printf -v pid_file_quoted '%q' "${RUN_DIR}/nvidia-driver.pid"
    {
        cat <<'EOF'
#!/bin/bash

set -eu
trap 'echo "ERROR: Failed to update the NVIDIA driver" >&2; exit 0' ERR

EOF
        printf 'NVIDIA_DRIVER_PID=$(< %s)\n' "${pid_file_quoted}"
        cat <<'EOF'

export "$(grep -z DRIVER_VERSION /proc/${NVIDIA_DRIVER_PID}/environ)"
nsenter -t "${NVIDIA_DRIVER_PID}" -m -- nvidia-driver update --kernel "$1"
EOF
    } > "${KERNEL_UPDATE_HOOK}"
    chmod +x "${KERNEL_UPDATE_HOOK}"
}

# Unload the driver and, only if that worked, unmount the driver rootfs and
# remove the PID file and the kernel update hook.
# Returns 0 after a complete cleanup, 1 when the driver could not be unloaded.
_shutdown() {
    _unload_driver || return 1

    _unmount_rootfs
    rm -f "${PID_FILE}" "${KERNEL_UPDATE_HOOK}"
    return 0
}

# Make this process the only active instance and start from a clean state:
# take an exclusive lock on PID_FILE (held on file descriptor 3), record our
# PID in it, install signal/exit handlers, then unload any loaded driver and
# unmount a previously mounted driver rootfs.
#
# Exits with status 1 when another instance holds the lock or when the driver
# cannot be unloaded.
_prepare_exclusive() {
    # TODO: handle vgpu
    echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"

    # Open for appending: a plain `>` would truncate the file - and with it the
    # PID recorded by a running instance - before we know whether we get the lock.
    exec 3>> "${PID_FILE}"
    if ! flock -n 3; then
        echo "An instance of the NVIDIA driver is already running, aborting"
        exit 1
    fi
    # The lock is ours: drop any stale content, then record our PID.
    : > "${PID_FILE}"
    echo $$ >&3

    trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
    trap "_shutdown" EXIT

    _unload_driver || exit 1
    _unmount_rootfs
}

# Run the installation steps in order: refresh the package cache (install
# dependencies), install the driver packages, clean the package cache again.
_install() {
    local step

    for step in _update_package_cache _install_driver_package _cleanup_package_cache; do
        "${step}"
    done
}

# Mount the driver rootfs, load the driver, write the kernel update hook and
# then block until a signal leads to a successful shutdown.
_load() {
    _mount_rootfs
    _load_driver || exit 1
    _write_kernel_update_hook

    echo "Done, now waiting for signal"
    # Background job that gives `wait` below something to block on.
    sleep infinity &
    # The trap string is double-quoted, so $! is expanded right here, while the
    # trap is being defined, to the PID of the `sleep` above. The script only
    # exits from the handler when _shutdown succeeds.
    trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
    # Reset the EXIT trap to its default; from here on cleanup happens in the
    # signal handler above.
    trap - EXIT
    # `wait` returns whenever a trapped signal arrives; if the handler did not
    # exit (shutdown failed), keep waiting.
    while true; do wait $! || continue; done
    # Not reached: the loop above has no exit path of its own.
    exit 0
}

# Command `load`: become the exclusive instance (unloading any previous
# driver), then mount, load and wait - without installing packages.
load() {
    local step

    for step in _prepare_exclusive _load; do
        "${step}"
    done
}

# Command `init`: become the exclusive instance (unloading any previous
# driver), install the driver packages, then mount, load and wait.
init() {
    local step

    for step in _prepare_exclusive _install _load; do
        "${step}"
    done
}

# Command `build`: placeholder that does nothing and exits the script
# successfully.
build() {
    exit 0
}

# Command `update`: placeholder that does nothing and exits the script
# successfully. The generated kernel update hook invokes it as
# `nvidia-driver update --kernel <version>`.
update() {
    exit 0
}

# Print the command line synopsis to stdout and exit.
# Arguments:
#   $1 - exit status to terminate the script with
usage() {
    printf '%s %s\n' \
        "$0 <command> [-a|--accept-license][-k|--kernel <kernel_version>]" \
        "[-m|--max-threads n][-s|--sign <arg>][-t|--tag <arg>]"
    printf '%s\n' \
        "command: init|build|load|update" \
        "All arguments except -k are ignored"
    exit $1
}

# Command line handling: the first argument selects the command (a function of
# the same name), the remaining arguments are validated with getopt. Only
# -k/--kernel has an effect; all other options are accepted and ignored.
if [ $# -eq 0 ]; then
    usage 0
fi
command=$1; shift
# Each getopt call is checked right where it runs: under `set -e` a failing
# command substitution would end the script before a later `$?` test is reached.
case "${command}" in
    init) options=$(getopt -l accept-license,max-threads: -o am: -- "$@") || usage 1 ;;
    # -a takes no argument, matching --accept-license and the `init` command.
    build) options=$(getopt -l accept-license,tag:,max-threads: -o at:m: -- "$@") || usage 1 ;;
    load) options="" ;;
    update) options=$(getopt -l kernel:,sign:,tag:,max-threads: -o k:s:t:m: -- "$@") || usage 1 ;;
    *) usage 1 ;;
esac
eval set -- "${options}"

KERNEL_VERSION=$(uname -r)

# Walk the normalized positional parameters themselves, so option arguments
# are never mistaken for options.
while [ $# -gt 0 ]; do
    case "$1" in
    -a | --accept-license) shift 1 ;;
    -k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
    -m | --max-threads) shift 2 ;;
    -s | --sign) shift 2 ;;
    -t | --tag) shift 2 ;;
    --) shift; break ;;
    *) break ;;
    esac
done
# Set flavor _after_ `--kernel` argument takes effect
_get_kernel_flavor

# Anything left over is an unexpected positional argument.
if [ $# -ne 0 ]; then
    usage 1
fi

"$command"
openSUSE Build Service is sponsored by