File 0002-pci-passthrough-add-VFIO-implementation.patch of Package virtualbox
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Messer <martin.messer@cyberus-technology.de>
Date: Tue, 5 Oct 2021 13:57:43 +0000
Subject: [PATCH] pci-passthrough: add VFIO implementation
The VFIO device is added and can be configured via VBoxManage to
pass-through arbitrary PCI devices from the host system. The user is
responsible to prepare the host PCI device so it can be used by the VFIO
framework.
vfio: fix VBoxManage vfio controls
license: vfio
---
include/VBox/log.h | 3 +
include/VBox/pci.h | 2 +
include/VBox/settings.h | 18 +
include/svp/pci.h | 444 +++++++++
src/VBox/Devices/Bus/DevVfio.cpp | 154 +++
src/VBox/Devices/Bus/DevVfio.h | 412 ++++++++
src/VBox/Devices/Bus/VfioDevice.cpp | 912 ++++++++++++++++++
src/VBox/Devices/Makefile.kmk | 6 +
src/VBox/Devices/build/VBoxDD.cpp | 3 +
src/VBox/Devices/build/VBoxDD.h | 1 +
.../Frontends/VBoxManage/VBoxManageInfo.cpp | 24 +
.../VBoxManage/VBoxManageModifyVM.cpp | 15 +
src/VBox/Main/idl/VirtualBox.xidl | 24 +
src/VBox/Main/include/ConsoleImpl.h | 1 +
src/VBox/Main/include/MachineImpl.h | 5 +
.../Main/src-client/BusAssignmentManager.cpp | 15 +-
.../src-client/ConsoleImplConfigCommon.cpp | 1 -
src/VBox/Main/src-server/MachineImpl.cpp | 91 ++
src/VBox/Main/xml/Settings.cpp | 57 ++
src/VBox/Runtime/VBox/log-vbox.cpp | 1 +
20 files changed, 2181 insertions(+), 8 deletions(-)
create mode 100644 vboxsrc/include/svp/pci.h
create mode 100644 vboxsrc/src/VBox/Devices/Bus/DevVfio.cpp
create mode 100644 vboxsrc/src/VBox/Devices/Bus/DevVfio.h
create mode 100644 vboxsrc/src/VBox/Devices/Bus/VfioDevice.cpp
diff --git a/include/VBox/log.h b/include/VBox/log.h
index 5a4193dbcc..7d434c208b 100644
--- a/include/VBox/log.h
+++ b/include/VBox/log.h
@@ -182,6 +182,8 @@ typedef enum VBOXLOGGROUP
LOG_GROUP_DEV_SMC,
/** Trusted Platform Module Device group. */
LOG_GROUP_DEV_TPM,
+ /** Vfio Device group. */
+ LOG_GROUP_DEV_VFIO,
/** VGA Device group. */
LOG_GROUP_DEV_VGA,
/** Virtio PCI Device group. */
@@ -930,6 +932,7 @@ typedef enum VBOXLOGGROUP
"DEV_SERIAL", \
"DEV_SMC", \
"DEV_TPM", \
+ "DEV_VFIO", \
"DEV_VGA", \
"DEV_VIRTIO", \
"DEV_VIRTIO_NET", \
diff --git a/include/VBox/pci.h b/include/VBox/pci.h
index 7a51a32e94..7d6dd54e81 100644
--- a/include/VBox/pci.h
+++ b/include/VBox/pci.h
@@ -631,6 +631,8 @@ typedef enum PCIADDRTYPE
#define VBOX_PCI_ROM_SLOT 6
/** Max number of I/O regions. */
#define VBOX_PCI_NUM_REGIONS 7
+/** Max Number of PCI BARs */
+#define VBOX_PCI_MAX_BARS 6
#define PCI_ROM_SLOT VBOX_PCI_ROM_SLOT /**< deprecated */
#define PCI_NUM_REGIONS VBOX_PCI_NUM_REGIONS /**< deprecated */
diff --git a/include/VBox/settings.h b/include/VBox/settings.h
index cd6cbb9d04..e91d03c66b 100644
--- a/include/VBox/settings.h
+++ b/include/VBox/settings.h
@@ -1126,6 +1126,22 @@ struct HostPCIDeviceAttachment
typedef std::list<HostPCIDeviceAttachment> HostPCIDeviceAttachmentList;
+/**
+ * NOTE: If you add any fields in here, you must update a) the constructor and b)
+ * the operator== which is used by MachineConfigFile::operator==(), or otherwise
+ * your settings might never get saved.
+ */
+struct VFIODeviceAttachment
+{
+ VFIODeviceAttachment();
+
+ // Equality operator; the note preceding this struct says it is used by MachineConfigFile::operator==().
+ bool operator==(const VFIODeviceAttachment &a) const;
+
+ // NOTE(review): presumably the sysfs path of the host device handed to the VFIO device - confirm in Settings.cpp.
+ com::Utf8Str strDevicePath;
+};
+
+typedef std::vector<VFIODeviceAttachment> VFIODeviceAttachmentList;
+
/**
* A device attached to a storage controller. This can either be a
* hard disk or a DVD drive or a floppy drive and also specifies
@@ -1390,6 +1406,8 @@ struct Hardware
IOSettings ioSettings; // requires settings version 1.10 (VirtualBox 3.2)
HostPCIDeviceAttachmentList pciAttachments; // requires settings version 1.12 (VirtualBox 4.1)
+ VFIODeviceAttachmentList vfioAttachments; // requires settings version 1.17 (VirtualBox 6.0)
+
com::Utf8Str strDefaultFrontend; // requires settings version 1.14 (VirtualBox 4.3)
};
diff --git a/include/svp/pci.h b/include/svp/pci.h
new file mode 100644
index 0000000000..9a020b6803
--- /dev/null
+++ b/include/svp/pci.h
@@ -0,0 +1,444 @@
+#pragma once
+
+#include <VBox/pci.h>
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/pdmpcidev.h>
+
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <optional>
+#include <type_traits>
+
+/** Bookkeeping data of one PCI BAR of the pass-through device. */
+typedef struct PCIBarRegion
+{
+ // hRegion is used for MMIO as well as I/O port BARs, which only works while both handle types are identical.
+ static_assert(std::is_same<IOMMMIOHANDLE, IOMIOPORTHANDLE>::value,
+ "IOMMMIOHANDLE and IOMIOPORTHANDLE have different types now please extend this struct for the "
+ "support of both!");
+ IOMMMIOHANDLE hRegion; ///< Region handle that is passed to the map and unmap functions.
+ uint8_t iRegion; ///< The bar index, e.g. Bar0.
+ uint64_t offset; ///< The bar offset into the vfio device.
+ uint64_t size; ///< The size of the bar.
+ RTGCPHYS address; ///< Base address of the bar.
+} PCIBARREGION;
+
+typedef PCIBARREGION* PPCIBARREGION;
+
+/**
+ * Decoder for the raw value of a PCI Base Address Register (BAR).
+ *
+ * The constructor accepts up to 64 bits and discards the upper 32 bits unless the value
+ * describes a 64-bit memory BAR.
+ */
+class PCIBar
+{
+public:
+    PCIBar() = delete;
+
+    /** \param value_ raw BAR contents; the upper dword is only kept for 64-bit memory BARs. */
+    PCIBar(uint64_t value_) : value(value_)
+    {
+        if (not is64BitBar()) {
+            value &= std::numeric_limits<uint32_t>::max();
+        }
+    }
+
+    /** \return true if the space indicator (bit 0) equals PCI_ADDRESS_SPACE_IO. */
+    bool isIoBar() const { return (value & PCI_BAR_TYPE_MASK) == PCI_ADDRESS_SPACE_IO; }
+
+    /** \return true if the space indicator (bit 0) equals PCI_ADDRESS_SPACE_MEM. */
+    bool isMmioBar() const { return (value & PCI_BAR_TYPE_MASK) == PCI_ADDRESS_SPACE_MEM; }
+
+    /**
+     * \return true for a memory BAR whose type field (bits 2:1) selects 64-bit decoding.
+     *
+     * I/O BARs are never 64 bit wide: their bit 2 already belongs to the port address, so it
+     * must not be interpreted as a type bit. Checking the whole type field also keeps the
+     * reserved encoding (both bits set) from being treated as 64 bit.
+     */
+    bool is64BitBar() const
+    {
+        return isMmioBar() and (value & PCI_BAR_MEM_TYPE_MASK) == PCI_ADDRESS_SPACE_BAR64;
+    }
+
+    /** \return the BAR base address with the flag bits of the respective BAR kind cleared. */
+    uint64_t getBarAddress() const
+    {
+        if (isIoBar()) {
+            return value & ~PCI_CFG_IO_FLAGS_MASK;
+        } else if (isMmioBar()) {
+            return value & ~PCI_CFG_MMIO_FLAGS_MASK;
+        }
+
+        return 0;
+    }
+
+private:
+    static constexpr uint64_t PCI_CFG_IO_FLAGS_MASK {0x3};   ///< Non-address bits of an I/O BAR.
+    static constexpr uint64_t PCI_CFG_MMIO_FLAGS_MASK {0xf}; ///< Non-address bits of a memory BAR.
+    static constexpr uint64_t PCI_BAR_TYPE_MASK {0x1};       ///< Space indicator (memory or I/O).
+    static constexpr uint64_t PCI_BAR_MEM_TYPE_MASK {0x6};   ///< Type field of a memory BAR (bits 2:1).
+
+    uint64_t value;
+};
+
+/**
+ * Describes the generic part of a capability descriptor.
+ */
+struct __attribute__((__packed__)) CapabilityDescriptor
+{
+ uint8_t capID {0}; ///< Capability ID; CapabilityList::getCapabilityIterator() searches for it.
+ uint8_t nextPtr {0}; ///< Offset of the next capability; CapabilityIterator treats offset 0 as the end.
+};
+static_assert(sizeof(CapabilityDescriptor) == 0x2,
+ "The Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/*
+ * Read a specified type from the pci configuration space.
+ *
+ * The object is assembled from sizeof(T) single byte reads. The status returned by readFn
+ * is not evaluated.
+ *
+ * \param pDevIns The device instance that is forwarded to readFn
+ * \param offset offset into the pci configuration space
+ * \param readFn The function that should be used to read from the pci
+ * configuration space.
+ *
+ * \return An object of the by template parameter specified type
+ */
+template <typename T>
+T readType(PPDMDEVINS pDevIns, uint32_t offset, PFNPCICONFIGREAD readFn)
+{
+    T t;
+
+    char* ptr {reinterpret_cast<char*>(&t)};
+
+    // TODO: can be optimized to minimize cfg space read accesses as we could read 4 bytes at once
+    for (size_t i = 0; i < sizeof(T); i++) {
+        // readFn works on a uint32_t, so it gets a complete one. Handing it the address of a
+        // single byte lets the callee access memory beyond that byte.
+        uint32_t data {0};
+        readFn(pDevIns, nullptr, offset + i, 1u, &data);
+
+        // Take the first byte in memory, which is the byte a one byte wide read fills in.
+        memcpy(ptr + i, &data, sizeof(uint8_t));
+    }
+
+    return t;
+}
+
+/*
+ * The pci configuration space capability list abstraction
+ *
+ * The abstraction makes an easy iteration of capabilities in the pci config space possible
+ * Additionally, a conversion from the basic CapabilityDescriptor to a special capability is possible
+ */
+class CapabilityList
+{
+public:
+    /**
+     * Input iterator over the capability chain. The iterator is identified by the config space
+     * offset of the capability it refers to; the offset 0 marks the end of the list.
+     */
+    class CapabilityIterator
+    {
+    public:
+        using iterator_category = std::input_iterator_tag;
+        using value_type = CapabilityDescriptor;
+        using difference_type = size_t;
+        using pointer = CapabilityDescriptor*;
+        using reference = CapabilityDescriptor&;
+
+        CapabilityIterator(uint32_t capListPtr, PFNPCICONFIGREAD readFn_, PPDMDEVINS pDevIns_)
+            : offset(capListPtr), pDevIns(pDevIns_), readFn(readFn_)
+        {}
+
+        CapabilityIterator(const CapabilityIterator& o) : offset(o.offset), pDevIns(o.pDevIns), readFn(o.readFn) {}
+
+        /** Reads the generic descriptor at the current offset. Must not be used on end(). */
+        value_type operator*() const
+        {
+            assert(offset);
+            return readType<CapabilityDescriptor>(pDevIns, offset, readFn);
+        }
+
+        /** Follows the next pointer of the current capability. Must not be used on end(). */
+        CapabilityIterator& operator++()
+        {
+            assert(offset);
+            static constexpr uint32_t CAP_PTR_MASK {0x3};
+            auto capDescriptor {readType<CapabilityDescriptor>(pDevIns, offset, readFn)};
+            offset = capDescriptor.nextPtr & (~CAP_PTR_MASK);
+            return *this;
+        }
+
+        bool operator==(const CapabilityIterator& o) const { return offset == o.offset and readFn == o.readFn; }
+
+        bool operator!=(const CapabilityIterator& o) const { return not operator==(o); }
+
+        /**
+         * Reads the capability at the current offset as the descriptor type T.
+         * Must not be used on end().
+         */
+        template <typename T>
+        T getCapability() const
+        {
+            assert(offset);
+            return readType<T>(pDevIns, offset, readFn);
+        }
+
+        /** \return The config space offset of the capability the iterator refers to. */
+        uint32_t getOffset() const { return offset; }
+
+    private:
+        uint32_t offset;
+        PPDMDEVINS pDevIns;
+        PFNPCICONFIGREAD readFn;
+    };
+
+    /**
+     * Reads the capabilities pointer if the device announces a capability list. Otherwise the
+     * list stays empty, i.e. begin() equals end().
+     *
+     * \param readFn_ The function used for all config space reads.
+     * \param pDevIns_ The device instance that is forwarded to readFn_.
+     */
+    CapabilityList(PFNPCICONFIGREAD readFn_, PPDMDEVINS pDevIns_ = nullptr) : pDevIns(pDevIns_), readFn(readFn_)
+    {
+        if (enabled()) {
+            readFn(pDevIns, nullptr, VBOX_PCI_CAPABILITY_LIST, PCI_CAPABILITY_LIST_PTR_SIZE, &capListPtr);
+
+            // The two low bits of the capabilities pointer are reserved and have to be masked
+            // before the value is used as an offset, the same way operator++ treats nextPtr.
+            capListPtr &= CAP_LIST_PTR_MASK;
+        }
+    }
+
+    /**
+     * The function checks if the PCI device has support for capabilities
+     *
+     * \return true if the status register could be read and has the capability list bit set.
+     */
+    bool enabled()
+    {
+        static constexpr uint32_t PCI_STATUS_REGISTER_SIZE {0x2};
+        uint32_t pciStatus {0};
+
+        auto rc {readFn(pDevIns, nullptr, VBOX_PCI_STATUS, PCI_STATUS_REGISTER_SIZE, &pciStatus)};
+
+        return RT_SUCCESS(rc) ? (pciStatus & VBOX_PCI_STATUS_CAP_LIST) : false;
+    }
+
+    CapabilityIterator begin() { return {capListPtr, readFn, pDevIns}; }
+    CapabilityIterator end() { return {0x0, readFn, pDevIns}; }
+
+    /**
+     * Search the capability list for a capability ID.
+     *
+     * \param capId The capability ID to look for.
+     *
+     * \return An iterator to the first matching capability, std::nullopt if the device has no
+     *         capability list or no capability with this ID.
+     */
+    std::optional<CapabilityIterator> getCapabilityIterator(uint8_t capId)
+    {
+        if (not enabled()) {
+            return std::nullopt;
+        }
+        auto it {std::find_if(begin(), end(), [capId](CapabilityDescriptor desc) { return desc.capID == capId; })};
+
+        if (it != end()) {
+            return it;
+        }
+
+        return std::nullopt;
+    }
+
+private:
+    static constexpr uint8_t PCI_CAPABILITY_LIST_PTR_SIZE {sizeof(uint8_t)};
+    static constexpr uint32_t CAP_LIST_PTR_MASK {0xfc}; ///< One byte wide pointer without the reserved low bits.
+    PPDMDEVINS pDevIns;
+    PFNPCICONFIGREAD readFn;
+    uint32_t capListPtr {0x0};
+};
+
+/**
+ * MSI capability descriptor based on the PCI Local Bus Specification REV 3.0
+ *
+ * The object always has the size of the largest capability layout (64 bit addressing with
+ * per-vector masking). Which of the optional fields are meaningful is decided by the flags in
+ * msgControl.
+ */
+class __attribute__((__packed__)) MSICapabilityDescriptor : public CapabilityDescriptor
+{
+private:
+    using CapabilityIterator = CapabilityList::CapabilityIterator;
+
+    uint16_t msgControl {0};
+    uint32_t msgAddress {0};
+
+    union __attribute__((__packed__))
+    {
+        uint16_t msgData32Bit;
+        struct
+        {
+            uint32_t msgAddressHigh;
+            uint16_t msgData;
+        } msi64bit;
+        struct
+        {
+            uint16_t msgData;
+            uint16_t reserved;
+            uint32_t maskBits;
+            uint32_t pendingBits;
+        } msiPerVectorMasking;
+        struct
+        {
+            uint32_t msgAddressHigh;
+            uint16_t msgData;
+            uint16_t reserved;
+            uint32_t maskBits;
+            uint32_t pendingBits;
+        } msi64BitPerVectorMasking {0, 0, 0, 0, 0};
+    };
+
+    static constexpr uint32_t MSI_MASK_BIT_COUNT {32u}; ///< Width of the mask bits register.
+
+public:
+    MSICapabilityDescriptor() = default;
+    // We possibly read too much data here, if not all features of the MSI subsystem are supported.
+    // We accept this and treat the feature variables that are not activated in msgControl as garbage
+    MSICapabilityDescriptor(const CapabilityIterator& iterator)
+        : MSICapabilityDescriptor(iterator.getCapability<MSICapabilityDescriptor>())
+    {}
+
+    // The base class has to be copied explicitly. The iterator constructor delegates to this
+    // constructor, so leaving the base out resets capID and nextPtr to their default of 0.
+    MSICapabilityDescriptor(const MSICapabilityDescriptor& o)
+        : CapabilityDescriptor(o)
+        , msgControl(o.msgControl)
+        , msgAddress(o.msgAddress)
+        , msi64BitPerVectorMasking(o.msi64BitPerVectorMasking)
+    {}
+
+    /** \return true if the MSI enable flag is set in message control. */
+    bool enabled() const { return msgControl & VBOX_PCI_MSI_FLAGS_ENABLE; }
+
+    /** \return true if the capability contains the mask and pending bit registers. */
+    bool isPerVectorMaskable() const { return msgControl & VBOX_PCI_MSI_FLAGS_MASKBIT; }
+
+    /** \return true if the capability uses the layout with the upper message address dword. */
+    bool is64Bit() const { return msgControl & VBOX_PCI_MSI_FLAGS_64BIT; }
+
+    /** \return Two to the power of the VBOX_PCI_MSI_FLAGS_QMASK field of message control. */
+    uint8_t maxCount() const
+    {
+        static constexpr uint8_t PCI_MSI_FLAGS_QMASK_SHIFT {1u};
+        return 1 << ((msgControl & VBOX_PCI_MSI_FLAGS_QMASK) >> PCI_MSI_FLAGS_QMASK_SHIFT);
+    }
+
+    /** \return Two to the power of the VBOX_PCI_MSI_FLAGS_QSIZE field of message control. */
+    uint8_t count() const
+    {
+        static constexpr uint8_t PCI_MSI_FLAGS_QSIZE_SHIFT {4u};
+        return 1 << ((msgControl & VBOX_PCI_MSI_FLAGS_QSIZE) >> PCI_MSI_FLAGS_QSIZE_SHIFT);
+    }
+
+    /** \return The message address; the upper dword is only included for 64 bit capabilities. */
+    uint64_t messageAddress() const
+    {
+        return is64Bit() ? static_cast<uint64_t>(msi64bit.msgAddressHigh) << 32 | msgAddress : msgAddress;
+    }
+
+    /** \return The message data taken from the layout selected by is64Bit(). */
+    uint16_t messageData() const { return is64Bit() ? msi64bit.msgData : msgData32Bit; }
+
+    /**
+     * \param vector The vector number to check.
+     *
+     * \return true if the mask bit of the vector is set. Always false without per-vector
+     *         masking and for vector numbers beyond the mask bits register.
+     */
+    bool isMasked(uint32_t vector) const
+    {
+        // Shifting by the register width or more is undefined, so such vectors are rejected.
+        if (not isPerVectorMaskable() or vector >= MSI_MASK_BIT_COUNT) {
+            return false;
+        }
+
+        uint32_t maskBits {0};
+        if (is64Bit()) {
+            maskBits = msi64BitPerVectorMasking.maskBits;
+        } else {
+            maskBits = msiPerVectorMasking.maskBits;
+        }
+
+        return maskBits & (1u << vector);
+    }
+
+    /** \return Offset of the mask bits inside the capability, std::nullopt without per-vector masking. */
+    std::optional<uint32_t> maskBitOffset() const
+    {
+        if (not isPerVectorMaskable()) {
+            return std::nullopt;
+        }
+
+        return is64Bit() ? 0x10 : 0xC;
+    }
+
+    /** \return Offset of the pending bits inside the capability, std::nullopt without per-vector masking. */
+    std::optional<uint32_t> pendingBitOffset() const
+    {
+        if (not isPerVectorMaskable()) {
+            return std::nullopt;
+        }
+
+        return is64Bit() ? 0x14 : 0x10;
+    }
+};
+static_assert(sizeof(MSICapabilityDescriptor) == 0x18,
+ "The MSI Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * MSIX capability descriptor based on the PCI Local Bus Specification REV 3.0
+ */
+class __attribute__((__packed__)) MSIXCapabilityDescriptor : public CapabilityDescriptor
+{
+private:
+    using CapabilityIterator = CapabilityList::CapabilityIterator;
+
+    uint16_t msgControl {0};
+    uint32_t tableOffset {0};
+    uint32_t pendingBitArrayOffset {0};
+
+    static constexpr uint32_t MSIX_TABLE_OFFSET_MASK {~0x7u};
+
+public:
+    MSIXCapabilityDescriptor() = default;
+
+    // The base class has to be copied explicitly. The iterator constructor delegates to this
+    // constructor, so leaving the base out resets capID and nextPtr to their default of 0.
+    MSIXCapabilityDescriptor(const MSIXCapabilityDescriptor& o)
+        : CapabilityDescriptor(o)
+        , msgControl(o.msgControl)
+        , tableOffset(o.tableOffset)
+        , pendingBitArrayOffset(o.pendingBitArrayOffset)
+    {}
+
+    MSIXCapabilityDescriptor(const CapabilityIterator& iterator)
+        : MSIXCapabilityDescriptor(iterator.getCapability<MSIXCapabilityDescriptor>())
+    {}
+
+    /** \return true if the MSIX enable flag is set in message control. */
+    bool enabled() const { return msgControl & VBOX_PCI_MSIX_FLAGS_ENABLE; }
+
+    /** \return true if the function mask flag is set in message control. */
+    bool allMasked() const { return msgControl & VBOX_PCI_MSIX_FLAGS_FUNCMASK; }
+
+    /** \return The number of entries of the MSIX table. */
+    uint16_t tableSize() const
+    {
+        // According to the PCI Local Bus Specification REV 3.0
+        // the MSIX Table size is encoded as N-1 in the bits 0 to 10
+        // of message control, so we need to add 1 to
+        // get the actual table size.
+        static constexpr uint16_t MSIX_TABLE_SIZE_MASK {0x7ff};
+        return (msgControl & MSIX_TABLE_SIZE_MASK) + 1;
+    }
+
+    /** \return The table offset register without its three low bits. */
+    uint32_t getTableOffset() const { return tableOffset & MSIX_TABLE_OFFSET_MASK; }
+
+    /** \return The three low bits of the table offset register, which select the BAR. */
+    uint32_t getBarIndex() const { return tableOffset & ~MSIX_TABLE_OFFSET_MASK; }
+};
+static_assert(sizeof(MSIXCapabilityDescriptor) == 0xc,
+ "The MSIX Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * MSIX table entry based on the PCI Local Bus Specification REV 3.0
+ *
+ * The members mirror the four dwords of one table entry, so that an entry can be filled with
+ * the raw bytes of the table.
+ */
+class __attribute__((__packed__)) MSIXTableEntry
+{
+private:
+    uint32_t msgAddressLow {0};
+    uint32_t msgAddressHigh {0};
+    uint32_t msgData {0};
+    uint32_t vectorCtrl {0};
+
+public:
+    /** \return The 64 bit message address composed of the high and the low dword. */
+    uint64_t messageAddress() const { return static_cast<uint64_t>(msgAddressHigh) << 32 | msgAddressLow; }
+
+    /** \return The message data dword of the entry. */
+    uint32_t messageData() const { return msgData; }
+};
+static_assert(sizeof(MSIXTableEntry) == 0x10,
+              "The MSIX Table Entry has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * Write a value into the PCI configuration space copy that VirtualBox keeps for a device.
+ * Pass through or semi emulated devices need this to let VirtualBox handle pci capabilities
+ * such as MSI.
+ *
+ * \param pPciDev The PCI device whose configuration space is written. Nothing happens if it is null.
+ * \param offset The offset into the configuration space. Refer to PCI Local Bus Specification
+ *               REV 3.0 Figure 6-1 for an overview.
+ * \param cb The byte count to write. Supported are 1, 2, 4 and 8 bytes; any other count only
+ *           triggers a release assertion.
+ * \param value The value to write.
+ */
+inline void writePciConfigSpaceShadow(PPDMPCIDEV pPciDev, uint32_t offset, unsigned cb, uint64_t value)
+{
+    // Without a device there is nothing to update.
+    if (not pPciDev) {
+        return;
+    }
+
+    if (cb == sizeof(uint8_t)) {
+        PDMPciDevSetByte(pPciDev, offset, value);
+    } else if (cb == sizeof(uint16_t)) {
+        PDMPciDevSetWord(pPciDev, offset, value);
+    } else if (cb == sizeof(uint32_t)) {
+        PDMPciDevSetDWord(pPciDev, offset, value);
+    } else if (cb == sizeof(uint64_t)) {
+        PDMPciDevSetQWord(pPciDev, offset, value);
+    } else {
+        AssertLogRelMsgFailed(("SuperNova-PCI: Could not write PCI Config Space Shadow due to an unsupported byte "
+                               "count of %u bytes.\n",
+                               cb));
+    }
+}
+
+/**
+ * Register the MSI(X) system for the pass through pci device in the VirtualBox PCI Subsystem.
+ *
+ * \param pDevIns The VirtualBox PCI Device instance data
+ * \param msiCapabilityIterator The MSI Capability iterator of the pci device, empty if the
+ *                              device has no MSI capability.
+ * \param msixCapabilityIterator The MSIX Capability iterator of the pci device, empty if the
+ *                               device has no MSIX capability.
+ *
+ * \return The status of PDMDevHlpPCIRegisterMsi, or VINF_SUCCESS if both iterators are empty.
+ */
+inline int registerMsi(PPDMDEVINS pDevIns, std::optional<CapabilityList::CapabilityIterator> msiCapabilityIterator,
+                       std::optional<CapabilityList::CapabilityIterator> msixCapabilityIterator)
+{
+    PDMMSIREG msiReg;
+    RT_ZERO(msiReg);
+
+    if (msiCapabilityIterator) {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+
+        msiReg.cMsiVectors = msiCap.maxCount();
+        msiReg.iMsiCapOffset = msiCapabilityIterator->getOffset();
+        msiReg.iMsiNextOffset = msiCap.nextPtr;
+        msiReg.fMsi64bit = msiCap.is64Bit();
+        msiReg.fMsiNoMasking = not msiCap.isPerVectorMaskable();
+    }
+
+    if (msixCapabilityIterator) {
+        // The descriptor has to be read through the MSIX iterator. The MSI iterator points to
+        // a different capability and is empty for devices that only support MSIX.
+        MSIXCapabilityDescriptor msixCap {*msixCapabilityIterator};
+
+        msiReg.cMsixVectors = msixCap.tableSize();
+        msiReg.iMsixCapOffset = msixCapabilityIterator->getOffset();
+        msiReg.iMsixNextOffset = msixCap.nextPtr;
+        msiReg.iMsixBar = msixCap.getBarIndex();
+    }
+
+    if (msiCapabilityIterator or msixCapabilityIterator) {
+        return PDMDevHlpPCIRegisterMsi(pDevIns, &msiReg);
+    }
+
+    /*
+     * If we end up here, the device supports neither MSI nor MSIX or the device capabilities are not present.
+     */
+    return VINF_SUCCESS;
+}
diff --git a/src/VBox/Devices/Bus/DevVfio.cpp b/src/VBox/Devices/Bus/DevVfio.cpp
new file mode 100644
index 0000000000..f93fcd7381
--- /dev/null
+++ b/src/VBox/Devices/Bus/DevVfio.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define LOG_GROUP LOG_GROUP_DEV_VFIO
+#include "DevVfio.h"
+
+#include <VBox/log.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pdmdev.h>
+
+#include <string>
+
+/**
+ * Constructor callback of the VFIO device.
+ *
+ * Validates the configuration, reads the sysfs path and the guest PCI address from it and
+ * initializes the VfioDevice with the sysfs path. The guest PCI address is only logged here.
+ *
+ * \param pDevIns The device instance
+ * \param iInstance The instance number (unused)
+ * \param pCfg The configuration node of the device
+ *
+ * \return VBox status code
+ */
+static DECLCALLBACK(int) devVfioConstruct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
+{
+    /*
+     * The device instance and the device helper structure have to be compatible.
+     */
+    PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
+    NOREF(iInstance);
+
+    PVFIODEV pThis {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+    PCPDMDEVHLPR3 pHlp {pDevIns->pHlpR3};
+
+    constexpr char validation[] = "sysfsPath"
+                                  "|GuestPCIBusNo"
+                                  "|GuestPCIDeviceNo"
+                                  "|GuestPCIFunctionNo";
+    PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, validation, "Invalid configuration");
+
+    /* The queried string is heap allocated, so it is copied and released right away. */
+    char* pszSysfsPath;
+    int rc = pHlp->pfnCFGMQueryStringAlloc(pCfg, "sysfsPath", &pszSysfsPath);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying sysfsPath as a string failed"));
+    }
+    std::string sysfsPathString {pszSysfsPath};
+    MMR3HeapFree(pszSysfsPath);
+
+    uint16_t bus;
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIBusNo", &bus);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIBusNo as a uint16_t failed"));
+    }
+
+    uint16_t device;
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIDeviceNo", &device);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIDeviceNo as a uint16_t failed"));
+    }
+
+    uint16_t function;
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIFunctionNo", &function);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIFunctionNo as a uint16_t failed"));
+    }
+
+    LogRel(("VFIO: Constructing VFIO PCI device with path %s Guest BDF: %02hx:%02hx.%hx\n",
+            sysfsPathString.c_str(), bus, device, function));
+
+    rc = pThis->init(pDevIns, sysfsPathString);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Destructor callback of the VFIO device; terminates the VfioDevice.
+ *
+ * \param pDevIns The device instance
+ *
+ * \return VINF_SUCCESS, the status of terminate() is not propagated.
+ */
+static DECLCALLBACK(int) devVfioDestruct(PPDMDEVINS pDevIns)
+{
+    /*
+     * The destructor is *always* called, so the versions are checked here as well.
+     */
+    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
+
+    PVFIODEV pVfioDevice {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+    pVfioDevice->terminate(pDevIns);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Init complete callback of the VFIO device; sets up DMA for the VfioDevice.
+ *
+ * \param pDevIns The device instance
+ *
+ * \return The status of VfioDevice::initializeDma
+ */
+static DECLCALLBACK(int) devVfioInitComplete(PPDMDEVINS pDevIns)
+{
+    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
+
+    PVFIODEV pVfioDevice {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+    const int rc {pVfioDevice->initializeDma(pDevIns)};
+
+    return rc;
+}
+
+/**
+ * The device registration structure.
+ *
+ * Only the construct, destruct and init complete callbacks are provided, all other callbacks
+ * are NULL. The shared instance data has the size of VFIODEV.
+ *
+ * NOTE(review): cMaxMsixVectors is 0 although registerMsi() can register MSIX vectors for the
+ * device - confirm that PDMDevHlpPCIRegisterMsi accepts this.
+ */
+extern "C" const PDMDEVREG g_DeviceVfioDev =
+{
+ /* .u32Version = */ PDM_DEVREG_VERSION,
+ /* .uReserved0 = */ 0,
+ /* .szName = */ "VfioDev",
+ /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_NEW_STYLE,
+
+ /* .fClass = */ PDM_DEVREG_CLASS_HOST_DEV,
+ /* .cMaxInstances = */ 1,
+ /* .uSharedVersion = */ 1,
+ /* .cbInstanceShared = */ sizeof(VFIODEV),
+ /* .cbInstanceR0 = */ 0,
+ /* .cbInstanceRC = */ 0,
+ /* .cMaxPciDevices = */ 1,
+ /* .cMaxMsixVectors = */ 0,
+ /* .pszDescription = */ "VirtualBox Vfio Passthrough Device\n",
+ /* .pszRCMod = */ "",
+ /* .pszR0Mod = */ "",
+ /* .pfnConstruct = */ devVfioConstruct,
+ /* .pfnDestruct = */ devVfioDestruct,
+ /* .pfnRelocate = */ NULL,
+ /* .pfnMemSetup = */ NULL,
+ /* .pfnPowerOn = */ NULL,
+ /* .pfnReset = */ NULL,
+ /* .pfnSuspend = */ NULL,
+ /* .pfnResume = */ NULL,
+ /* .pfnAttach = */ NULL,
+ /* .pfnDetach = */ NULL,
+ /* .pfnQueryInterface. = */ NULL,
+ /* .pfnInitComplete = */ devVfioInitComplete,
+ /* .pfnPowerOff = */ NULL,
+ /* .pfnSoftReset = */ NULL,
+ /* .pfnReserved0 = */ NULL,
+ /* .pfnReserved1 = */ NULL,
+ /* .pfnReserved2 = */ NULL,
+ /* .pfnReserved3 = */ NULL,
+ /* .pfnReserved4 = */ NULL,
+ /* .pfnReserved5 = */ NULL,
+ /* .pfnReserved6 = */ NULL,
+ /* .pfnReserved7 = */ NULL,
+ /* .u32VersionEnd = */ PDM_DEVREG_VERSION
+};
diff --git a/src/VBox/Devices/Bus/DevVfio.h b/src/VBox/Devices/Bus/DevVfio.h
new file mode 100644
index 0000000000..cfe384d7a1
--- /dev/null
+++ b/src/VBox/Devices/Bus/DevVfio.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include <svp/pci.h>
+
+#include <VBox/err.h>
+#include <VBox/pci.h>
+#include <VBox/vmm/pdmdev.h>
+
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <array>
+#include <atomic>
+#include <filesystem>
+#include <mutex>
+#include <vector>
+
+class VfioDevice
+{
+public:
+ /*
+ * The IRQ Type information, required for the interrupt handler.
+ */
+ enum class IrqType
+ {
+ VFIO_INTX = VFIO_PCI_INTX_IRQ_INDEX,
+ VFIO_MSI = VFIO_PCI_MSI_IRQ_INDEX,
+ VFIO_MSIX = VFIO_PCI_MSIX_IRQ_INDEX,
+ VFIO_NONE,
+ };
+
+ /**
+ * Interrupt Handler function
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int handleInterrupts(PPDMDEVINS pDevIns);
+
+ /**
+ * Initialize the VfioDevice
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int init(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath);
+
+ /**
+ * Initialize DMA
+ * As the ram preallocation is required to initialize the DMA regions for the
+ * VFIO device, the function have to be called **after** pgmR3RamPreAlloc
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int initializeDma(PPDMDEVINS pDevIns);
+
+ /**
+ * Terminates the VFIO device and closes the file descriptors
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int terminate(PPDMDEVINS pDevIns);
+
+ /**
+ * Read from the Vfio Device file descriptor
+ *
+ * \param pData data to read
+ * \param bytes count of bytes to read
+ * \param uAddress address to read from
+ *
+ * \return VBOX status code
+ */
+ int readFromDevice(void* pData, unsigned bytes, uint64_t uAddress)
+ {
+ return handleDeviceAccess(pread64, pData, bytes, uAddress);
+ }
+
+ /**
+ * Write to the Vfio Device file descriptor
+ *
+ * \param pData data to write
+ * \param bytes count of bytes to write
+ * \param uAddress address to write to
+ *
+ * \return VBOX status code
+ */
+ int writeToDevice(const void* pData, unsigned bytes, uint64_t uAddress)
+ {
+ return handleDeviceAccess(pwrite64, const_cast<void*>(pData), bytes, uAddress);
+ }
+
+ /**
+ * Read from the actual PCI Config Space of the VFIO device
+ *
+ * \param data data to read
+ * \param bytes count of bytes to read
+ * \param uAddress address to read from
+ *
+ * \return VBOX status code
+ */
+ template <typename T>
+ int readConfigSpace(T& data, unsigned bytes, uint64_t uAddress)
+ {
+ return readFromDevice(&data, bytes, mcfgOffset + uAddress);
+ }
+
+ /**
+ * Write to the actual PCI Config Space of the VFIO device
+ *
+ * \param data data to write
+ * \param bytes count of bytes to write
+ * \param uAddress address to write to
+ *
+ * \return VBOX status code
+ */
+ template <typename T>
+ int writeConfigSpace(T& data, unsigned bytes, uint64_t uAddress)
+ {
+ return writeToDevice(&data, bytes, mcfgOffset + uAddress);
+ }
+
+private:
+ using LockGuard = std::lock_guard<std::mutex>;
+
+ /**
+ * The interrupt information structure is a bookkeeping structure for the
+ * interrupt handling.
+ * It maps the interrupt event file descriptor to an internal interrupt
+ * index and contains the interrupt type (INTX, MSI, MSIX) for the handler thread.
+ */
+ struct InterruptInformation
+ {
+ int fd;
+ uint32_t index;
+
+ bool operator==(const InterruptInformation& o) const
+ {
+ return o.fd == fd and o.index == index;
+ }
+ };
+
+    /**
+     * Common implementation of readFromDevice and writeToDevice.
+     *
+     * \param fn The positional I/O function, called as fn(fd, buffer, count, offset)
+     * \param data The buffer to read into or to write from
+     * \param bytes count of bytes to transfer
+     * \param uAddress offset in the vfio device file descriptor
+     *
+     * \return VERR_GENERAL_FAILURE if the device file descriptor is not open,
+     *         VERR_ACCESS_DENIED if the transfer failed or was incomplete, otherwise VINF_SUCCESS
+     */
+    template<typename FN>
+    int handleDeviceAccess(FN& fn, void* data, unsigned bytes, uint64_t uAddress)
+    {
+        AssertLogRelMsgReturn(vfioDeviceFd > 0, ("The Vfio Device is not open \n"), VERR_GENERAL_FAILURE);
+        const auto transferred {fn(vfioDeviceFd, data, bytes, uAddress)};
+
+        // A short transfer leaves a part of the buffer untouched, so only a complete transfer
+        // counts as success.
+        const bool complete {transferred >= 0 and static_cast<uint64_t>(transferred) == bytes};
+
+        return complete ? VINF_SUCCESS : VERR_ACCESS_DENIED;
+    }
+
+ /**
+ * Initialize VFIO container and device
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param sysfsPath path to the sysfs device
+ *
+ * \return VBox status code
+ */
+ int initializeVfio(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath);
+
+ /**
+ * Initialize the VirtualBox PCI Device Information
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int initializePci(PPDMDEVINS pDevIns);
+
+ /**
+ * Initialize VFIO Memory Regions
+ *
+ * Such regions are either PCI Bar regions or VFIO specific regions to
+ * provide device Information or device state such as graphics output
+ *
+ * \param pDevIns The PCI Instance Data
+ * \param deviceInfo The vfio device information
+ *
+ * \return VBox status code
+ */
+ int initializeMemoryRegions(PPDMDEVINS pDevIns, vfio_device_info& deviceInfo);
+
+ /**
+ * Initialize interrupt handling
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int initializeInterrupts(PPDMDEVINS pDevIns);
+
+ /**
+ * Activate the corresponding interrupt type. The current interrupt type must be disabled before.
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param vfuiIrqIndexType the irq type that should be activated
+ * \param irqCount count of irqs to register
+ *
+ * \return VBox status code
+ */
+ int activateInterrupts(PPDMDEVINS pDevIns, const IrqType vfioIrqIndexType, uint32_t irqCount = 1);
+
+ /**
+ * Disable the corresponding interrupt type
+ *
+ * \param pDevIns The PCI Device Instance
+ *
+ * \return VBox status code
+ */
+ int disableInterrupts(PPDMDEVINS pDevIns);
+
+ /**
+ * Inject a MSI
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param irqInfo The interrupt information of the pending interrupt
+ *
+ * \return VBOX status code
+ */
+ int injectMsi(PPDMDEVINS pDevIns, InterruptInformation& irqInfo);
+
+ /**
+ * Inject a MSIX
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param irqInfo The interrupt information of the pending interrupt
+ *
+ * \return VBOX status code
+ */
+ int injectMsix(PPDMDEVINS pDevIns, InterruptInformation& irqInfo);
+
+ /**
+ * The configuration space write handler.
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param uAddress offset in the configuration space to write
+ * \param cb count of bytes to write
+ * \param u32Value The value to write
+ *
+ * \return VBox status code
+ */
+ int configSpaceWriteHandler(PPDMDEVINS pDevIns, uint32_t uAddress, unsigned cb, uint32_t u32Value);
+
+ /**
+ * The memory mapped IO access handler function.
+ *
+ * \param pDevIns The PCI Device Instance
+ * \param barRegion The reference to the PCI Bar region
+ * \param barOffset The offset in the PCI bar
+ * \param pv The pointer to the data to be read
+ * \param cb The size of the data to be read
+ * \param write Indicator of access direction
+ *
+ * \return Vbox Status code
+ */
+ int mmioAccessHandler(PPDMDEVINS pDevIns, PCIBarRegion& barRegion, RTGCPHYS barOffset, void* pv, unsigned cb, bool writeAccess);
+
+ /**
+ * Start inteception of Guest VM PCI Config Space Accesses
+ *
+ * \param pDevIns the VBox Device Instance
+ *
+ * \return VBox status code
+ */
+ int interceptConfigSpaceAccesses(PPDMDEVINS pDevIns);
+
+ /**
+ * Register a Guest Physical Memory range at the vfio container
+ *
+ * \param pVM Pointer to the VM structure
+ * \param startGCPhys Guest physical address of the start of the ram range
+ * \param endGCPhys Guest physical address of the end of the region
+ *
+ * \return VBOX status code
+ */
+ int registerDmaRange(PVM pVM, RTGCPHYS startGCPhys, RTGCPHYS endGCPhys);
+
+ /**
+ * Try handling of PCI Bar interception
+ *
+ * \param pDevIns PDM Device Instance
+ * \param pciConfigCommandValue value of the command register of the PCI config space
+ */
+ void tryHandleBarInterception(PPDMDEVINS pDevIns, const uint32_t pciConfigCommandValue);
+
+    /**
+     * Map a PCI Bar at its new address in the corresponding subsystem (IO or MMIO).
+     *
+     * Nothing is done if the bar is already mapped at the requested address. A bar that is
+     * mapped at a different address is unmapped first. The results of mapFn and unmapFn are
+     * not evaluated.
+     *
+     * \tparam INVALID_ELEM the address value that marks the bar as not mapped
+     * \param mapFn function used to map the Bar at the corresponding Subsystem
+     * \param unmapFn function to unmap the old Bar region if the bar was present before
+     * \param pDevIns the PDM Device Instance Data structure
+     * \param barRegion the region bookkeeping data structure
+     * \param mapAddress the new address of the Bar
+     */
+    template <uint64_t INVALID_ELEM, typename MapFN, typename UnmapFN>
+    void registerPCIBar(MapFN& mapFn, UnmapFN& unmapFn, PPDMDEVINS pDevIns, PCIBarRegion& barRegion, uint64_t mapAddress) {
+        LogRel(("VFIO: RegisterBar %#llx \n", mapAddress));
+
+        const bool addressChanged {barRegion.address != mapAddress};
+        if (addressChanged)
+        {
+            const bool currentlyMapped {barRegion.address != INVALID_ELEM};
+            if (currentlyMapped)
+            {
+                unmapFn(pDevIns, barRegion.hRegion);
+                barRegion.address = INVALID_ELEM;
+            }
+
+            mapFn(pDevIns, barRegion.hRegion, mapAddress);
+            barRegion.address = mapAddress;
+        }
+    }
+
+ /**
+ * Read the Bar value from the PCI config space
+ *
+ * \param barNumber The bar which value should be read
+ *
+ * \return PCIBar information
+ */
+ const PCIBar getBarInfo(unsigned barNumber);
+
+    /**
+     * Ioctl wrapper that turns a failed ioctl into a VBox device error.
+     *
+     * \param pDevIns the VBox Device Instance the error is set on
+     * \param fd file descriptor to interact with
+     * \param request ioctl request number
+     * \param errorString message of the error that is set if the ioctl fails
+     * \param args variadic template args for the ioctl
+     *
+     * \return VINF_SUCCESS, or the result of PDMDEV_SET_ERROR with VERR_INVALID_PARAMETER if
+     *         the ioctl returned a negative value
+     */
+    template <typename ...ARGS>
+    int vfioControl(PPDMDEVINS pDevIns, int fd, unsigned long request, const char* errorString, ARGS&& ...args)
+    {
+        const int ioctlResult {ioctl(fd, request, std::forward<ARGS>(args) ...)};
+        if (ioctlResult >= 0)
+        {
+            return VINF_SUCCESS;
+        }
+
+        return PDMDEV_SET_ERROR(pDevIns, VERR_INVALID_PARAMETER, errorString);
+    }
+
+    /**
+     * Ioctl wrapper for requests on the vfio device file descriptor.
+     *
+     * \param pDevIns the VBox Device Instance
+     * \param request ioctl request number
+     * \param errorString message of the error that is set if the ioctl fails
+     * \param args variadic template args for the ioctl
+     *
+     * \return VERR_GENERAL_FAILURE if the device file descriptor is not open, otherwise the
+     *         status of vfioControl
+     */
+    template <typename ...ARGS>
+    int deviceControl(PPDMDEVINS pDevIns, unsigned long request, const char* errorString, ARGS&& ...args)
+    {
+        AssertLogRelMsgReturn(vfioDeviceFd > 0, ("The Vfio Device is not open \n"), VERR_GENERAL_FAILURE);
+
+        const int rc {vfioControl(pDevIns, vfioDeviceFd, request, errorString, std::forward<ARGS>(args)...)};
+        return rc;
+    }
+
+    /** Vfio File descriptors */
+    int vfioContainerFd{-1};
+    int vfioGroupFd{-1};
+    int vfioDeviceFd {-1};
+
+    /** PCI device members. All of them carry an initializer so that no member starts out indeterminate. */
+    PPDMPCIDEV pPciDev {nullptr};
+    uint64_t mcfgOffset {0}; ///< The offset to the PCI Config Space Page in the vfio device
+    std::atomic<bool> pciConfigMemoryDecodingEnabled {false}; ///< The PCI Memory decoding indicator
+    std::atomic<bool> pciConfigIODecodingEnabled {false}; ///< The PCI IO decoding indicator
+    std::array<PCIBarRegion, VBOX_PCI_MAX_BARS> pciBars {};
+
+    /** IRQ handling */
+    RTTHREAD hIrqDeliveryThread {NIL_RTTHREAD};
+    // Even if only one INTX interrupt is supported handling it as a vector reduce the code complexity by a lot.
+    std::vector<InterruptInformation> aIrqInformation;
+    std::vector<MSIXTableEntry> aMsixTable;
+    IrqType activeInterruptType {IrqType::VFIO_NONE};
+    std::mutex irqDisable;
+
+    std::optional<CapabilityList::CapabilityIterator> msiCapabilityIterator;
+    std::optional<CapabilityList::CapabilityIterator> msixCapabilityIterator;
+
+    std::atomic<bool> exit{false};
+};
+typedef VfioDevice VFIODEV;
+
+typedef VFIODEV *PVFIODEV;
diff --git a/src/VBox/Devices/Bus/VfioDevice.cpp b/src/VBox/Devices/Bus/VfioDevice.cpp
new file mode 100644
index 0000000000..6bf1945828
--- /dev/null
+++ b/src/VBox/Devices/Bus/VfioDevice.cpp
@@ -0,0 +1,912 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */
+#include <VBox/vmm/pdmpcidev.h>
+
+#include "DevVfio.h"
+
+#include <iprt/mem.h>
+#include <VBox/log.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/pdmdev.h>
+#include "DevPciInternal.h"
+
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <cstring>
+#include <optional>
+
+namespace {
+ using IrqType = VfioDevice::IrqType;
+ /** PCI config space read callback: reads cb bytes at uAddress via VfioDevice::readConfigSpace into *pu32Value, hands the value to writePciConfigSpaceShadow and returns the read status. */
+ VBOXSTRICTRC vfioConfigSpaceRead(PPDMDEVINS pDev, PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t* pu32Value)
+ {
+ PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+
+ AssertLogRelMsgReturn(pu32Value, ("VFIO: PCi config space read: value pointer is zero!"), VERR_INVALID_POINTER);
+ // NOTE(review): the shadow is written below even if readConfigSpace reports a failure - confirm this is intended.
+ int rc { pThis->readConfigSpace(*pu32Value, cb, uAddress) };
+ writePciConfigSpaceShadow(pPciDev, uAddress, cb, *pu32Value);
+ return rc;
+ }
+ /** Converts an IrqType enumerator into its underlying integer value (used for VFIO irq indices and logging). */
+ std::underlying_type_t<IrqType> toUnderlying(const IrqType& t)
+ {
+ return static_cast<std::underlying_type_t<IrqType>>(t);
+ }
+
+}
+
+int VfioDevice::initializeVfio(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath)
+{
+    // Opens the VFIO container, the IOMMU group of the device at sysfsPath and the device itself, then resets the device.
+    namespace fs = std::filesystem;
+    const std::filesystem::path VFIO_PATH {"/dev/vfio"};
+    int rc {VINF_SUCCESS};
+
+    vfioContainerFd = open((VFIO_PATH / "vfio").c_str(), O_RDWR | O_CLOEXEC);
+    AssertLogRelMsgReturn(vfioContainerFd > 0, ("VFIO: Could not open VFIO Container\n"), VERR_INVALID_PARAMETER);
+
+    const int vfioApiVersion {ioctl(vfioContainerFd, VFIO_GET_API_VERSION)};
+
+    LogRel(("VFIO: Detected VFIO Api Version %d\n", vfioApiVersion));
+    // VFIO_CHECK_EXTENSION is positive only if the extension is supported; an ioctl error (-1) must not pass as "present".
+    const int iommuTypePresent {ioctl(vfioContainerFd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)};
+    AssertLogRelMsgReturn(iommuTypePresent > 0, ("VFIO: Requested IOMMU type is not supported.\n"), VERR_NOT_AVAILABLE);
+    // The group node carries the name of the iommu_group symlink target. O_CLOEXEC has to be part of the flags, not the mode argument.
+    const auto iommuGroupLink {fs::read_symlink(sysfsPath / "iommu_group")};
+    vfioGroupFd = open((VFIO_PATH / iommuGroupLink.filename()).c_str(), O_RDWR | O_CLOEXEC);
+    AssertLogRelMsgReturn(vfioGroupFd > 0, ("VFIO: Could not open VFIO Group\n"), VERR_INVALID_PARAMETER);
+
+    rc = vfioControl(pDevIns, vfioGroupFd, VFIO_GROUP_SET_CONTAINER,
+                     "VFIO: Unable to assign the VFIO container to the VFIO Group \n", &vfioContainerFd);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    rc = vfioControl(pDevIns, vfioContainerFd, VFIO_SET_IOMMU, "VFIO: Unable to set VFIO IOMMU Type \n", VFIO_TYPE1_IOMMU);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    vfioDeviceFd = ioctl(vfioGroupFd, VFIO_GROUP_GET_DEVICE_FD, sysfsPath.filename().c_str());
+    AssertLogRelMsgReturn(vfioDeviceFd > 0, ("VFIO: Unable to open VFIO device \n"), VERR_INVALID_PARAMETER);
+
+    rc = deviceControl(pDevIns, VFIO_DEVICE_RESET, "Unable to reset VFIO device");
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    return rc;
+}
+
+int VfioDevice::initializePci(PPDMDEVINS pDevIns)
+{
+    // Locates the config space region of the VFIO device, copies the identifying header fields into the virtual PCI device and registers it.
+    int rc {VINF_SUCCESS};
+    pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+
+    vfio_region_info regionInfo {};
+    regionInfo.argsz = sizeof(regionInfo);
+    regionInfo.index = VFIO_PCI_CONFIG_REGION_INDEX;
+
+    rc = deviceControl(pDevIns, VFIO_DEVICE_GET_REGION_INFO, "VFIO: Could not retrieve VFIO Device MCFG region\n", &regionInfo);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    AssertLogRelMsgReturn(regionInfo.size != 0, ("VFIO: MCFG Region size is zero\n"), VERR_INVALID_PARAMETER);
+
+    mcfgOffset = regionInfo.offset;
+
+    uint16_t vendorId {0}, deviceId {0};
+    uint8_t classBase {0}, classSub {0}, headerType {0}, interruptPin {0}, interruptLine {0};
+
+    readConfigSpace(vendorId, sizeof(vendorId), VBOX_PCI_VENDOR_ID);
+    readConfigSpace(deviceId, sizeof(deviceId), VBOX_PCI_DEVICE_ID);
+    readConfigSpace(classBase, sizeof(classBase), VBOX_PCI_CLASS_BASE);
+    readConfigSpace(classSub, sizeof(classSub), VBOX_PCI_CLASS_SUB);
+    readConfigSpace(headerType, sizeof(headerType), VBOX_PCI_HEADER_TYPE);
+    readConfigSpace(interruptLine, sizeof(interruptLine), VBOX_PCI_INTERRUPT_LINE);
+    readConfigSpace(interruptPin, sizeof(interruptPin), VBOX_PCI_INTERRUPT_PIN);
+
+    PDMPciDevSetVendorId(pPciDev, vendorId);
+    PDMPciDevSetDeviceId(pPciDev, deviceId);
+    PDMPciDevSetClassBase(pPciDev, classBase);
+    PDMPciDevSetClassSub(pPciDev, classSub);
+    PDMPciDevSetHeaderType(pPciDev, headerType);
+    PDMPciDevSetInterruptLine(pPciDev, interruptLine);
+    PDMPciDevSetInterruptPin(pPciDev, interruptPin);
+
+    rc = PDMDevHlpPCIRegister(pDevIns, pPciDev);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    CapabilityList capList {vfioConfigSpaceRead, pDevIns};
+    msiCapabilityIterator = capList.getCapabilityIterator(VBOX_PCI_CAP_ID_MSI);
+    msixCapabilityIterator = capList.getCapabilityIterator(VBOX_PCI_CAP_ID_MSIX);
+
+    // Devices whose MSI capability reports more than one message are rejected.
+    if (msiCapabilityIterator)
+    {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+        AssertLogRelMsgReturn(msiCap.maxCount() == 1, ("VFIO: Multiple Message MSI supporting devices are not supported yet!\n"), VERR_NOT_SUPPORTED);
+    }
+
+
+    return rc;
+}
+
+int VfioDevice::initializeMemoryRegions(PPDMDEVINS pDevIns, vfio_device_info& deviceInfo)
+{
+    // Queries the VFIO regions that correspond to PCI BARs and creates an I/O port or MMIO
+    // region with access handlers for every BAR region that reports a non-zero size.
+    int rc {VINF_SUCCESS};
+    for (auto i {0u}; i < deviceInfo.num_regions; ++i)
+    {
+        /**
+         * Currently only PCI Bar regions are supported. VFIO places the bar region information
+         * at indices 0 <= i < VBOX_PCI_MAX_BARS, so we can stop if the limit is reached
+         * TODO implement special region handling
+         */
+        if (i >= VBOX_PCI_MAX_BARS)
+        {
+            break;
+        }
+
+        vfio_region_info regionInfo;
+        regionInfo.argsz = sizeof(regionInfo);
+        regionInfo.index = i;
+
+        rc = deviceControl(pDevIns, VFIO_DEVICE_GET_REGION_INFO, "VFIO: Unable to retrieve VFIO region info", &regionInfo);
+        AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+        if (regionInfo.size == 0)
+        {
+            continue;
+        }
+
+        const auto barInfo {getBarInfo(i)};
+
+        PCIBarRegion& region {pciBars[i]};
+        region.offset = regionInfo.offset;
+        region.size = regionInfo.size;
+        region.iRegion = i;
+
+        if (barInfo.isIoBar())
+        {
+            auto portIoRead = [](PPDMDEVINS pDev, void* pvUser , RTIOPORT offsetPort, uint32_t* pu32, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pu32, VERR_INVALID_POINTER);
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->readFromDevice(pu32, cb, pBar->offset + offsetPort);
+            };
+
+            auto portIoWrite = [](PPDMDEVINS pDev, void* pvUser, RTIOPORT offsetPort, uint32_t u32, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->writeToDevice(&u32, cb, pBar->offset + offsetPort);
+            };
+
+            rc = PDMDevHlpPCIIORegionCreateIo(pDevIns, i, region.size, portIoWrite, portIoRead,
+                                              &region, "VFIO Port IO", nullptr, &region.hRegion);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (barInfo.isMmioBar())
+        {
+            region.address = NIL_RTGCPHYS;
+
+            auto mmioRead = [](PPDMDEVINS pDev, void* pvUser, RTGCPHYS barOffset, void* pv, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->mmioAccessHandler(pDev, *pBar, barOffset, pv, cb, false);
+            };
+
+            auto mmioWrite = [](PPDMDEVINS pDev, void* pvUser, RTGCPHYS barOffset, const void * pv, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->mmioAccessHandler(pDev, *pBar, barOffset, const_cast<void*>(pv), cb, true);
+            };
+
+            rc = PDMDevHlpMmioCreate(pDevIns,
+                                     region.size,
+                                     NULL,
+                                     UINT32_MAX,
+                                     mmioWrite,
+                                     mmioRead,
+                                     &region,
+                                     IOMMMIO_FLAGS_READ_PASSTHRU | IOMMMIO_FLAGS_WRITE_PASSTHRU,
+                                     "VFIO MMIO BAR",
+                                     &region.hRegion);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc); /* otherwise the next iteration overwrites the failure */
+        }
+    }
+    return rc;
+}
+
+int VfioDevice::handleInterrupts(PPDMDEVINS pDevIns)
+{
+    // Body of the IRQ delivery thread: polls the interrupt eventfds and injects INTX/MSI/MSIX into the guest until 'exit' is set.
+    // Waits for input on a file descriptor with a given timeout.
+    // Taken from https://www.gnu.org/software/libc/manual/html_node/Waiting-for-I_002fO.html
+    // Returns the first file descriptor that has input
+    auto waitForInput = [&] (std::chrono::microseconds delay) -> std::optional<InterruptInformation>
+    {
+        fd_set set;
+        struct timeval timeout {0, 0};
+        /* Initialize the file descriptor set. */
+        FD_ZERO(&set);
+
+        /*
+         * We use a copy of the interrupts here to avoid firing interrupts that are deactivated already.
+         */
+        irqDisable.lock();
+        std::vector<InterruptInformation> aCurrentIrqInformation {aIrqInformation};
+        irqDisable.unlock();
+
+        for (const auto& efd : aCurrentIrqInformation)
+        {
+            if (efd.fd > 0 and efd.fd < FD_SETSIZE) /* FD_SET is undefined for descriptors >= FD_SETSIZE */
+            {
+                FD_SET(efd.fd, &set);
+            }
+        }
+
+        /* Initialize the timeout data structure. */
+        const auto seconds {std::chrono::duration_cast<std::chrono::seconds>(delay)};
+        const auto us {std::chrono::duration_cast<std::chrono::microseconds>(delay - seconds)};
+
+        timeout.tv_sec = seconds.count();
+        timeout.tv_usec = us.count();
+
+        /* select returns 0 if timeout, 1 if input available, -1 if error. */
+        int error = TEMP_FAILURE_RETRY(select(FD_SETSIZE,
+                                              &set, NULL, NULL,
+                                              &timeout));
+
+        if (error == -1)
+        {
+            perror("select on fds failed");
+            Assert(error != -1); /* always fires here: flags the failure in strict builds */
+            return std::nullopt; /* the fd set is not meaningful after a failed select */
+        }
+
+        {
+            LockGuard _ {irqDisable};
+
+            /*
+             * skip delivering non active interrupts
+             */
+            if (aCurrentIrqInformation != aIrqInformation)
+            {
+                return std::nullopt;
+            }
+
+            for (const auto& efd : aCurrentIrqInformation)
+            {
+                if (efd.fd > 0 and efd.fd < FD_SETSIZE and FD_ISSET(efd.fd, &set))
+                {
+                    return efd;
+                }
+            }
+        }
+
+        return std::nullopt;
+    };
+
+    while (not exit.load())
+    {
+        if (auto irqInfo = waitForInput(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::seconds(1))); irqInfo)
+        {
+            uint64_t value;
+            const ssize_t s {read(irqInfo->fd, &value, sizeof(value))};
+            AssertLogRelMsgReturn(s == sizeof(value), ("VFIO: Read on event FD returned wrong size."), VERR_GENERAL_FAILURE);
+            AssertLogRelReturn(value != 0, VERR_INTERRUPTED);
+            int rc {VINF_SUCCESS};
+            switch (activeInterruptType)
+            {
+                case IrqType::VFIO_INTX:
+                    PDMDevHlpPCISetIrqNoWait(pDevIns, 0, PDM_IRQ_LEVEL_FLIP_FLOP);
+                    break;
+                case IrqType::VFIO_MSI:
+                    rc = injectMsi(pDevIns, *irqInfo);
+                    break;
+                case IrqType::VFIO_MSIX:
+                    rc = injectMsix(pDevIns, *irqInfo);
+                    break;
+                default:
+                    AssertLogRelMsgFailedReturn(("VFIO: Unsupported interrupt type in IRQ delivery thread detected %u\n", toUnderlying(activeInterruptType)), VERR_NOT_SUPPORTED);
+            }
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+int VfioDevice::initializeInterrupts(PPDMDEVINS pDevIns)
+{
+    // Activates INTX as the initial interrupt type, registers the MSI capability with PDM and starts the IRQ delivery thread.
+    int rc {activateInterrupts(pDevIns, IrqType::VFIO_INTX)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    /*
+     * MSIX (currently disabled, see the commented block below): the MSIX table needs to be shadowed, as a read access
+     * on the table returns invalid data. The table entries therefore have to be allocated upfront to handle table writes.
+     */
+    PDMMSIREG MsiReg;
+    RT_ZERO(MsiReg);
+
+    if (msiCapabilityIterator)
+    {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+        MsiReg.cMsiVectors = msiCap.maxCount();
+        MsiReg.iMsiCapOffset = msiCapabilityIterator->getOffset();
+        MsiReg.iMsiNextOffset = msiCap.nextPtr;
+        MsiReg.fMsi64bit = msiCap.is64Bit();
+        MsiReg.fMsiNoMasking = not msiCap.isPerVectorMaskable();
+    }
+
+    // if (msixCapabilityIterator)
+    // {
+    // MSIXCapabilityDescriptor msixCap {*msixCapabilityIterator};
+    // aMsixTable.resize(msixCap.tableSize());
+    // MsiReg.cMsixVectors = msixCap.tableSize();
+    // MsiReg.iMsixCapOffset = msixCapabilityIterator->getOffset();
+    // MsiReg.iMsixNextOffset = msixCap.nextPtr;
+    // MsiReg.iMsixBar = msixCap.getBarIndex();
+    // }
+
+    if (msixCapabilityIterator or msiCapabilityIterator)
+    {
+        rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg);
+        AssertLogRelReturn(RT_SUCCESS(rc), rc); /* rc is reused by RTThreadCreate below */
+    }
+
+    auto handleIrqs = [](RTTHREAD /*hSelf*/, void* pvUser) -> int
+    {
+        PPDMDEVINS pDev {static_cast<PPDMDEVINS>(pvUser)};
+        PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+
+        return pThis->handleInterrupts(pDev);
+    };
+
+    rc = RTThreadCreate(&hIrqDeliveryThread, handleIrqs, pDevIns, 0, RTTHREADTYPE_IO, RTTHREADFLAGS_WAITABLE, "vfio IRQ");
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    return rc;
+}
+
+int VfioDevice::activateInterrupts(PPDMDEVINS pDevIns, const IrqType irqType, uint32_t irqCount)
+{
+    // Creates irqCount eventfds and registers them as trigger for the given VFIO interrupt type. Holds irqDisable for the whole call.
+    LockGuard _ {irqDisable};
+    int rc;
+    vfio_irq_info irqInfo;
+    irqInfo.argsz = sizeof(irqInfo);
+    irqInfo.index = toUnderlying(irqType);
+
+    /**
+     * The call of this function requires that the interrupts are disabled.
+     */
+    AssertLogRelMsgReturn(aIrqInformation.size() == 0,
+                          ("VFIO: Trying to activate interrupts without deactivating the previous irqs! Disable irqs before activate new ones!"),
+                          VERR_NOT_SUPPORTED);
+
+    /**
+     * If the IRQ is not enabled in the VFIO device the call will return unsuccessful
+     * and we don't need to set up something for this IRQ and can just continue
+     */
+    if (RT_FAILURE(deviceControl(pDevIns, VFIO_DEVICE_GET_IRQ_INFO, "", &irqInfo)))
+    {
+        return VERR_NOT_AVAILABLE;
+    }
+
+    /**
+     * Some devices (e.g. SR-IOV virtual functions) do not have legacy interrupts enabled.
+     * We can skip interrupt activation if we find a device without legacy interrupts.
+     */
+    if (irqType == IrqType::VFIO_INTX and irqInfo.count == 0)
+    {
+        uint8_t interruptPin;
+        readConfigSpace(interruptPin, sizeof(interruptPin), VBOX_PCI_INTERRUPT_PIN);
+        AssertLogRelMsgReturn(interruptPin == 0, ("VFIO: Found device without INTX information, but INTX is marked as supported in the PCI Config space"), VERR_NOT_AVAILABLE);
+        return VINF_SUCCESS;
+    }
+
+    /**
+     * If we try to activate an interrupt type that is not enabled, or supported by vfio we get an interrupt count of 0
+     * We bail out here, as we are not able to enable an interrupt type with no interrupts.
+     */
+    if (irqInfo.count == 0)
+    {
+        LogRel(("VFIO: Trying to activate IRQ type %u, but no IRQs of that type are configured\n", toUnderlying(irqType)));
+        return VERR_NOT_AVAILABLE;
+    }
+
+    /**
+     * Sanity check: If we request a larger number of interrupts than the VFIO device is able to support, we bail out here.
+     */
+    if (irqInfo.count < irqCount)
+    {
+        LogRel(("VFIO: Trying to register %u irqs, but %u are supported for type %u.\n", irqCount, irqInfo.count, toUnderlying(irqType)));
+        return VERR_NOT_SUPPORTED;
+    }
+
+    AssertLogRelReturn(irqInfo.flags & VFIO_IRQ_INFO_EVENTFD, VERR_NOT_AVAILABLE);
+
+    const auto setSize {sizeof(vfio_irq_set) + sizeof(int) * irqCount};
+    std::vector<uint8_t> buf(setSize);
+    vfio_irq_set& irqSet {*reinterpret_cast<vfio_irq_set*>(buf.data())};
+
+    irqSet.argsz = setSize;
+    irqSet.flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    irqSet.index = irqInfo.index;
+    irqSet.start = 0;
+    irqSet.count = irqCount;
+
+    /*
+     * Logging and sanity checking only.
+     */
+    switch (irqType)
+    {
+        case IrqType::VFIO_INTX:
+            AssertLogRelMsgReturn(irqInfo.count == 1,
+                                  ("VFIO: Only a single INTX is supported! Detected Count: %u\n", irqInfo.count),
+                                  VERR_NOT_IMPLEMENTED);
+            LogRel(("VFIO: Activate INTX\n"));
+            break;
+        case IrqType::VFIO_MSI:
+            LogRel(("VFIO: Activate MSI count: %u\n", irqCount));
+            break;
+        case IrqType::VFIO_MSIX:
+            LogRel(("VFIO: Activate MSIX: count %u\n", irqCount));
+            break;
+        default:
+            AssertLogRelMsgFailedReturn(("VFIO: Found unsupported vfio IRQ type: %u, count: %u\n", irqInfo.index, irqInfo.count), VERR_NOT_IMPLEMENTED);
+    }
+
+    activeInterruptType = irqType;
+
+    for (uint32_t i {0ul}; i < irqCount; ++i)
+    {
+        int eventFd {eventfd(0, 0)};
+
+        AssertLogRelMsgReturn(eventFd > 0,("VFIO: could not request additional eventfds\n"), VERR_ACCESS_DENIED);
+        aIrqInformation.push_back({eventFd, i});
+    }
+
+    for (auto i {0ul}; i < aIrqInformation.size(); ++i)
+    {
+        reinterpret_cast<int*>(irqSet.data)[i] = aIrqInformation[i].fd;
+    }
+
+    rc = deviceControl(pDevIns, VFIO_DEVICE_SET_IRQS, "VFIO: Could not set irq info\n", &irqSet);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    return rc;
+}
+
+int VfioDevice::disableInterrupts(PPDMDEVINS pDevIns)
+{
+ LockGuard _ {irqDisable};
+ // Tears down the currently active interrupt type: tells VFIO to drop its triggers, closes all eventfds and empties aIrqInformation. Does nothing if no interrupts are registered.
+ if (aIrqInformation.size() != 0 and activeInterruptType != IrqType::VFIO_NONE)
+ {
+ const auto setSize {sizeof(vfio_irq_set)};
+ std::vector<uint8_t> buf(setSize);
+ vfio_irq_set& irqSet {*reinterpret_cast<vfio_irq_set*>(buf.data())};
+ // DATA_NONE | ACTION_TRIGGER with count 0 carries no eventfds; per the VFIO UAPI this disables all interrupts of the index.
+ irqSet.argsz = setSize;
+ irqSet.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irqSet.index = toUnderlying(activeInterruptType);
+ irqSet.start = 0;
+ irqSet.count = 0;
+
+ int rc {deviceControl(pDevIns, VFIO_DEVICE_SET_IRQS, "VFIO: Could not set irq info for deactivation\n", &irqSet)};
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ // Only reached if the ioctl succeeded; on failure the eventfds stay open and registered.
+ for(auto info: aIrqInformation)
+ {
+ close(info.fd);
+ }
+ // NOTE(review): activeInterruptType is left unchanged here - confirm that callers always activate a new type afterwards.
+ aIrqInformation.clear();
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+int VfioDevice::injectMsi(PPDMDEVINS pDevIns, InterruptInformation& irqInfo)
+{
+ // Delivers a pending MSI to the guest. Returns VERR_NOT_SUPPORTED if the MSI capability is missing or not enabled.
+ AssertLogRelMsgReturn(msiCapabilityIterator, ("VFIO: Pending MSI, but the capability is not provided \n"), VERR_NOT_SUPPORTED);
+ MSICapabilityDescriptor cap(*msiCapabilityIterator);
+
+ AssertLogRelMsgReturn(cap.enabled(), ("VFIO: Pending MSI, but the capability is disabled \n"), VERR_NOT_SUPPORTED);
+ // A vector that the capability reports as masked is dropped; the call still returns success.
+ if (not cap.isMasked(irqInfo.index))
+ {
+ PDMDevHlpPCISetIrqNoWait(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
+ }
+
+ return VINF_SUCCESS;
+}
+
+int VfioDevice::injectMsix(PPDMDEVINS pDevIns, InterruptInformation& irqInfo)
+{
+ AssertLogRelMsgReturn(msixCapabilityIterator, ("VFIO: Pending MSIX, but the capability is not provided \n"), VERR_NOT_SUPPORTED);
+ MSIXCapabilityDescriptor cap(*msixCapabilityIterator);
+ // Delivers a pending MSIX: requires a present and enabled MSIX capability, then raises the IRQ with the vector index of the eventfd.
+ AssertLogRelMsgReturn(cap.enabled(), ("VFIO: Pending MSIX, but the capability is disabled \n"), VERR_NOT_SUPPORTED);
+
+ PDMDevHlpPCISetIrqNoWait(pDevIns, irqInfo.index, PDM_IRQ_LEVEL_HIGH);
+ return VINF_SUCCESS;
+}
+
+int VfioDevice::configSpaceWriteHandler(PPDMDEVINS pDevIns, uint32_t uAddress, unsigned cb, uint32_t u32Value)
+{
+    // Reacts to guest config space writes (command register, MSI and MSIX capability) and then forwards the write to the device.
+    int rc {VINF_SUCCESS};
+    if (uAddress == VBOX_PCI_COMMAND)
+    {
+        tryHandleBarInterception(pDevIns, u32Value);
+    }
+    else if (msiCapabilityIterator and (uAddress >= msiCapabilityIterator->getOffset() and uAddress < (msiCapabilityIterator->getOffset() + sizeof(MSICapabilityDescriptor))))
+    {
+        MSICapabilityDescriptor lastCap {*msiCapabilityIterator};
+        // Overlay the written bytes on a copy of the capability; the length is clamped so a write near the end cannot run past the copy.
+        MSICapabilityDescriptor updatedCap {*msiCapabilityIterator};
+        std::memcpy(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(&updatedCap) + (uAddress - msiCapabilityIterator->getOffset())), &u32Value, RT_MIN(cb, sizeof(updatedCap) - (uAddress - msiCapabilityIterator->getOffset())));
+
+        if (not updatedCap.enabled() and lastCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_INTX);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (updatedCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_MSI, updatedCap.count());
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+    else if (msixCapabilityIterator and (uAddress >= msixCapabilityIterator->getOffset() and uAddress < (msixCapabilityIterator->getOffset() + sizeof(MSIXCapabilityDescriptor))))
+    {
+        MSIXCapabilityDescriptor lastCap {*msixCapabilityIterator};
+        // Same overlay as for MSI, with the same clamping of the copied length.
+        MSIXCapabilityDescriptor updatedCap {lastCap};
+        std::memcpy(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(&updatedCap) + (uAddress - msixCapabilityIterator->getOffset())), &u32Value, RT_MIN(cb, sizeof(updatedCap) - (uAddress - msixCapabilityIterator->getOffset())));
+
+        if (not updatedCap.enabled() and lastCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_INTX);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (updatedCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_MSIX, updatedCap.tableSize());
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+
+    return writeConfigSpace(u32Value, cb, uAddress);
+}
+
+int VfioDevice::mmioAccessHandler(PPDMDEVINS /*pDevIns*/, PCIBarRegion& barRegion, RTGCPHYS barOffset, void* pv, unsigned cb, bool writeAccess)
+{
+    // MMIO BAR access: accesses lying completely inside the MSIX table use the shadow table, everything else goes to the device.
+    if (msixCapabilityIterator)
+    {
+        MSIXCapabilityDescriptor cap {*msixCapabilityIterator};
+
+        if (cap.getBarIndex() == barRegion.iRegion and barOffset >= cap.getTableOffset()
+            and barOffset + cb <= cap.getTableOffset() + (sizeof(MSIXTableEntry) * cap.tableSize())) /* whole access must fit into the shadow */
+        {
+            AssertLogRelMsgReturn(cap.tableSize() == aMsixTable.size(),
+                                  ("VFIO: The MSIX table size mismatches the hardware table size. Assumed table size: %zu Hardware table size: %hu\n",
+                                   aMsixTable.size(),
+                                   cap.tableSize()),
+                                  VERR_NOT_SUPPORTED);
+            uint64_t msixTableEntryOffset {barOffset - cap.getTableOffset()};
+            void* shadowMsixTableOffset {reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(aMsixTable.data()) + msixTableEntryOffset)};
+            if (writeAccess)
+            {
+                /*
+                 * We need to shadow the MSIX table as explained in the else path, but we need to provide the VFIO device
+                 * with data written to the VFIO device.
+                 * Because of this we need to write the data through.
+                 */
+                std::memcpy(shadowMsixTableOffset, pv, cb);
+            }
+            else
+            {
+                std::memcpy(pv, shadowMsixTableOffset, cb);
+                /**
+                 * The VFIO Device returns invalid data in case of a read from the MSIX table.
+                 * Because of this, we need to shadow the table and return early without reading
+                 * from the actual VFIO device here.
+                 */
+                return VINF_SUCCESS;
+            }
+        }
+    }
+
+    if (writeAccess)
+    {
+        return writeToDevice(pv, cb, barRegion.offset + barOffset);
+    }
+    else
+    {
+        return readFromDevice(pv, cb, barRegion.offset + barOffset);
+    }
+}
+
+int VfioDevice::interceptConfigSpaceAccesses(PPDMDEVINS pDevIns)
+{
+ int rc {VINF_SUCCESS};
+ // Write callback: passes the value to writePciConfigSpaceShadow first, then lets configSpaceWriteHandler process and forward it.
+ auto configSpaceWrite = [](PPDMDEVINS pDev, PPDMPCIDEV pPciDev_, uint32_t uAddress, unsigned cb, uint32_t u32Value) -> VBOXSTRICTRC
+ {
+ PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+ writePciConfigSpaceShadow(pPciDev_, uAddress, cb, u32Value);
+ return pThis->configSpaceWriteHandler(pDev, uAddress, cb, u32Value);
+ };
+ // Installs vfioConfigSpaceRead and the lambda above as config space access handlers of pPciDev.
+ rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, vfioConfigSpaceRead, configSpaceWrite);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ return rc;
+}
+
+int VfioDevice::init(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath)
+{
+ int rc {VINF_SUCCESS};
+ // Full device bring-up: VFIO descriptors, PCI registration, BAR regions, interrupts and config space interception, in this order. The first failing step aborts and its status is returned.
+ rc = initializeVfio(pDevIns, sysfsPath);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ rc = initializePci(pDevIns);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ // NOTE(review): the group status is only logged below; its flags are not evaluated here - confirm whether a viability check is wanted.
+ vfio_group_status groupStatus;
+ groupStatus.argsz = sizeof(groupStatus);
+
+ rc = vfioControl(pDevIns, vfioGroupFd, VFIO_GROUP_GET_STATUS, "VFIO: Unable to retrieve VFIO group status\n" , &groupStatus);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ vfio_device_info deviceInfo;
+ deviceInfo.argsz = sizeof(deviceInfo);
+
+ rc = deviceControl(pDevIns, VFIO_DEVICE_GET_INFO, "VFIO: Unable to retrieve VFIO Device information\n", &deviceInfo);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ // The value logged as "Num BARs" is deviceInfo.num_regions.
+ LogRel(("VFIO: Successfully opened VFIO Device: Group Status Flags: %#x Device Flags: %#x, Num BARs: %u, Num IRQ's %u \n",
+ groupStatus.flags, deviceInfo.flags, deviceInfo.num_regions, deviceInfo.num_irqs));
+
+ rc = initializeMemoryRegions(pDevIns, deviceInfo);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ rc = initializeInterrupts(pDevIns);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ rc = interceptConfigSpaceAccesses(pDevIns);
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+ return rc;
+}
+
+int VfioDevice::registerDmaRange(PVM pVM, RTGCPHYS startGCPhys, RTGCPHYS endGCPhys)
+{
+ AssertLogRelReturn(RT_VALID_ALIGNED_PTR(startGCPhys, PAGE_SIZE) || startGCPhys == 0, VERR_INVALID_POINTER);
+ AssertLogRelReturn(RT_VALID_ALIGNED_PTR(endGCPhys + 1 , PAGE_SIZE), VERR_INVALID_POINTER);
+ // Maps the guest physical range into the IOMMU of the VFIO container. endGCPhys + 1 has to be page aligned, i.e. endGCPhys is presumably the last byte of the range - verify against the caller.
+ auto registerDma = [](uintptr_t hva, RTGCPHYS gpa, uint64_t size, int containerFd) -> int
+ {
+ struct vfio_iommu_type1_dma_map dma;
+ dma.argsz = sizeof(dma);
+ dma.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ dma.vaddr = hva;
+ dma.iova = gpa;
+ dma.size = static_cast<uint64_t>(size);
+ int rc {ioctl(containerFd, VFIO_IOMMU_MAP_DMA, &dma)};
+ AssertLogRelMsgReturn(rc == 0, ("VFIO: Could not acquire enough memory to map the Guest Physical address space. Adapt your ulimit\n"), VERR_NO_MEMORY);
+ // Every ioctl failure is reported as VERR_NO_MEMORY, whatever errno says.
+ return VINF_SUCCESS;
+ };
+ // State of the run of pages collected so far: host start address, guest start address, host address of the last page and size in bytes.
+ uintptr_t continousPagesStart {0};
+ RTGCPHYS continousPagesStartGCPhys {0};
+ uintptr_t continousPagesLast {0};
+ uint64_t continousRangeSize {0};
+ // A range size of 0 means that no run is open.
+ auto reset = [&]()
+ {
+ continousRangeSize = 0;
+ continousPagesStart = 0;
+ continousPagesLast = 0;
+ continousPagesStartGCPhys = 0;
+ };
+ // Walk the range page by page; pages whose host addresses directly follow each other are merged into one DMA mapping.
+ for (RTGCPHYS pageAddress {startGCPhys}; pageAddress < endGCPhys; pageAddress += PAGE_SIZE)
+ {
+ void* ptr;
+ if (RT_SUCCESS(PGMR3PhysTlbGCPhys2Ptr(pVM, pageAddress, true, &ptr)))
+ {
+ uintptr_t hcVirt(reinterpret_cast<uintptr_t>(ptr));
+ if (continousRangeSize > 0 and continousPagesLast + PAGE_SIZE == hcVirt)
+ {
+ continousPagesLast = hcVirt;
+ continousRangeSize += PAGE_SIZE;
+ continue;
+ }
+ else if (continousRangeSize != 0)
+ {
+ int rc {registerDma(continousPagesStart, continousPagesStartGCPhys, continousRangeSize, vfioContainerFd)};
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ }
+ continousPagesStart = hcVirt;
+ continousPagesLast = hcVirt;
+ continousPagesStartGCPhys = pageAddress;
+ continousRangeSize = PAGE_SIZE;
+ }
+ else if (continousRangeSize != 0)
+ {
+ int rc {registerDma(continousPagesStart, continousPagesStartGCPhys, continousRangeSize, vfioContainerFd)};
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ reset();
+ }
+ }
+ // Flush the run that is still open after the last page.
+ if (continousRangeSize != 0)
+ {
+ int rc {registerDma(continousPagesStart, continousPagesStartGCPhys, continousRangeSize, vfioContainerFd)};
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ reset();
+ }
+
+ return VINF_SUCCESS;
+}
+
+int VfioDevice::initializeDma(PPDMDEVINS pDevIns)
+{
+ auto pVM {PDMDevHlpGetVM(pDevIns)};
+ uint32_t ramRangeCount {PGMR3PhysGetRamRangeCount(pVM)};
+ // Registers every RAM range of the VM that is not flagged as MMIO for DMA; ranges whose lookup fails are skipped silently.
+ for (uint32_t i {0u}; i < ramRangeCount; ++i)
+ {
+ RTGCPHYS start, end;
+ bool isMMioRange;
+ if (RT_SUCCESS(PGMR3PhysGetRange(pVM, i, &start, &end, nullptr, &isMMioRange)) and not isMMioRange)
+ {
+ int rc {registerDmaRange(pVM, start, end)};
+ AssertLogRelReturn(RT_SUCCESS(rc), rc);
+ }
+
+ }
+
+ return VINF_SUCCESS;
+}
+
+int VfioDevice::terminate(PPDMDEVINS pDevIns)
+{
+    // Disables the interrupts, stops the IRQ delivery thread, drops the cached capability state and closes all VFIO descriptors.
+    int rc {VINF_SUCCESS};
+
+    rc = disableInterrupts(pDevIns);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    exit = true;
+    rc = RTThreadWaitNoResume(hIrqDeliveryThread, RT_INDEFINITE_WAIT, nullptr);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    exit = false;
+
+    aMsixTable.clear();
+    msiCapabilityIterator = std::nullopt;
+    msixCapabilityIterator = std::nullopt;
+
+    /* Close every descriptor even if one close() fails, so that no descriptor stays open and no member keeps a stale value. */
+    const int rcDevice {close(vfioDeviceFd)};
+    const int rcGroup {close(vfioGroupFd)};
+    const int rcContainer {close(vfioContainerFd)};
+
+    vfioDeviceFd = -1;
+    vfioGroupFd = -1;
+    vfioContainerFd = -1;
+
+    AssertLogRelMsgReturn(rcDevice == 0 and rcGroup == 0 and rcContainer == 0, ("VFIO: Could not close all VFIO file descriptors\n"), VERR_GENERAL_FAILURE);
+
+    return rc;
+}
+
+void VfioDevice::tryHandleBarInterception(PPDMDEVINS pDevIns, uint32_t pciConfigCommandValue)
+{
+    /*
+     * Tracks the I/O and memory decoding bits of a PCI command register write and maps the
+     * BARs of the device when a decoding type gets switched on.
+     *
+     * The state flags mirror the command bits, so rewriting the command register with a
+     * decoding bit still set (e.g. to toggle another bit) keeps the flag enabled instead of
+     * flipping it back to disabled.
+     */
+    const bool ioRequested {(pciConfigCommandValue & VBOX_PCI_COMMAND_IO) != 0};
+    const bool memoryRequested {(pciConfigCommandValue & VBOX_PCI_COMMAND_MEMORY) != 0};
+
+    /* BARs are only (re)mapped on a disabled -> enabled transition of the respective bit. */
+    const bool mapIoBars {ioRequested and not pciConfigIODecodingEnabled};
+    const bool mapMmioBars {memoryRequested and not pciConfigMemoryDecodingEnabled};
+
+    pciConfigIODecodingEnabled = ioRequested;
+    pciConfigMemoryDecodingEnabled = memoryRequested;
+
+    if (mapIoBars or mapMmioBars)
+    {
+        for (auto i {0u}; i < VBOX_PCI_MAX_BARS; ++i)
+        {
+            const auto barInfo {getBarInfo(i)};
+            if (not (pciBars[i].hRegion == NIL_IOMMMIOHANDLE or pciBars[i].hRegion == 0))
+            {
+                if (barInfo.isIoBar() and mapIoBars)
+                {
+                    registerPCIBar<0>(PDMDevHlpIoPortMap, PDMDevHlpIoPortUnmap, pDevIns, pciBars[i], barInfo.getBarAddress());
+                }
+                else if (barInfo.isMmioBar() and mapMmioBars)
+                {
+                    registerPCIBar<NIL_RTGCPHYS>(PDMDevHlpMmioMap, PDMDevHlpMmioUnmap, pDevIns, pciBars[i], barInfo.getBarAddress());
+                }
+            }
+        }
+    }
+}
+
+const PCIBar VfioDevice::getBarInfo(unsigned barNumber)
+{
+ uint64_t barOffset { VBOX_PCI_BASE_ADDRESS_0 + barNumber * sizeof(uint32_t)};
+ uint64_t barValue;
+ // Always reads 8 bytes starting at the BAR register, i.e. this BAR plus the following 4 bytes of config space.
+ readConfigSpace(barValue, sizeof(barValue), barOffset);
+ // NOTE(review): the status of readConfigSpace is not checked; barValue stays uninitialised if the read leaves it untouched.
+ PCIBar bar {barValue};
+ // A 64 bit BAR uses the whole value; otherwise only the low 32 bits belong to this BAR.
+ if (bar.is64BitBar()) {
+ return bar;
+ }
+
+ return {barValue & std::numeric_limits<uint32_t>::max()};
+}
diff --git a/src/VBox/Devices/Makefile.kmk b/src/VBox/Devices/Makefile.kmk
index e31080d7af..a7c089caa2 100644
--- a/src/VBox/Devices/Makefile.kmk
+++ b/src/VBox/Devices/Makefile.kmk
@@ -190,6 +190,8 @@ if !defined(VBOX_ONLY_EXTPACKS) && "$(intersects $(KBUILD_TARGET_ARCH),$(VBOX_SU
Input/UsbMouse.cpp \
Bus/DevPCI.cpp \
Bus/DevPciIch9.cpp \
+ $(if-expr defined(VBOX_WITH_KVM), Bus/DevVfio.cpp,) \
+ $(if-expr defined(VBOX_WITH_KVM), Bus/VfioDevice.cpp,) \
Bus/MsiCommon.cpp \
Bus/MsixCommon.cpp \
$(if $(VBOX_WITH_IOMMU_AMD),Bus/DevIommuAmd.cpp,) \
@@ -256,6 +258,10 @@ if !defined(VBOX_ONLY_EXTPACKS) && "$(intersects $(KBUILD_TARGET_ARCH),$(VBOX_SU
VBoxDD_SOURCES += Storage/DrvHostFloppy.cpp
endif
+ # VFIO
+ VBoxDD_LIBS.linux += $(LIB_VMM)
+ Bus/DevVfio.cpp_CXXFLAGS.linux += $(CYBERUS_CXX_FLAGS)
+ Bus/VfioDevice.cpp_CXXFLAGS.linux += $(CYBERUS_CXX_FLAGS)
ifn1of ($(KBUILD_TARGET), darwin)
VBoxDD_SOURCES += Storage/HBDMgmt-generic.cpp
diff --git a/src/VBox/Devices/build/VBoxDD.cpp b/src/VBox/Devices/build/VBoxDD.cpp
index 32a67a08f5..f355b992a6 100644
--- a/src/VBox/Devices/build/VBoxDD.cpp
+++ b/src/VBox/Devices/build/VBoxDD.cpp
@@ -218,6 +218,9 @@ extern "C" DECLEXPORT(int) VBoxDevicesRegister(PPDMDEVREGCB pCallbacks, uint32_t
if (RT_FAILURE(rc))
return rc;
#endif
+ rc = pCallbacks->pfnRegister(pCallbacks, &g_DeviceVfioDev);
+ if (RT_FAILURE(rc))
+ return rc;
rc = pCallbacks->pfnRegister(pCallbacks, &g_DeviceGIMDev);
if (RT_FAILURE(rc))
return rc;
diff --git a/src/VBox/Devices/build/VBoxDD.h b/src/VBox/Devices/build/VBoxDD.h
index 557d071213..ef70e457af 100644
--- a/src/VBox/Devices/build/VBoxDD.h
+++ b/src/VBox/Devices/build/VBoxDD.h
@@ -107,6 +107,7 @@ extern const PDMDEVREG g_DeviceEFI;
#ifdef VBOX_WITH_PCI_PASSTHROUGH_IMPL
extern const PDMDEVREG g_DevicePciRaw;
#endif
+extern const PDMDEVREG g_DeviceVfioDev;
extern const PDMDEVREG g_DeviceGIMDev;
extern const PDMDEVREG g_DeviceLPC;
#ifdef VBOX_WITH_VIRTUALKD
diff --git a/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp b/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
index e229f4119f..64bf2b1df7 100644
--- a/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
+++ b/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
@@ -2748,6 +2748,30 @@ HRESULT showVMInfo(ComPtr<IVirtualBox> pVirtualBox,
/* Host PCI passthrough devices */
#endif
+    SafeArray<BSTR> vfioDevices; /* sysfs paths of the VFIO devices assigned to the machine */
+    hrc = machine->COMGETTER(VFIODeviceAssignments)(ComSafeArrayAsOutParam(vfioDevices));
+    if (SUCCEEDED(hrc))
+    {
+        if (vfioDevices.size() > 0 && (details != VMINFO_MACHINEREADABLE))
+        {
+            RTPrintf("\n Attached VFIO Devices: \n\n");
+        }
+
+        for (size_t i {0}; i < vfioDevices.size(); ++i)
+        {
+            Utf8Str devicePath {vfioDevices[i]};
+
+            if (details == VMINFO_MACHINEREADABLE)
+            {
+                RTPrintf("AttachedVFIO%zu=%s\n", i, devicePath.c_str()); /* i is a size_t, so %zu instead of %d */
+            }
+            else
+            {
+                RTPrintf(" VFIO Device %s is attached\n", devicePath.c_str());
+            }
+        }
+    }
+
/*
* Bandwidth groups
*/
diff --git a/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp b/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
index c433fc12d2..6c80d40e06 100644
--- a/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
+++ b/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
@@ -214,6 +214,8 @@ enum
MODIFYVM_ATTACH_PCI,
MODIFYVM_DETACH_PCI,
#endif
+ MODIFYVM_ATTACH_VFIO,
+ MODIFYVM_DETACH_VFIO,
#ifdef VBOX_WITH_USB_CARDREADER
MODIFYVM_USBCARDREADER,
#endif
@@ -466,6 +468,8 @@ static const RTGETOPTDEF g_aModifyVMOptions[] =
OPT2("--pci-attach", "--pciattach", MODIFYVM_ATTACH_PCI, RTGETOPT_REQ_STRING),
OPT2("--pci-detach", "--pcidetach", MODIFYVM_DETACH_PCI, RTGETOPT_REQ_STRING),
#endif
+ { "--attachvfio", MODIFYVM_ATTACH_VFIO, RTGETOPT_REQ_STRING },
+ { "--detachvfio", MODIFYVM_DETACH_VFIO, RTGETOPT_REQ_STRING },
#ifdef VBOX_WITH_USB_CARDREADER
OPT2("--usb-card-reader", "--usbcardreader", MODIFYVM_USBCARDREADER, RTGETOPT_REQ_BOOL_ONOFF),
#endif
@@ -3593,6 +3597,17 @@ RTEXITCODE handleModifyVM(HandlerArg *a)
break;
}
#endif
+ case MODIFYVM_ATTACH_VFIO:
+ {
+ CHECK_ERROR(sessionMachine, AttachVFIODevice(Bstr(ValueUnion.psz).raw()));
+ break;
+ }
+
+ case MODIFYVM_DETACH_VFIO:
+ {
+ CHECK_ERROR(sessionMachine, DetachVFIODevice(Bstr(ValueUnion.psz).raw()));
+ break;
+ }
#ifdef VBOX_WITH_USB_CARDREADER
case MODIFYVM_USBCARDREADER:
diff --git a/src/VBox/Main/idl/VirtualBox.xidl b/src/VBox/Main/idl/VirtualBox.xidl
index b4472bb330..ec1e4974c7 100644
--- a/src/VBox/Main/idl/VirtualBox.xidl
+++ b/src/VBox/Main/idl/VirtualBox.xidl
@@ -8055,6 +8055,12 @@
</desc>
</attribute>
+ <attribute name="VFIODeviceAssignments" type="wstring" readonly="yes" safearray="yes">
+ <desc>
+ Array of VFIO Device paths, assigned to this machine.
+ </desc>
+ </attribute>
+
<attribute name="bandwidthControl" type="IBandwidthControl" readonly="yes">
<desc>
Bandwidth control manager.
@@ -9207,6 +9213,24 @@
</param>
</method>
+ <method name="attachVFIODevice">
+ <desc>
+ Attach a host VFIO device to the virtual machine.
+ </desc>
+ <param name="hostFileName" type="wstring" dir="in">
+ <desc> Absolute path to the device file in sysfs.</desc>
+ </param>
+ </method>
+
+ <method name="detachVFIODevice">
+ <desc>
+ Detach a host VFIO device from the virtual machine.
+ </desc>
+ <param name="hostFileName" type="wstring" dir="in">
+ <desc> Absolute path to the device file in sysfs.</desc>
+ </param>
+ </method>
+
<method name="getNetworkAdapter" const="yes">
<rest request="get" path="/vms/{vmid}/configuration/"/>
<desc>
diff --git a/src/VBox/Main/include/ConsoleImpl.h b/src/VBox/Main/include/ConsoleImpl.h
index 366f1b39ad..1d3e8155d6 100644
--- a/src/VBox/Main/include/ConsoleImpl.h
+++ b/src/VBox/Main/include/ConsoleImpl.h
@@ -847,6 +847,7 @@ private:
bool fForce);
HRESULT i_attachRawPCIDevices(PUVM pUVM, BusAssignmentManager *BusMgr, PCFGMNODE pDevices);
+ HRESULT i_attachVfioDevices(BusAssignmentManager *BusMgr, PCFGMNODE pDevices, PCVMMR3VTABLE pVMM);
struct LEDSET;
typedef struct LEDSET *PLEDSET;
PPDMLED volatile *i_getLedSet(uint32_t iLedSet);
diff --git a/src/VBox/Main/include/MachineImpl.h b/src/VBox/Main/include/MachineImpl.h
index dc11e96d59..3b66f995c5 100644
--- a/src/VBox/Main/include/MachineImpl.h
+++ b/src/VBox/Main/include/MachineImpl.h
@@ -339,6 +339,8 @@ public:
typedef std::list<ComObjPtr<PCIDeviceAttachment> > PCIDeviceAssignmentList;
PCIDeviceAssignmentList mPCIDeviceAssignments;
+ std::vector<Utf8Str> mVFIODeviceAssignments;
+
settings::Debugging mDebugging;
settings::Autostart mAutostart;
@@ -1011,6 +1013,7 @@ private:
HRESULT getIOCacheSize(ULONG *aIOCacheSize);
HRESULT setIOCacheSize(ULONG aIOCacheSize);
HRESULT getPCIDeviceAssignments(std::vector<ComPtr<IPCIDeviceAttachment> > &aPCIDeviceAssignments);
+ HRESULT getVFIODeviceAssignments(std::vector<com::Utf8Str> &aVFIODeviceAssignments);
HRESULT getBandwidthControl(ComPtr<IBandwidthControl> &aBandwidthControl);
HRESULT getTracingEnabled(BOOL *aTracingEnabled);
HRESULT setTracingEnabled(BOOL aTracingEnabled);
@@ -1110,6 +1113,8 @@ private:
LONG aDesiredGuestAddress,
BOOL aTryToUnbind);
HRESULT detachHostPCIDevice(LONG aHostAddress);
+ HRESULT attachVFIODevice(const com::Utf8Str &aDevicePath);
+ HRESULT detachVFIODevice(const com::Utf8Str &aDevicePath);
HRESULT getNetworkAdapter(ULONG aSlot,
ComPtr<INetworkAdapter> &aAdapter);
HRESULT addStorageController(const com::Utf8Str &aName,
diff --git a/src/VBox/Main/src-client/BusAssignmentManager.cpp b/src/VBox/Main/src-client/BusAssignmentManager.cpp
index 9f87323810..dbdffead42 100644
--- a/src/VBox/Main/src-client/BusAssignmentManager.cpp
+++ b/src/VBox/Main/src-client/BusAssignmentManager.cpp
@@ -109,17 +109,18 @@ static const DeviceAssignmentRule g_aGenericRules[] =
#endif
/* Network controllers */
- /* the first network card gets the PCI ID 3, the next 3 gets 8..10,
- * next 4 get 16..19. In "VMWare compatibility" mode the IDs 3 and 17
- * swap places, i.e. the first card goes to ID 17=0x11. */
+ /* the first network card gets the PCI ID 3, the next 3 get 8..10 */
+
{"nic", 0, 3, 0, 1},
{"nic", 0, 8, 0, 1},
{"nic", 0, 9, 0, 1},
{"nic", 0, 10, 0, 1},
- {"nic", 0, 16, 0, 1},
- {"nic", 0, 17, 0, 1},
- {"nic", 0, 18, 0, 1},
- {"nic", 0, 19, 0, 1},
+
+ /* Vfio Devices */
+ {"vfio", 0, 16, 0, 1},
+ {"vfio", 0, 17, 0, 1},
+ {"vfio", 0, 18, 0, 1},
+ {"vfio", 0, 19, 0, 1},
/* ISA/LPC controller */
{"lpc", 0, 31, 0, 0},
diff --git a/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp b/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
index f0b2ee0f0f..6ebebb28cc 100644
--- a/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
+++ b/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
@@ -536,7 +536,6 @@ int Console::i_configConstructorInner(PUVM pUVM, PVM pVM, PCVMMR3VTABLE pVMM, Au
return VERR_PLATFORM_ARCH_NOT_SUPPORTED;
}
-
/**
* Configures an audio driver via CFGM by getting (optional) values from extra data.
*
diff --git a/src/VBox/Main/src-server/MachineImpl.cpp b/src/VBox/Main/src-server/MachineImpl.cpp
index 59fc285582..0fb5e25735 100644
--- a/src/VBox/Main/src-server/MachineImpl.cpp
+++ b/src/VBox/Main/src-server/MachineImpl.cpp
@@ -6423,6 +6423,80 @@ HRESULT Machine::getPCIDeviceAssignments(std::vector<ComPtr<IPCIDeviceAttachment
return S_OK;
}
+/**
+ * Appends a host VFIO device path to mHWData->mVFIODeviceAssignments.
+ *
+ * @returns S_OK on success, E_INVALIDARG if the path is already attached, or
+ * the failure code of the state check / hardware data backup.
+ * @param aDevicePath Path of the VFIO device to attach.
+ */
+HRESULT Machine::attachVFIODevice(const com::Utf8Str &aDevicePath)
+{
+ AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
+
+ HRESULT hrc = i_checkStateDependency(MutableStateDep);
+ if (FAILED(hrc))
+ return hrc;
+
+ /* Refuse duplicates: each device path may be assigned only once. */
+ const auto &assignments = mHWData->mVFIODeviceAssignments;
+ if (std::find(assignments.begin(), assignments.end(), aDevicePath) != assignments.end())
+ {
+ /* setError() is variadic: pass the C string, never the Utf8Str object. */
+ return setError(E_INVALIDARG, tr("The VFIO device %s is already attached"), aDevicePath.c_str());
+ }
+
+ /* Back up the hardware data before modifying it. */
+ hrc = mHWData.backupEx();
+ if (FAILED(hrc))
+ return hrc;
+
+ mHWData->mVFIODeviceAssignments.emplace_back(aDevicePath);
+ return S_OK;
+}
+
+/**
+ * Removes a host VFIO device path from mHWData->mVFIODeviceAssignments.
+ *
+ * @returns S_OK on success, VBOX_E_OBJECT_NOT_FOUND if the path is not attached,
+ * or the failure code of the state check / hardware data backup.
+ * @param aDevicePath Path of the VFIO device to detach.
+ */
+HRESULT Machine::detachVFIODevice(const com::Utf8Str &aDevicePath)
+{
+ AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
+
+ HRESULT hrc = i_checkStateDependency(MutableStateDep);
+ if (FAILED(hrc))
+ return hrc;
+
+ /* Back up first and search afterwards, so the iterator handed to erase() is
+ * taken from the container as it is after backupEx() has run. */
+ hrc = mHWData.backupEx();
+ if (FAILED(hrc))
+ return hrc;
+
+ auto &assignments = mHWData->mVFIODeviceAssignments;
+ auto it = std::find(assignments.begin(), assignments.end(), aDevicePath);
+ if (it == assignments.end())
+ {
+ /* setError() is variadic: pass the C string, never the Utf8Str object. */
+ return setError(VBOX_E_OBJECT_NOT_FOUND, tr("No VFIO device %s attached"), aDevicePath.c_str());
+ }
+
+ assignments.erase(it);
+
+ return S_OK;
+}
+
+/** Appends every assigned VFIO device path to @a aVFIODeviceAssignments; returns S_OK. */
+HRESULT Machine::getVFIODeviceAssignments(std::vector<com::Utf8Str>& aVFIODeviceAssignments)
+{
+ AutoReadLock alock(this COMMA_LOCKVAL_SRC_POS);
+ aVFIODeviceAssignments.insert(aVFIODeviceAssignments.end(), mHWData->mVFIODeviceAssignments.begin(), mHWData->mVFIODeviceAssignments.end());
+ return S_OK;
+}
+
HRESULT Machine::getBandwidthControl(ComPtr<IBandwidthControl> &aBandwidthControl)
{
mBandwidthControl.queryInterfaceTo(aBandwidthControl.asOutParam());
@@ -8853,6 +8927,12 @@ HRESULT Machine::i_loadHardware(const Guid *puuidRegistry,
mHWData->mPCIDeviceAssignments.push_back(pda);
}
+ // VFIO Devices: copy each stored device path into the hardware data.
+ for (const auto &deviceAssignment : data.vfioAttachments)
+ {
+ mHWData->mVFIODeviceAssignments.push_back(deviceAssignment.strDevicePath);
+ }
+
/*
* (The following isn't really real hardware, but it lives in HWData
* for reasons of convenience.)
@@ -10234,6 +10314,17 @@ HRESULT Machine::i_saveHardware(settings::Hardware &data, settings::Debugging *p
data.pciAttachments.push_back(hpda);
}
+ /* VFIO Devices: rebuild the settings list from the current assignments. */
+ data.vfioAttachments.clear();
+ for (const auto &devStr : mHWData->mVFIODeviceAssignments)
+ {
+ settings::VFIODeviceAttachment vfioda;
+
+ vfioda.strDevicePath = devStr;
+
+ data.vfioAttachments.push_back(vfioda);
+ }
+
// guest properties
data.llGuestProperties.clear();
#ifdef VBOX_WITH_GUEST_PROPS
diff --git a/src/VBox/Main/xml/Settings.cpp b/src/VBox/Main/xml/Settings.cpp
index ea46a45ab8..a75cde069c 100644
--- a/src/VBox/Main/xml/Settings.cpp
+++ b/src/VBox/Main/xml/Settings.cpp
@@ -3967,6 +3967,22 @@ bool HostPCIDeviceAttachment::operator==(const HostPCIDeviceAttachment &a) const
&& strDeviceName == a.strDeviceName);
}
+/**
+ * Default constructor of VFIODeviceAttachment; the body is empty.
+ */
+VFIODeviceAttachment::VFIODeviceAttachment() {}
+
+/**
+ * Equality test used by MachineConfigFile::operator== (and thus by
+ * Machine::saveSettings) to decide whether settings must be rewritten.
+ */
+bool VFIODeviceAttachment::operator==(const VFIODeviceAttachment &a) const
+{
+ if (this == &a)
+ return true; /* same object, nothing to compare */
+ return strDevicePath == a.strDevicePath;
+}
+
#ifdef VBOX_WITH_VIRT_ARMV8
PlatformARM::PlatformARM()
{
@@ -4214,6 +4230,7 @@ bool Hardware::operator==(const Hardware& h) const
&& llGuestProperties == h.llGuestProperties
&& ioSettings == h.ioSettings
&& pciAttachments == h.pciAttachments
+ && vfioAttachments == h.vfioAttachments
&& strDefaultFrontend == h.strDefaultFrontend);
}
@@ -6125,6 +6142,26 @@ void MachineConfigFile::readHardware(const xml::ElementNode &elmHardware,
}
}
}
+ else if (pelmHwChild->nameEquals("Vfio"))
+ {
+ const xml::ElementNode *pelmDevices;
+ if ((pelmDevices = pelmHwChild->findChildElement("Devices")))
+ {
+ xml::NodesLoop nl2(*pelmDevices, "Device");
+ const xml::ElementNode *pelmDevice;
+ while ((pelmDevice = nl2.forAllNodes()))
+ {
+ VFIODeviceAttachment vfioda;
+
+ if (!pelmDevice->getAttributeValue("devicePath", vfioda.strDevicePath))
+ {
+ throw ConfigFileError(this, pelmDevice, N_("Missing Device/@devicePath attribute"));
+ }
+
+ hw.vfioAttachments.push_back(vfioda);
+ }
+ }
+ }
else if (pelmHwChild->nameEquals("EmulatedUSB"))
{
const xml::ElementNode *pelmCardReader;
@@ -8438,6 +8475,20 @@ void MachineConfigFile::buildHardwareXML(xml::ElementNode &elmParent,
}
}
+ if ( m->sv >= SettingsVersion_v1_17
+ && hw.vfioAttachments.size())
+ {
+ xml::ElementNode *pelmVFIO = pelmHardware->createChild("Vfio");
+ xml::ElementNode *pelmVFIODevices = pelmVFIO->createChild("Devices");
+ /* Emit one "Device" child carrying a devicePath attribute per attachment. */
+ for (const auto &deviceAssignment : hw.vfioAttachments)
+ {
+ xml::ElementNode *pelmThis = pelmVFIODevices->createChild("Device");
+
+ pelmThis->setAttribute("devicePath", deviceAssignment.strDevicePath);
+ }
+ }
+
if ( m->sv >= SettingsVersion_v1_12
&& hw.fEmulatedUSBCardReader)
{
@@ -9729,6 +9780,12 @@ void MachineConfigFile::bumpSettingsVersionIfNeeded()
return;
}
}
+
+ if (hardwareMachine.vfioAttachments.size() > 0)
+ {
+ m->sv = SettingsVersion_v1_17;
+ return;
+ }
}
if (m->sv < SettingsVersion_v1_16)
diff --git a/src/VBox/Runtime/VBox/log-vbox.cpp b/src/VBox/Runtime/VBox/log-vbox.cpp
index c1edf5a81b..522fd4c0cd 100644
--- a/src/VBox/Runtime/VBox/log-vbox.cpp
+++ b/src/VBox/Runtime/VBox/log-vbox.cpp
@@ -272,6 +272,7 @@ RTDECL(PRTLOGGER) RTLogDefaultInit(void)
ASSERT_LOG_GROUP(DEV_SB16);
ASSERT_LOG_GROUP(DEV_SERIAL);
ASSERT_LOG_GROUP(DEV_SMC);
+ ASSERT_LOG_GROUP(DEV_VFIO);
ASSERT_LOG_GROUP(DEV_VGA);
ASSERT_LOG_GROUP(DEV_VIRTIO);
ASSERT_LOG_GROUP(DEV_VIRTIO_NET);
--
2.45.0