File 0002-pci-passthrough-add-VFIO-implementation.patch of Package virtualbox

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Messer <martin.messer@cyberus-technology.de>
Date: Tue, 5 Oct 2021 13:57:43 +0000
Subject: [PATCH] pci-passthrough: add VFIO implementation

The VFIO device is added and can be configured via VBoxManage to
pass-through arbitrary PCI devices from the host system. The user is
responsible to prepare the host PCI device so it can be used by the VFIO
framework.

vfio: fix VBoxManage vfio controls

license: vfio
---
 include/VBox/log.h                            |   3 +
 include/VBox/pci.h                            |   2 +
 include/VBox/settings.h                       |  18 +
 include/svp/pci.h                             | 444 +++++++++
 src/VBox/Devices/Bus/DevVfio.cpp              | 154 +++
 src/VBox/Devices/Bus/DevVfio.h                | 412 ++++++++
 src/VBox/Devices/Bus/VfioDevice.cpp           | 912 ++++++++++++++++++
 src/VBox/Devices/Makefile.kmk                 |   6 +
 src/VBox/Devices/build/VBoxDD.cpp             |   3 +
 src/VBox/Devices/build/VBoxDD.h               |   1 +
 .../Frontends/VBoxManage/VBoxManageInfo.cpp   |  24 +
 .../VBoxManage/VBoxManageModifyVM.cpp         |  15 +
 src/VBox/Main/idl/VirtualBox.xidl             |  24 +
 src/VBox/Main/include/ConsoleImpl.h           |   1 +
 src/VBox/Main/include/MachineImpl.h           |   5 +
 .../Main/src-client/BusAssignmentManager.cpp  |  15 +-
 .../src-client/ConsoleImplConfigCommon.cpp    |   1 -
 src/VBox/Main/src-server/MachineImpl.cpp      |  91 ++
 src/VBox/Main/xml/Settings.cpp                |  57 ++
 src/VBox/Runtime/VBox/log-vbox.cpp            |   1 +
 20 files changed, 2181 insertions(+), 8 deletions(-)
 create mode 100644 vboxsrc/include/svp/pci.h
 create mode 100644 vboxsrc/src/VBox/Devices/Bus/DevVfio.cpp
 create mode 100644 vboxsrc/src/VBox/Devices/Bus/DevVfio.h
 create mode 100644 vboxsrc/src/VBox/Devices/Bus/VfioDevice.cpp

diff --git a/include/VBox/log.h b/include/VBox/log.h
index 5a4193dbcc..7d434c208b 100644
--- a/include/VBox/log.h
+++ b/include/VBox/log.h
@@ -182,6 +182,8 @@ typedef enum VBOXLOGGROUP
     LOG_GROUP_DEV_SMC,
     /** Trusted Platform Module Device group. */
     LOG_GROUP_DEV_TPM,
+    /** Vfio Device group. */
+    LOG_GROUP_DEV_VFIO,
     /** VGA Device group. */
     LOG_GROUP_DEV_VGA,
     /** Virtio PCI Device group. */
@@ -930,6 +932,7 @@ typedef enum VBOXLOGGROUP
     "DEV_SERIAL", \
     "DEV_SMC", \
     "DEV_TPM", \
+    "DEV_VFIO", \
     "DEV_VGA", \
     "DEV_VIRTIO", \
     "DEV_VIRTIO_NET", \
diff --git a/include/VBox/pci.h b/include/VBox/pci.h
index 7a51a32e94..7d6dd54e81 100644
--- a/include/VBox/pci.h
+++ b/include/VBox/pci.h
@@ -631,6 +631,8 @@ typedef enum PCIADDRTYPE
 #define VBOX_PCI_ROM_SLOT    6
 /** Max number of I/O regions. */
 #define VBOX_PCI_NUM_REGIONS 7
+/** Max Number of PCI BARs */
+#define VBOX_PCI_MAX_BARS 6
 
 #define PCI_ROM_SLOT         VBOX_PCI_ROM_SLOT    /**< deprecated */
 #define PCI_NUM_REGIONS      VBOX_PCI_NUM_REGIONS /**< deprecated */
diff --git a/include/VBox/settings.h b/include/VBox/settings.h
index cd6cbb9d04..e91d03c66b 100644
--- a/include/VBox/settings.h
+++ b/include/VBox/settings.h
@@ -1126,6 +1126,22 @@ struct HostPCIDeviceAttachment
 
 typedef std::list<HostPCIDeviceAttachment> HostPCIDeviceAttachmentList;
 
+/**
+ * NOTE: If you add any fields in here, you must update a) the constructor and b)
+ * the operator== which is used by MachineConfigFile::operator==(), or otherwise
+ * your settings might never get saved.
+ */
+struct VFIODeviceAttachment
+{
+    VFIODeviceAttachment();
+
+    bool operator==(const VFIODeviceAttachment &a) const;
+
+    com::Utf8Str strDevicePath;
+};
+
+typedef std::vector<VFIODeviceAttachment> VFIODeviceAttachmentList;
+
 /**
  * A device attached to a storage controller. This can either be a
  * hard disk or a DVD drive or a floppy drive and also specifies
@@ -1390,6 +1406,8 @@ struct Hardware
 
     IOSettings          ioSettings;             // requires settings version 1.10 (VirtualBox 3.2)
     HostPCIDeviceAttachmentList pciAttachments; // requires settings version 1.12 (VirtualBox 4.1)
+    VFIODeviceAttachmentList vfioAttachments;  // requires settings version 1.17 (VirtualBox 6.0)
+
 
     com::Utf8Str        strDefaultFrontend;     // requires settings version 1.14 (VirtualBox 4.3)
 };
diff --git a/include/svp/pci.h b/include/svp/pci.h
new file mode 100644
index 0000000000..9a020b6803
--- /dev/null
+++ b/include/svp/pci.h
@@ -0,0 +1,444 @@
+#pragma once
+
+#include <VBox/pci.h>
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/pdmpcidev.h>
+
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <optional>
+#include <type_traits>
+
+typedef struct PCIBarRegion
+{
+    static_assert(std::is_same<IOMMMIOHANDLE, IOMIOPORTHANDLE>::value,
+                  "IOMMMIOHANDLE and IOMIOPORTHANDLE have different types now please extend this struct for the "
+                  "support of both!");
+    IOMMMIOHANDLE hRegion;
+    uint8_t iRegion;  ///< The bar index e.G Bar0.
+    uint64_t offset;  ///< The bar offset into the vfio device.
+    uint64_t size;    ///< The size of the bar.
+    RTGCPHYS address; ///< Base address of the bar.
+} PCIBARREGION;
+
+typedef PCIBARREGION* PPCIBARREGION;
+
+class PCIBar
+{
+public:
+    PCIBar() = delete;
+
+    PCIBar(uint64_t value_) : value(value_)
+    {
+        if (not is64BitBar()) {
+            value &= std::numeric_limits<uint32_t>::max();
+        }
+    }
+
+    bool isIoBar() const { return (value & PCI_BAR_TYPE_MASK) == PCI_ADDRESS_SPACE_IO; }
+    bool isMmioBar() const { return (value & PCI_BAR_TYPE_MASK) == PCI_ADDRESS_SPACE_MEM; }
+    bool is64BitBar() const { return (value & PCI_BAR_ADDRESS_MASK) == PCI_ADDRESS_SPACE_BAR64; }
+
+    uint64_t getBarAddress() const
+    {
+        if (isIoBar()) {
+            return value & ~PCI_CFG_IO_FLAGS_MASK;
+        } else if (isMmioBar()) {
+            return value & ~PCI_CFG_MMIO_FLAGS_MASK;
+        }
+
+        return 0;
+    }
+
+private:
+    static constexpr uint64_t PCI_CFG_IO_FLAGS_MASK {0x3};
+    static constexpr uint64_t PCI_CFG_MMIO_FLAGS_MASK {0xf};
+    static constexpr uint64_t PCI_BAR_TYPE_MASK {0x1};
+    static constexpr uint64_t PCI_BAR_ADDRESS_MASK {0x4};
+
+    uint64_t value;
+};
+
+/**
+ * Describes the generic part of a capability descriptor.
+ */
+struct __attribute__((__packed__)) CapabilityDescriptor
+{
+    uint8_t capID {0};
+    uint8_t nextPtr {0};
+};
+static_assert(sizeof(CapabilityDescriptor) == 0x2,
+              "The Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * Read an object of type T, byte by byte, from the PCI configuration space.
+ *
+ * \param pDevIns The device instance that is handed through to readFn.
+ * \param offset Offset into the PCI configuration space of the first byte to read.
+ * \param readFn The function that should be used to read from the PCI configuration space.
+ *
+ * \return An object of type T assembled from the bytes that were read.
+ */
+template <typename T>
+T readType(PPDMDEVINS pDevIns, uint32_t offset, PFNPCICONFIGREAD readFn)
+{
+    T t;
+    char* ptr {reinterpret_cast<char*>(&t)};
+
+    // TODO: can be optimized to minimize cfg space read accesses as we could read 4 bytes at once
+    for (size_t i = 0; i < sizeof(T); i++) {
+        // readFn stores through a uint32_t*, so it gets a full 32-bit slot even though only one byte is requested.
+        uint32_t data {0};
+        readFn(pDevIns, nullptr, offset + i, 1u, &data);
+        ptr[i] = static_cast<char>(data & 0xffu);
+    }
+
+    return t;
+}
+
+/*
+ * The pci configuration space capability list abstraction
+ *
+ * The abstraction makes an easy iteration of capabilities in the pci config space possible
+ * Additionally, a conversion from the basic CapabilityDescriptor to a special capability is possible
+ */
+class CapabilityList
+{
+public:
+    class CapabilityIterator
+    {
+    public:
+        using iterator_category = std::input_iterator_tag;
+        using value_type = CapabilityDescriptor;
+        using difference_type = size_t;
+        using pointer = CapabilityDescriptor*;
+        using reference = CapabilityDescriptor&;
+
+        CapabilityIterator(uint32_t capListPtr, PFNPCICONFIGREAD readFn_, PPDMDEVINS pDevIns_)
+            : offset(capListPtr), pDevIns(pDevIns_), readFn(readFn_)
+        {}
+
+        CapabilityIterator(const CapabilityIterator& o) : offset(o.offset), pDevIns(o.pDevIns), readFn(o.readFn) {}
+
+        value_type operator*() const
+        {
+            assert(offset);
+            return readType<CapabilityDescriptor>(pDevIns, offset, readFn);
+        }
+
+        CapabilityIterator& operator++()
+        {
+            assert(offset);
+            static constexpr uint32_t CAP_PTR_MASK {0x3};
+            auto capDescriptor {readType<CapabilityDescriptor>(pDevIns, offset, readFn)};
+            offset = capDescriptor.nextPtr & (~CAP_PTR_MASK);
+            return *this;
+        }
+
+        bool operator==(const CapabilityIterator& o) const { return offset == o.offset and readFn == o.readFn; }
+
+        bool operator!=(const CapabilityIterator& o) const { return not operator==(o); }
+
+        template <typename T>
+        T getCapability() const
+        {
+            assert(offset);
+            return readType<T>(pDevIns, offset, readFn);
+        }
+
+        uint32_t getOffset() const { return offset; }
+
+    private:
+        uint32_t offset;
+        PPDMDEVINS pDevIns;
+        PFNPCICONFIGREAD readFn;
+    };
+
+    CapabilityList(PFNPCICONFIGREAD readFn_, PPDMDEVINS pDevIns_ = nullptr) : pDevIns(pDevIns_), readFn(readFn_)
+    {
+        if (enabled()) {
+            readFn(pDevIns, nullptr, VBOX_PCI_CAPABILITY_LIST, PCI_CAPABILITY_LIST_PTR_SIZE, &capListPtr);
+        }
+    }
+
+    /**
+     * The function checks if the PCI device has support for capabilities
+     *
+     * \return true if the status register could be read and it advertises a capability list.
+     */
+    bool enabled()
+    {
+        static constexpr uint32_t PCI_STATUS_REGISTER_SIZE {0x2};
+        uint32_t pciStatus {0};
+
+        auto rc {readFn(pDevIns, nullptr, VBOX_PCI_STATUS, PCI_STATUS_REGISTER_SIZE, &pciStatus)};
+
+        return RT_SUCCESS(rc) ? (pciStatus & VBOX_PCI_STATUS_CAP_LIST) : false;
+    }
+
+    CapabilityIterator begin() { return {capListPtr, readFn, pDevIns}; }
+    CapabilityIterator end() { return {0x0, readFn, pDevIns}; }
+
+    std::optional<CapabilityIterator> getCapabilityIterator(uint8_t capId)
+    {
+        if (not enabled()) {
+            return std::nullopt;
+        }
+        auto it {std::find_if(begin(), end(), [capId](CapabilityDescriptor desc) { return desc.capID == capId; })};
+
+        if (it != end()) {
+            return it;
+        }
+
+        return std::nullopt;
+    }
+
+private:
+    static constexpr uint8_t PCI_CAPABILITY_LIST_PTR_SIZE {sizeof(uint8_t)};
+    PPDMDEVINS pDevIns;
+    PFNPCICONFIGREAD readFn;
+    uint32_t capListPtr {0x0};
+};
+
+/**
+ * MSI capability descriptor (PCI Local Bus Specification REV 3.0); msgControl selects the valid union layout.
+ */
+class __attribute__((__packed__)) MSICapabilityDescriptor : public CapabilityDescriptor
+{
+private:
+    using CapabilityIterator = CapabilityList::CapabilityIterator;
+
+    uint16_t msgControl {0};
+    uint32_t msgAddress {0};
+
+    union __attribute__((__packed__))
+    {
+        uint16_t msgData32Bit;
+        struct
+        {
+            uint32_t msgAddressHigh;
+            uint16_t msgData;
+        } msi64bit;
+        struct
+        {
+            uint16_t msgData;
+            uint16_t reserved;
+            uint32_t maskBits;
+            uint32_t pendingBits;
+        } msiPerVectorMasking;
+        struct
+        {
+            uint32_t msgAddressHigh;
+            uint16_t msgData;
+            uint16_t reserved;
+            uint32_t maskBits;
+            uint32_t pendingBits;
+        } msi64BitPerVectorMasking {0, 0, 0, 0, 0};
+    };
+
+public:
+    MSICapabilityDescriptor() = default;
+    // We possibly read too much data here, if not all features of the MSI subsystem are supported.
+    // We accept this and treat the feature variables that are not activated in msgControl as garbage
+    MSICapabilityDescriptor(const CapabilityIterator& iterator)
+        : MSICapabilityDescriptor(iterator.getCapability<MSICapabilityDescriptor>())
+    {}
+
+    // The base is copied explicitly; otherwise capID and nextPtr would be reset to 0 on every copy.
+    MSICapabilityDescriptor(const MSICapabilityDescriptor& o) : CapabilityDescriptor(o), msgControl(o.msgControl),
+        msgAddress(o.msgAddress), msi64BitPerVectorMasking(o.msi64BitPerVectorMasking) {}
+
+    bool enabled() const { return msgControl & VBOX_PCI_MSI_FLAGS_ENABLE; }
+
+    bool isPerVectorMaskable() const { return msgControl & VBOX_PCI_MSI_FLAGS_MASKBIT; }
+
+    bool is64Bit() const { return msgControl & VBOX_PCI_MSI_FLAGS_64BIT; }
+
+    uint8_t maxCount() const
+    {
+        static constexpr uint8_t PCI_MSI_FLAGS_QMASK_SHIFT {1u};
+        return 1 << ((msgControl & VBOX_PCI_MSI_FLAGS_QMASK) >> PCI_MSI_FLAGS_QMASK_SHIFT);
+    }
+
+    uint8_t count() const
+    {
+        static constexpr uint8_t PCI_MSI_FLAGS_QSIZE_SHIFT {4u};
+        return 1 << ((msgControl & VBOX_PCI_MSI_FLAGS_QSIZE) >> PCI_MSI_FLAGS_QSIZE_SHIFT);
+    }
+
+    uint64_t messageAddress() const
+    {
+        return is64Bit() ? static_cast<uint64_t>(msi64bit.msgAddressHigh) << 32 | msgAddress : msgAddress;
+    }
+
+    uint16_t messageData() const { return is64Bit() ? msi64bit.msgData : msgData32Bit; }
+
+    bool isMasked(uint32_t vector) const
+    {
+        if (not isPerVectorMaskable()) {
+            return false;
+        }
+
+        uint32_t maskBits {0};
+        if (is64Bit()) {
+            maskBits = msi64BitPerVectorMasking.maskBits;
+        } else {
+            maskBits = msiPerVectorMasking.maskBits;
+        }
+
+        return maskBits & (1u << vector);
+    }
+
+    std::optional<uint32_t> maskBitOffset() const
+    {
+        if (not isPerVectorMaskable()) {
+            return std::nullopt;
+        }
+
+        return is64Bit() ? 0x10 : 0xC;
+    }
+
+    std::optional<uint32_t> pendingBitOffset() const
+    {
+        if (not isPerVectorMaskable()) {
+            return std::nullopt;
+        }
+
+        return is64Bit() ? 0x14 : 0x10;
+    }
+};
+static_assert(sizeof(MSICapabilityDescriptor) == 0x18,
+              "The MSI Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * MSIX capability descriptor  based on the PCI Local Bus Specification REV 3.0
+ */
+class __attribute__((__packed__)) MSIXCapabilityDescriptor : public CapabilityDescriptor
+{
+private:
+    using CapabilityIterator = CapabilityList::CapabilityIterator;
+
+    uint16_t msgControl {0};
+    uint32_t tableOffset {0};
+    uint32_t pendingBitArrayOffset {0};
+
+    static constexpr uint32_t MSIX_TABLE_OFFSET_MASK {~0x7u};
+
+public:
+    MSIXCapabilityDescriptor() = default;
+    // The base is copied explicitly; otherwise capID and nextPtr would be reset to 0 on every copy.
+    MSIXCapabilityDescriptor(const MSIXCapabilityDescriptor& o) : CapabilityDescriptor(o), msgControl(o.msgControl),
+        tableOffset(o.tableOffset), pendingBitArrayOffset(o.pendingBitArrayOffset) {}
+
+    MSIXCapabilityDescriptor(const CapabilityIterator& iterator)
+        : MSIXCapabilityDescriptor(iterator.getCapability<MSIXCapabilityDescriptor>())
+    {}
+
+    bool enabled() const { return msgControl & VBOX_PCI_MSIX_FLAGS_ENABLE; }
+
+    bool allMasked() const { return msgControl & VBOX_PCI_MSIX_FLAGS_FUNCMASK; }
+
+    uint16_t tableSize() const
+    {
+        // According to the PCI Local Bus Specification REV 3.0
+        // the MSIX Table size is encoded as N-1  in the bits 0 to 10
+        // of message control, so we need to add 1 to
+        // get the actual table size.
+        static constexpr uint16_t MSIX_TABLE_SIZE_MASK {0x7ff};
+        return (msgControl & MSIX_TABLE_SIZE_MASK) + 1;
+    }
+
+    uint32_t getTableOffset() const { return tableOffset & MSIX_TABLE_OFFSET_MASK; }
+
+    uint32_t getBarIndex() const { return tableOffset & ~MSIX_TABLE_OFFSET_MASK; }
+};
+static_assert(sizeof(MSIXCapabilityDescriptor) == 0xc,
+              "The MSIX Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * MSIX table entry based on the PCI Local Bus Specification REV 3.0
+ */
+class __attribute__((__packed__)) MSIXTableEntry
+{
+private:
+    uint32_t msgAddressLow {0};
+    uint32_t msgAddressHigh {0};
+    uint32_t msgData {0};
+    uint32_t vectorCtrl {0};
+
+public:
+    uint64_t messageAddress() const { return static_cast<uint64_t>(msgAddressHigh) << 32 | msgAddressLow; }
+
+    uint32_t messageData() const { return msgData; }
+};
+static_assert(sizeof(MSIXTableEntry) == 0x10,
+              "The MSIX Capability Descriptor has incorrect size, did you forgot __attribute__ ((__packed__))");
+
+/**
+ * Write data to the PCI configuration space shadow kept by VirtualBox.
+ * Pass-through or semi-emulated devices need this so that VirtualBox can handle PCI capabilities such as MSI.
+ *
+ * \param pPciDev The PCI device whose configuration space shadow should be written; nothing happens if it is null.
+ * \param offset The offset into the configuration space (see PCI Local Bus Specification REV 3.0, Figure 6-1).
+ * \param cb The byte count to write (1, 2, 4 or 8).
+ * \param value The value to write.
+ */
+inline void writePciConfigSpaceShadow(PPDMPCIDEV pPciDev, uint32_t offset, unsigned cb, uint64_t value)
+{
+    if (pPciDev) {
+        switch (cb) {
+        case sizeof(uint8_t): PDMPciDevSetByte(pPciDev, offset, value); break;
+        case sizeof(uint16_t): PDMPciDevSetWord(pPciDev, offset, value); break;
+        case sizeof(uint32_t): PDMPciDevSetDWord(pPciDev, offset, value); break;
+        case sizeof(uint64_t): PDMPciDevSetQWord(pPciDev, offset, value); break;
+        default:
+            AssertLogRelMsgFailed(("SuperNova-PCI: Could not write PCI Config Space Shadow due to an unsupported byte "
+                                   "count of %u bytes.\n",
+                                   cb));
+        };
+    }
+}
+
+/**
+ * Register the MSI(X) system for the pass through pci device in the VirtualBox PCI Subsystem.
+ *
+ * \param pDevIns The VirtualBox PCI Device instance data
+ * \param msiCapabilityIterator The MSI Capability iterator of the pci device, if it has one.
+ * \param msixCapabilityIterator The MSIX Capability iterator of the pci device, if it has one.
+ * \return The status of PDMDevHlpPCIRegisterMsi, or VINF_SUCCESS when neither capability is present.
+ */
+inline int registerMsi(PPDMDEVINS pDevIns, std::optional<CapabilityList::CapabilityIterator> msiCapabilityIterator,
+                       std::optional<CapabilityList::CapabilityIterator> msixCapabilityIterator)
+{
+    PDMMSIREG msiReg;
+    RT_ZERO(msiReg);
+
+    if (msiCapabilityIterator) {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+
+        msiReg.cMsiVectors = msiCap.maxCount();
+        msiReg.iMsiCapOffset = msiCapabilityIterator->getOffset();
+        msiReg.iMsiNextOffset = msiCap.nextPtr;
+        msiReg.fMsi64bit = msiCap.is64Bit();
+        msiReg.fMsiNoMasking = not msiCap.isPerVectorMaskable();
+    }
+
+    if (msixCapabilityIterator) {
+        MSIXCapabilityDescriptor msixCap {*msixCapabilityIterator};
+        msiReg.cMsixVectors = msixCap.tableSize();
+        msiReg.iMsixCapOffset = msixCapabilityIterator->getOffset();
+        msiReg.iMsixNextOffset = msixCap.nextPtr;
+        msiReg.iMsixBar = msixCap.getBarIndex();
+    }
+
+    if (msiCapabilityIterator or msixCapabilityIterator) {
+        return PDMDevHlpPCIRegisterMsi(pDevIns, &msiReg);
+    }
+
+    /*
+     * If we end up here, the device supports neither MSI nor MSIX, or the device capabilities are not present.
+     */
+    return VINF_SUCCESS;
+}
diff --git a/src/VBox/Devices/Bus/DevVfio.cpp b/src/VBox/Devices/Bus/DevVfio.cpp
new file mode 100644
index 0000000000..f93fcd7381
--- /dev/null
+++ b/src/VBox/Devices/Bus/DevVfio.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define LOG_GROUP LOG_GROUP_DEV_VFIO
+#include "DevVfio.h"
+
+#include <VBox/log.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pdmdev.h>
+
+#include <string>
+
+static DECLCALLBACK(int) devVfioConstruct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
+{
+    /*
+     * Check that the device instance and device helper structures are compatible.
+     */
+    PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
+
+    PVFIODEV pThis {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+    PCPDMDEVHLPR3 pHlp {pDevIns->pHlpR3};
+    int rc;
+    uint16_t bus, device, function;
+    char* sysfsPath;
+
+    constexpr char validation[] = "sysfsPath"
+                                  "|GuestPCIBusNo"
+                                  "|GuestPCIDeviceNo"
+                                  "|GuestPCIFunctionNo";
+
+    PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, validation, "Invalid configuration");
+    rc = pHlp->pfnCFGMQueryStringAlloc(pCfg, "sysfsPath", &sysfsPath);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying sysfsPath as a string failed"));
+    }
+
+    std::string sysfsPathString {sysfsPath};
+    MMR3HeapFree(sysfsPath);
+
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIBusNo", &bus);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIBusNo as a uint16_t failed"));
+    }
+
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIDeviceNo", &device);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIDeviceNo as a uint16_t failed"));
+    }
+
+    rc = pHlp->pfnCFGMQueryU16(pCfg, "GuestPCIFunctionNo", &function);
+    if (RT_FAILURE(rc))
+    {
+        return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Querying GuestPCIFunctionNo as a uint16_t failed"));
+    }
+
+    LogRel(("VFIO: Constructing VFIO PCI device with path %s Guest BDF: %02hx:%02hx.%hx\n",
+            sysfsPathString.c_str(), bus, device, function));
+
+    rc = pThis->init(pDevIns, sysfsPathString);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+
+    NOREF(iInstance);
+
+    return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) devVfioDestruct(PPDMDEVINS pDevIns)
+{
+    /*
+     * Check the versions here as well since the destructor is *always* called.
+     */
+    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
+
+    PVFIODEV pThis {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+
+    pThis->terminate(pDevIns);
+
+    return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) devVfioInitComplete(PPDMDEVINS pDevIns)
+{
+    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
+
+    PVFIODEV pThis {PDMDEVINS_2_DATA(pDevIns, PVFIODEV)};
+
+    return pThis->initializeDma(pDevIns);
+}
+
+/**
+ * The device registration structure.
+ */
+extern "C" const PDMDEVREG g_DeviceVfioDev =
+{
+    /* .u32Version = */             PDM_DEVREG_VERSION,
+    /* .uReserved0 = */             0,
+    /* .szName = */                 "VfioDev",
+    /* .fFlags = */                 PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_NEW_STYLE,
+
+    /* .fClass = */                 PDM_DEVREG_CLASS_HOST_DEV,
+    /* .cMaxInstances = */          1,
+    /* .uSharedVersion = */         1,
+    /* .cbInstanceShared = */       sizeof(VFIODEV),
+    /* .cbInstanceR0 = */           0,
+    /* .cbInstanceRC = */           0,
+    /* .cMaxPciDevices = */         1,
+    /* .cMaxMsixVectors = */        0,
+    /* .pszDescription = */         "VirtualBox Vfio Passthrough Device\n",
+    /* .pszRCMod = */               "",
+    /* .pszR0Mod = */               "",
+    /* .pfnConstruct = */           devVfioConstruct,
+    /* .pfnDestruct = */            devVfioDestruct,
+    /* .pfnRelocate = */            NULL,
+    /* .pfnMemSetup = */            NULL,
+    /* .pfnPowerOn = */             NULL,
+    /* .pfnReset = */               NULL,
+    /* .pfnSuspend = */             NULL,
+    /* .pfnResume = */              NULL,
+    /* .pfnAttach = */              NULL,
+    /* .pfnDetach = */              NULL,
+    /* .pfnQueryInterface. = */     NULL,
+    /* .pfnInitComplete = */        devVfioInitComplete,
+    /* .pfnPowerOff = */            NULL,
+    /* .pfnSoftReset = */           NULL,
+    /* .pfnReserved0 = */           NULL,
+    /* .pfnReserved1 = */           NULL,
+    /* .pfnReserved2 = */           NULL,
+    /* .pfnReserved3 = */           NULL,
+    /* .pfnReserved4 = */           NULL,
+    /* .pfnReserved5 = */           NULL,
+    /* .pfnReserved6 = */           NULL,
+    /* .pfnReserved7 = */           NULL,
+    /* .u32VersionEnd = */          PDM_DEVREG_VERSION
+};
diff --git a/src/VBox/Devices/Bus/DevVfio.h b/src/VBox/Devices/Bus/DevVfio.h
new file mode 100644
index 0000000000..cfe384d7a1
--- /dev/null
+++ b/src/VBox/Devices/Bus/DevVfio.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include <svp/pci.h>
+
+#include <VBox/err.h>
+#include <VBox/pci.h>
+#include <VBox/vmm/pdmdev.h>
+
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <array>
+#include <atomic>
+#include <filesystem>
+#include <mutex>
+#include <vector>
+
+class VfioDevice
+{
+public:
+    /*
+     * The IRQ Type information, required for the interrupt handler.
+     */
+    enum class IrqType
+    {
+        VFIO_INTX = VFIO_PCI_INTX_IRQ_INDEX,
+        VFIO_MSI  = VFIO_PCI_MSI_IRQ_INDEX,
+        VFIO_MSIX = VFIO_PCI_MSIX_IRQ_INDEX,
+        VFIO_NONE,
+    };
+
+    /**
+     * Interrupt Handler function
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int handleInterrupts(PPDMDEVINS pDevIns);
+
+    /**
+     * Initialize the VfioDevice
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int init(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath);
+
+    /**
+     * Initialize DMA
+     * As the ram preallocation is required to initialize the DMA regions for the
+     * VFIO device, the function has to be called **after** pgmR3RamPreAlloc
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int initializeDma(PPDMDEVINS pDevIns);
+
+    /**
+     *  Terminates the VFIO device and closes the file descriptors
+     *
+     *  \param pDevIns The PCI Device Instance
+     *
+     *  \return VBox status code
+     */
+    int terminate(PPDMDEVINS pDevIns);
+
+    /**
+     * Read from the Vfio Device file descriptor
+     *
+     * \param pData data to read
+     * \param bytes count of bytes to read
+     * \param uAddress address to read from
+     *
+     * \return VBOX status code
+     */
+    int readFromDevice(void* pData, unsigned bytes, uint64_t uAddress)
+    {
+        return handleDeviceAccess(pread64, pData, bytes, uAddress);
+    }
+
+    /**
+     * Write to the Vfio Device file descriptor
+     *
+     * \param pData data to write
+     * \param bytes count of bytes to write
+     * \param uAddress address to write to
+     *
+     * \return VBOX status code
+     */
+    int writeToDevice(const void* pData, unsigned bytes, uint64_t uAddress)
+    {
+        return handleDeviceAccess(pwrite64, const_cast<void*>(pData), bytes, uAddress);
+    }
+
+    /**
+     * Read from the actual PCI Config Space of the VFIO device
+     *
+     * \param data data to read
+     * \param bytes count of bytes to read
+     * \param uAddress address to read from
+     *
+     * \return VBOX status code
+     */
+    template <typename T>
+    int readConfigSpace(T& data, unsigned bytes, uint64_t uAddress)
+    {
+        return readFromDevice(&data, bytes, mcfgOffset + uAddress);
+    }
+
+    /**
+     * Write to the actual PCI Config Space of the VFIO device
+     *
+     * \param data data to write
+     * \param bytes count of bytes to write
+     * \param uAddress address to write to
+     *
+     * \return VBOX status code
+     */
+    template <typename T>
+    int writeConfigSpace(T& data, unsigned bytes, uint64_t uAddress)
+    {
+        return writeToDevice(&data, bytes, mcfgOffset + uAddress);
+    }
+
+private:
+    using LockGuard = std::lock_guard<std::mutex>;
+
+    /**
+     * The interrupt information structure is a bookkeeping structure for the
+     * interrupt handling.
+     * It maps the interrupt event file descriptor to an internal interrupt
+     * index and contains the interrupt type (INTX, MSI, MSIX) for the handler thread.
+     */
+    struct InterruptInformation
+    {
+        int fd;
+        uint32_t index;
+
+        bool operator==(const InterruptInformation& o) const
+        {
+            return o.fd == fd and o.index == index;
+        }
+    };
+
+    template<typename FN>
+    int handleDeviceAccess(FN& fn, void* data, unsigned bytes, uint64_t uAddress)
+    {
+        AssertLogRelMsgReturn(vfioDeviceFd > 0, ("The Vfio Device is not open \n"), VERR_GENERAL_FAILURE);
+        auto rc {fn(vfioDeviceFd, data, bytes, uAddress)};
+
+        return rc < 0 ? VERR_ACCESS_DENIED : VINF_SUCCESS;
+    }
+
+    /**
+     * Initialize VFIO container and device
+     *
+     * \param pDevIns The PCI Device Instance
+     * \param sysfsPath path to the sysfs device
+     *
+     * \return VBox status code
+     */
+    int initializeVfio(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath);
+
+    /**
+     * Initialize the VirtualBox PCI Device Information
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int initializePci(PPDMDEVINS pDevIns);
+
+    /**
+     * Initialize VFIO Memory Regions
+     *
+     * Such regions are either PCI Bar regions or VFIO specific regions to
+     * provide device Information or device state such as graphics output
+     *
+     * \param pDevIns The PCI Instance Data
+     * \param deviceInfo The vfio device information
+     *
+     * \return VBox status code
+     */
+    int initializeMemoryRegions(PPDMDEVINS pDevIns, vfio_device_info& deviceInfo);
+
+    /**
+     * Initialize interrupt handling
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int initializeInterrupts(PPDMDEVINS pDevIns);
+
+    /**
+     * Activate the corresponding interrupt type. The current interrupt type must be disabled before.
+     *
+     * \param pDevIns The PCI Device Instance
+     * \param vfioIrqIndexType the irq type that should be activated
+     * \param irqCount count of irqs to register
+     *
+     * \return VBox status code
+     */
+    int activateInterrupts(PPDMDEVINS pDevIns, const IrqType vfioIrqIndexType, uint32_t irqCount = 1);
+
+    /**
+     * Disable the currently active interrupt type and close the eventfds registered for it
+     *
+     * \param pDevIns The PCI Device Instance
+     *
+     * \return VBox status code
+     */
+    int disableInterrupts(PPDMDEVINS pDevIns);
+
+    /**
+     * Inject an MSI, unless the signalled vector is masked in the MSI capability
+     *
+     * \param pDevIns The PCI Device Instance
+     * \param irqInfo The interrupt information of the pending interrupt
+     *
+     * \return VBox status code
+     */
+    int injectMsi(PPDMDEVINS pDevIns, InterruptInformation& irqInfo);
+
+    /**
+     * Inject an MSI-X interrupt for the vector stored in the interrupt information
+     *
+     * \param pDevIns The PCI Device Instance
+     * \param irqInfo The interrupt information of the pending interrupt
+     *
+     * \return VBox status code
+     */
+    int injectMsix(PPDMDEVINS pDevIns, InterruptInformation& irqInfo);
+
+    /**
+     * The configuration space write handler.
+     * Reacts to command register and MSI/MSI-X capability writes and forwards the write to the device.
+     * \param pDevIns The PCI Device Instance
+     * \param uAddress offset in the configuration space to write
+     * \param cb count of bytes to write
+     * \param u32Value The value to write
+     *
+     * \return VBox status code
+     */
+    int configSpaceWriteHandler(PPDMDEVINS pDevIns, uint32_t uAddress, unsigned cb, uint32_t u32Value);
+
+    /**
+     * The memory mapped IO access handler function.
+     *
+     * \param pDevIns The PCI Device Instance
+     * \param barRegion The reference to the PCI Bar region
+     * \param barOffset The offset in the PCI bar
+     * \param pv The pointer to the data buffer (source for a write, destination for a read)
+     * \param cb The size of the access in bytes
+     * \param writeAccess Indicator of access direction (true for a write access)
+     *
+     * \return VBox status code
+     */
+    int mmioAccessHandler(PPDMDEVINS pDevIns, PCIBarRegion& barRegion, RTGCPHYS barOffset, void* pv, unsigned cb, bool writeAccess);
+
+    /**
+     * Start interception of Guest VM PCI Config Space Accesses
+     *
+     * \param pDevIns the VBox Device Instance
+     *
+     * \return VBox status code
+     */
+    int interceptConfigSpaceAccesses(PPDMDEVINS pDevIns);
+
+    /**
+     * Register a Guest Physical Memory range at the vfio container
+     *
+     * \param pVM Pointer to the VM structure
+     * \param startGCPhys Guest physical address of the start of the ram range
+     * \param endGCPhys Guest physical address of the end of the region (NOTE(review): inclusive or exclusive? confirm)
+     *
+     * \return VBox status code
+     */
+    int registerDmaRange(PVM pVM, RTGCPHYS startGCPhys, RTGCPHYS endGCPhys);
+
+    /**
+     * Try handling of PCI Bar interception
+     *
+     * \param pDevIns PDM Device Instance
+     * \param pciConfigCommandValue value written to the command register of the PCI config space
+     */
+    void tryHandleBarInterception(PPDMDEVINS pDevIns, const uint32_t pciConfigCommandValue);
+
+    /**
+     * Register a PCI Bar at the corresponding subsystem (IO or MMIO).
+     * \tparam INVALID_ELEM the address value that marks the Bar as currently not mapped
+     * \param mapFn function used to map the Bar at the corresponding Subsystem
+     * \param unmapFn function to unmap the old Bar region if the bar was present before
+     * \param pDevIns the PDM Device Instance Data structure
+     * \param barRegion the region bookkeeping data structure
+     * \param mapAddress the new address of the Bar
+     */
+    template <uint64_t INVALID_ELEM, typename MapFN, typename UnmapFN>
+    void registerPCIBar(MapFN& mapFn, UnmapFN& unmapFn, PPDMDEVINS pDevIns, PCIBarRegion& barRegion, uint64_t mapAddress) {
+        LogRel(("VFIO: RegisterBar %#llx \n", mapAddress));
+        if (barRegion.address == mapAddress) // already mapped at the requested address, nothing to do
+        {
+            return;
+        }
+
+        if (barRegion.address != INVALID_ELEM) // mapped somewhere else: drop the old mapping first
+        {
+            unmapFn(pDevIns, barRegion.hRegion);
+            barRegion.address = INVALID_ELEM;
+        }
+
+        mapFn(pDevIns, barRegion.hRegion, mapAddress); // NOTE(review): results of mapFn/unmapFn are not evaluated
+        barRegion.address = mapAddress;
+    }
+
+    /**
+     * Read the Bar value from the PCI config space
+     *
+     * \param barNumber The number of the bar whose value should be read
+     *
+     * \return PCIBar information
+     */
+    const PCIBar getBarInfo(unsigned barNumber);
+
+    /**
+     * Ioctl wrapper with meaningful error return
+     * \param pDevIns the VBox Device Instance the error is set on
+     * \param fd file descriptor to interact with
+     * \param request ioctl request number
+     * \param errorString string to set as device error message in case of an error
+     * \param args variadic template args for the ioctl
+     * \return VINF_SUCCESS, or the result of PDMDEV_SET_ERROR with VERR_INVALID_PARAMETER if the ioctl fails
+     */
+    template <typename ...ARGS>
+    int vfioControl(PPDMDEVINS pDevIns, int fd, unsigned long request, const char* errorString, ARGS&& ...args)
+    {
+        if (ioctl(fd, request, std::forward<ARGS>(args) ...) < 0)
+        {
+            return PDMDEV_SET_ERROR(pDevIns, VERR_INVALID_PARAMETER, errorString);
+        }
+
+        return VINF_SUCCESS;
+    }
+
+    /**
+     * Ioctl device wrapper for accesses on the vfio device file descriptor
+     *
+     * \param pDevIns the VBox Device Instance
+     * \param request ioctl request number
+     * \param errorString string to set as device error message in case of an error
+     * \param args variadic template args for the ioctl
+     *
+     * \return VBox status code (VERR_GENERAL_FAILURE if the device file descriptor is not open)
+     */
+    template <typename ...ARGS>
+    int deviceControl(PPDMDEVINS pDevIns, unsigned long request, const char* errorString, ARGS&& ...args)
+    {
+        AssertLogRelMsgReturn(vfioDeviceFd > 0, ("The Vfio Device is not open \n"), VERR_GENERAL_FAILURE);
+        return vfioControl(pDevIns, vfioDeviceFd, request, errorString, std::forward<ARGS>(args)...);
+    }
+
+    /** Vfio File descriptors, -1 while not opened */
+    int vfioContainerFd{-1};
+    int vfioGroupFd{-1};
+    int vfioDeviceFd {-1};
+
+    /** PCI device members. */
+    PPDMPCIDEV pPciDev {nullptr}; ///< Set by initializePci; never left indeterminate before that
+    uint64_t mcfgOffset {0}; ///< The offset to the PCI Config Space Page in the vfio device
+    std::atomic<bool> pciConfigMemoryDecodingEnabled {false}; ///< The PCI Memory decoding indicator
+    std::atomic<bool> pciConfigIODecodingEnabled {false}; ///< The PCI IO decoding indicator
+    std::array<PCIBarRegion, VBOX_PCI_MAX_BARS> pciBars;
+
+    /** IRQ handling */
+    RTTHREAD hIrqDeliveryThread {NIL_RTTHREAD}; ///< NIL until initializeInterrupts created the thread
+    // Even if only one INTX interrupt is supported handling it as a vector reduce the code complexity by a lot.
+    std::vector<InterruptInformation> aIrqInformation;
+    std::vector<MSIXTableEntry> aMsixTable;
+    IrqType activeInterruptType {IrqType::VFIO_NONE};
+    std::mutex irqDisable; ///< Guards aIrqInformation against the IRQ delivery thread
+
+    std::optional<CapabilityList::CapabilityIterator> msiCapabilityIterator;
+    std::optional<CapabilityList::CapabilityIterator> msixCapabilityIterator;
+
+    std::atomic<bool> exit{false}; ///< Polled by the IRQ delivery thread loop to terminate
+};
+using VFIODEV = VfioDevice;   /* VBox style alias of the device instance data type */
+
+using PVFIODEV = VFIODEV *;   /* pointer alias as expected by PDMDEVINS_2_DATA */
diff --git a/src/VBox/Devices/Bus/VfioDevice.cpp b/src/VBox/Devices/Bus/VfioDevice.cpp
new file mode 100644
index 0000000000..6bf1945828
--- /dev/null
+++ b/src/VBox/Devices/Bus/VfioDevice.cpp
@@ -0,0 +1,912 @@
+/*
+ * Copyright (C) Cyberus Technology GmbH.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#define PDMPCIDEV_INCLUDE_PRIVATE  /* Hack to get pdmpcidevint.h included at the right point. */
+#include <VBox/vmm/pdmpcidev.h>
+
+#include "DevVfio.h"
+
+#include <iprt/mem.h>
+#include <VBox/log.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/pdmdev.h>
+#include "DevPciInternal.h"
+
+#include <fcntl.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <cstring>
+#include <optional>
+
+namespace {
+    using IrqType = VfioDevice::IrqType;
+
+    /** PCI config space read callback: reads from the VFIO device and mirrors successful reads into the PDM shadow. */
+    VBOXSTRICTRC vfioConfigSpaceRead(PPDMDEVINS pDev, PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t* pu32Value)
+    {
+        PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+        AssertLogRelMsgReturn(pu32Value, ("VFIO: PCi config space read: value pointer is zero!"), VERR_INVALID_POINTER);
+        /* Only mirror values the device actually delivered; a failed read leaves *pu32Value meaningless. */
+        const int rc { pThis->readConfigSpace(*pu32Value, cb, uAddress) };
+        if (RT_SUCCESS(rc)) { writePciConfigSpaceShadow(pPciDev, uAddress, cb, *pu32Value); }
+        return rc;
+    }
+
+    /** Convert an IrqType to its underlying integer value (used as VFIO irq index and for logging). */
+    std::underlying_type_t<IrqType> toUnderlying(const IrqType& t)
+    {
+        return static_cast<std::underlying_type_t<IrqType>>(t);
+    }
+}
+
+int VfioDevice::initializeVfio(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath)
+{
+    namespace fs = std::filesystem;
+    const std::filesystem::path VFIO_PATH {"/dev/vfio"};
+    int rc {VINF_SUCCESS};
+    vfioContainerFd = open((VFIO_PATH / "vfio").c_str(), O_RDWR | O_CLOEXEC);
+    AssertLogRelMsgReturn(vfioContainerFd > 0, ("VFIO: Could not open VFIO Container\n"), VERR_INVALID_PARAMETER);
+
+    const int vfioApiVersion {ioctl(vfioContainerFd, VFIO_GET_API_VERSION)};
+    LogRel(("VFIO: Detected VFIO Api Version %d\n", vfioApiVersion));
+
+    /* VFIO_CHECK_EXTENSION yields a positive value if supported, 0 if not and -1 on error, so only > 0 counts. */
+    const int iommuTypePresent {ioctl(vfioContainerFd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)};
+    AssertLogRelMsgReturn(iommuTypePresent > 0, ("VFIO: Requested IOMMU type is not supported.\n"), VERR_NOT_AVAILABLE);
+
+    /* Use the non-throwing overload: a device without an IOMMU group must yield a status code, not an exception. */
+    std::error_code linkError;
+    const auto iommuGroupLink {fs::read_symlink(sysfsPath / "iommu_group", linkError)};
+    AssertLogRelMsgReturn(!linkError, ("VFIO: Could not resolve the IOMMU group of the device\n"), VERR_NOT_AVAILABLE);
+    /* O_CLOEXEC belongs into the flags argument; the third argument of open() is the creation mode. */
+    vfioGroupFd = open((VFIO_PATH / iommuGroupLink.filename()).c_str(), O_RDWR | O_CLOEXEC);
+    AssertLogRelMsgReturn(vfioGroupFd > 0, ("VFIO: Could not open VFIO Group\n"), VERR_INVALID_PARAMETER);
+
+    rc = vfioControl(pDevIns, vfioGroupFd, VFIO_GROUP_SET_CONTAINER,
+                    "VFIO: Unable to assign the VFIO container to the VFIO Group \n", &vfioContainerFd);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    rc = vfioControl(pDevIns, vfioContainerFd, VFIO_SET_IOMMU, "VFIO: Unable to set VFIO IOMMU Type \n", VFIO_TYPE1_IOMMU);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    vfioDeviceFd = ioctl(vfioGroupFd, VFIO_GROUP_GET_DEVICE_FD, sysfsPath.filename().c_str());
+    AssertLogRelMsgReturn(vfioDeviceFd > 0, ("VFIO: Unable to open VFIO device \n"), VERR_INVALID_PARAMETER);
+    rc = deviceControl(pDevIns, VFIO_DEVICE_RESET, "Unable to reset VFIO device");
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    return rc;
+}
+
+int VfioDevice::initializePci(PPDMDEVINS pDevIns)
+{
+    int rc {VINF_SUCCESS};
+
+    pPciDev = pDevIns->apPciDevs[0];
+    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
+
+    vfio_region_info regionInfo {};
+    regionInfo.argsz = sizeof(regionInfo);
+    regionInfo.index = VFIO_PCI_CONFIG_REGION_INDEX;
+
+    rc = deviceControl(pDevIns, VFIO_DEVICE_GET_REGION_INFO, "VFIO: Could not retrieve VFIO Device MCFG region\n", &regionInfo);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    AssertLogRelMsgReturn(regionInfo.size != 0, ("VFIO: MCFG Region size is zero\n"), VERR_INVALID_PARAMETER);
+
+    mcfgOffset = regionInfo.offset;
+
+    /* Read the header fields one after another and stop at the first failure, so no indeterminate value is registered. */
+    uint16_t vendorId {0}, deviceId {0};
+    uint8_t classBase {0}, classSub {0}, headerType {0}, interruptPin {0}, interruptLine {0};
+
+    rc = readConfigSpace(vendorId, sizeof(vendorId), VBOX_PCI_VENDOR_ID);
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(deviceId, sizeof(deviceId), VBOX_PCI_DEVICE_ID); }
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(classBase, sizeof(classBase), VBOX_PCI_CLASS_BASE); }
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(classSub, sizeof(classSub), VBOX_PCI_CLASS_SUB); }
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(headerType, sizeof(headerType), VBOX_PCI_HEADER_TYPE); }
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(interruptLine, sizeof(interruptLine), VBOX_PCI_INTERRUPT_LINE); }
+    if (RT_SUCCESS(rc)) { rc = readConfigSpace(interruptPin, sizeof(interruptPin), VBOX_PCI_INTERRUPT_PIN); }
+    AssertLogRelMsgReturn(RT_SUCCESS(rc), ("VFIO: Could not read the PCI config space header\n"), rc);
+
+    PDMPciDevSetVendorId(pPciDev, vendorId);
+    PDMPciDevSetDeviceId(pPciDev, deviceId);
+    PDMPciDevSetClassBase(pPciDev, classBase);
+    PDMPciDevSetClassSub(pPciDev, classSub);
+    PDMPciDevSetHeaderType(pPciDev, headerType);
+    PDMPciDevSetInterruptLine(pPciDev, interruptLine);
+    PDMPciDevSetInterruptPin(pPciDev, interruptPin);
+
+    rc = PDMDevHlpPCIRegister(pDevIns, pPciDev);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    CapabilityList capList {vfioConfigSpaceRead, pDevIns};
+    msiCapabilityIterator = capList.getCapabilityIterator(VBOX_PCI_CAP_ID_MSI);
+    msixCapabilityIterator = capList.getCapabilityIterator(VBOX_PCI_CAP_ID_MSIX);
+
+    if (msiCapabilityIterator)
+    {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+        AssertLogRelMsgReturn(msiCap.maxCount() == 1, ("VFIO: Multiple Message MSI supporting devices are not supported yet!\n"), VERR_NOT_SUPPORTED);
+    }
+
+    return rc;
+}
+
+int VfioDevice::initializeMemoryRegions(PPDMDEVINS pDevIns, vfio_device_info& deviceInfo)
+{
+    int rc {VINF_SUCCESS};
+    for (auto i {0u}; i < deviceInfo.num_regions; ++i)
+    {
+        /**
+         * Currently only PCI Bar regions are supported.
+         * VFIO places the bar region information at indices
+         * 0 <= i < VBOX_PCI_MAX_BARS, so we can stop if the
+         * limit is reached
+         *
+         * TODO implement special region handling
+         */
+        if (i >= VBOX_PCI_MAX_BARS)
+        {
+            break;
+        }
+
+        vfio_region_info regionInfo {};
+        regionInfo.argsz = sizeof(regionInfo);
+        regionInfo.index = i;
+
+        rc = deviceControl(pDevIns, VFIO_DEVICE_GET_REGION_INFO, "VFIO: Unable to retrieve VFIO region info",  &regionInfo);
+        AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+        if (regionInfo.size == 0)
+        {
+            continue;
+        }
+
+        const auto barInfo {getBarInfo(i)};
+
+        PCIBarRegion& region {pciBars[i]};
+        region.offset = regionInfo.offset;
+        region.size = regionInfo.size;
+        region.iRegion = i;
+
+        if (barInfo.isIoBar())
+        {
+            auto portIoRead = [](PPDMDEVINS pDev, void* pvUser , RTIOPORT offsetPort, uint32_t* pu32, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pu32, VERR_INVALID_POINTER);
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->readFromDevice(pu32, cb, pBar->offset + offsetPort);
+            };
+
+            auto portIoWrite = [](PPDMDEVINS pDev, void* pvUser, RTIOPORT offsetPort, uint32_t u32, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->writeToDevice(&u32, cb, pBar->offset + offsetPort);
+            };
+
+            rc = PDMDevHlpPCIIORegionCreateIo(pDevIns, i, region.size, portIoWrite, portIoRead,
+                                              &region, "VFIO Port IO", nullptr, &region.hRegion);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (barInfo.isMmioBar())
+        {
+            region.address = NIL_RTGCPHYS;
+
+            auto mmioRead = [](PPDMDEVINS pDev, void* pvUser, RTGCPHYS barOffset, void* pv, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->mmioAccessHandler(pDev, *pBar, barOffset, pv, cb, false);
+            };
+
+            auto mmioWrite = [](PPDMDEVINS pDev, void* pvUser, RTGCPHYS barOffset, const void * pv, unsigned cb) -> VBOXSTRICTRC
+            {
+                PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+                auto pBar {static_cast<PPCIBARREGION>(pvUser)};
+
+                AssertLogRelReturn(pBar, VERR_INVALID_POINTER);
+
+                return pThis->mmioAccessHandler(pDev, *pBar, barOffset, const_cast<void*>(pv), cb, true);
+            };
+
+            rc = PDMDevHlpMmioCreate(pDevIns,
+                                     region.size,
+                                     NULL,
+                                     UINT32_MAX,
+                                     mmioWrite,
+                                     mmioRead,
+                                     &region,
+                                     IOMMMIO_FLAGS_READ_PASSTHRU | IOMMMIO_FLAGS_WRITE_PASSTHRU,
+                                     "VFIO MMIO BAR",
+                                     &region.hRegion);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc); /* otherwise the next iteration overwrites the failure */
+        }
+    }
+    return rc;
+}
+
+int VfioDevice::handleInterrupts(PPDMDEVINS pDevIns)
+{
+    // Waits for input on the registered eventfds with a given timeout.
+    // Taken from https://www.gnu.org/software/libc/manual/html_node/Waiting-for-I_002fO.html
+    // Returns the interrupt information of the first eventfd that has input, if any
+    auto waitForInput = [&] (std::chrono::microseconds delay) -> std::optional<InterruptInformation>
+    {
+        fd_set set;
+        struct timeval timeout {0, 0};
+
+        /* Initialize the file descriptor set. */
+        FD_ZERO(&set);
+
+        /*
+         * We use a copy of the interrupts here to avoid firing interrupts that are deactivated already.
+         */
+        irqDisable.lock();
+        std::vector<InterruptInformation> aCurrentIrqInformation {aIrqInformation};
+        irqDisable.unlock();
+
+        for (const auto& efd : aCurrentIrqInformation)
+        {
+            if (efd.fd > 0 and efd.fd < FD_SETSIZE) /* FD_SET on a descriptor >= FD_SETSIZE is undefined */
+            {
+                FD_SET(efd.fd, &set);
+            }
+        }
+
+        /* Initialize the timeout data structure. */
+        const auto seconds {std::chrono::duration_cast<std::chrono::seconds>(delay)};
+        const auto us {std::chrono::duration_cast<std::chrono::microseconds>(delay - seconds)};
+
+        timeout.tv_sec = seconds.count();
+        timeout.tv_usec = us.count();
+
+        /* select returns 0 if timeout, 1 if input available, -1 if error. */
+        int error = TEMP_FAILURE_RETRY(select(FD_SETSIZE,
+                                       &set, NULL, NULL,
+                                       &timeout));
+
+        if (error == -1)
+        {
+            perror("select on fds failed");
+            Assert(error != -1);
+            return std::nullopt; /* the fd set must not be evaluated after a failed select() */
+        }
+
+        {
+            LockGuard _ {irqDisable};
+
+            /*
+             * skip delivering non active interrupts
+             */
+            if (aCurrentIrqInformation != aIrqInformation)
+            {
+                return std::nullopt;
+            }
+
+            for (const auto& efd : aCurrentIrqInformation)
+            {
+                if (efd.fd > 0 and efd.fd < FD_SETSIZE and FD_ISSET(efd.fd, &set))
+                {
+                    return efd;
+                }
+            }
+        }
+
+        return std::nullopt;
+    };
+
+    while (not exit.load())
+    {
+        if (auto irqInfo = waitForInput(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::seconds(1))); irqInfo)
+        {
+            uint64_t value;
+            const ssize_t s {read(irqInfo->fd, &value, sizeof(value))};
+            AssertLogRelMsgReturn(s == sizeof(value), ("VFIO: Read on event FD returned wrong size."), VERR_GENERAL_FAILURE);
+            AssertLogRelReturn(value != 0, VERR_INTERRUPTED);
+            int rc {VINF_SUCCESS};
+            switch (activeInterruptType)
+            {
+            case IrqType::VFIO_INTX:
+                PDMDevHlpPCISetIrqNoWait(pDevIns, 0, PDM_IRQ_LEVEL_FLIP_FLOP);
+                break;
+            case IrqType::VFIO_MSI:
+                rc = injectMsi(pDevIns, *irqInfo);
+                break;
+            case IrqType::VFIO_MSIX:
+                rc = injectMsix(pDevIns, *irqInfo);
+                break;
+            default:
+                AssertLogRelMsgFailedReturn(("VFIO: Unsupported interrupt type in IRQ delivery thread detected %u\n", toUnderlying(activeInterruptType)), VERR_NOT_SUPPORTED);
+            }
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+int VfioDevice::initializeInterrupts(PPDMDEVINS pDevIns)
+{
+    int rc {activateInterrupts(pDevIns, IrqType::VFIO_INTX)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    /*
+     *  We need to shadow the MSIX table, as a read access on the table returns invalid data.
+     *  Thus we need to allocate MSIX table entries upfront, to be able to handle MSIX table writes.
+     */
+    PDMMSIREG MsiReg;
+    RT_ZERO(MsiReg);
+
+    if (msiCapabilityIterator)
+    {
+        MSICapabilityDescriptor msiCap {*msiCapabilityIterator};
+        MsiReg.cMsiVectors = msiCap.maxCount();
+        MsiReg.iMsiCapOffset = msiCapabilityIterator->getOffset();
+        MsiReg.iMsiNextOffset = msiCap.nextPtr;
+        MsiReg.fMsi64bit = msiCap.is64Bit();
+        MsiReg.fMsiNoMasking = not msiCap.isPerVectorMaskable();
+    }
+
+    // if (msixCapabilityIterator)
+    // {
+        // MSIXCapabilityDescriptor msixCap {*msixCapabilityIterator};
+        // aMsixTable.resize(msixCap.tableSize());
+        // MsiReg.cMsixVectors = msixCap.tableSize();
+        // MsiReg.iMsixCapOffset = msixCapabilityIterator->getOffset();
+        // MsiReg.iMsixNextOffset = msixCap.nextPtr;
+        // MsiReg.iMsixBar = msixCap.getBarIndex();
+    // }
+
+    if (msixCapabilityIterator or msiCapabilityIterator)
+    {
+        rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg);
+        AssertLogRelReturn(RT_SUCCESS(rc), rc); /* the thread creation below would overwrite this status */
+    }
+
+    auto handleIrqs = [](RTTHREAD /*hSelf*/, void* pvUser) -> int
+    {
+        PPDMDEVINS pDev {static_cast<PPDMDEVINS>(pvUser)};
+        PVFIODEV pThis {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+
+        return pThis->handleInterrupts(pDev);
+    };
+
+    rc = RTThreadCreate(&hIrqDeliveryThread, handleIrqs, pDevIns, 0, RTTHREADTYPE_IO, RTTHREADFLAGS_WAITABLE, "vfio IRQ");
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    return rc;
+}
+
+int VfioDevice::activateInterrupts(PPDMDEVINS pDevIns, const IrqType irqType, uint32_t irqCount)
+{
+    LockGuard _ {irqDisable};
+
+    int rc;
+    vfio_irq_info irqInfo {};
+    irqInfo.argsz = sizeof(irqInfo);
+    irqInfo.index = toUnderlying(irqType);
+
+    /**
+     * The call of this function requires that the interrupts are disabled.
+     */
+    AssertLogRelMsgReturn(aIrqInformation.size() == 0,
+        ("VFIO: Trying to activate interrupts without deactivating the previous irqs! Disable irqs before activate new ones!"),
+        VERR_NOT_SUPPORTED);
+
+    /**
+     * If the IRQ is not enabled in the VFIO device the call will return unsuccessful
+     * and we don't need to set up something for this IRQ and can just continue
+     */
+    if (RT_FAILURE(deviceControl(pDevIns, VFIO_DEVICE_GET_IRQ_INFO, "", &irqInfo)))
+    {
+        return VERR_NOT_AVAILABLE;
+    }
+
+    /**
+     * Some devices (e.g. SR-IOV virtual functions) do not have legacy interrupts enabled.
+     * We can skip interrupt activation if we find a device without legacy interrupts.
+     */
+    if (irqType == IrqType::VFIO_INTX and irqInfo.count == 0)
+    {
+        uint8_t interruptPin {0};
+        rc = readConfigSpace(interruptPin, sizeof(interruptPin), VBOX_PCI_INTERRUPT_PIN);
+        AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        AssertLogRelMsgReturn(interruptPin == 0, ("VFIO: Found device without INTX information, but INTX is marked as supported in the PCI Config space"), VERR_NOT_AVAILABLE);
+        return VINF_SUCCESS;
+    }
+
+    /**
+     * If we try to activate an interrupt type that is not enabled, or supported by vfio we get an interrupt count of 0
+     * We bail out here, as we are not able to enable an interrupt type with no interrupts.
+     */
+    if (irqInfo.count == 0)
+    {
+        LogRel(("VFIO: Trying to activate IRQ type %u, but no IRQs of that type are configured\n", toUnderlying(irqType)));
+        return VERR_NOT_AVAILABLE;
+    }
+
+    /**
+     * Sanity check: If we request a larger number of interrupts than the VFIO device is able to support, we bail out here.
+     */
+    if (irqInfo.count < irqCount)
+    {
+        LogRel(("VFIO: Trying to register %u irqs, but %u are supported for type %u.\n", irqCount, irqInfo.count, toUnderlying(irqType)));
+        return VERR_NOT_SUPPORTED;
+    }
+
+    AssertLogRelReturn(irqInfo.flags & VFIO_IRQ_INFO_EVENTFD, VERR_NOT_AVAILABLE);
+    const auto setSize {sizeof(vfio_irq_set) + sizeof(int) * irqCount};
+    std::vector<uint8_t> buf(setSize);
+    vfio_irq_set& irqSet {*reinterpret_cast<vfio_irq_set*>(buf.data())};
+
+    irqSet.argsz = setSize;
+    irqSet.flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    irqSet.index = irqInfo.index;
+    irqSet.start = 0;
+    irqSet.count = irqCount;
+
+    /*
+     * Logging and sanity checking only.
+     */
+    switch (irqType)
+    {
+    case IrqType::VFIO_INTX:
+        AssertLogRelMsgReturn(irqInfo.count == 1,
+                              ("VFIO: Only a single INTX is supported! Detected Count: %u\n", irqInfo.count),
+                              VERR_NOT_IMPLEMENTED);
+        LogRel(("VFIO: Activate INTX\n"));
+        break;
+    case IrqType::VFIO_MSI:
+        LogRel(("VFIO: Activate MSI count: %u\n", irqCount));
+        break;
+    case IrqType::VFIO_MSIX:
+        LogRel(("VFIO: Activate MSIX: count %u\n", irqCount));
+        break;
+    default:
+        AssertLogRelMsgFailedReturn(("VFIO: Found unsupported vfio IRQ type: %u, count: %u\n", irqInfo.index, irqInfo.count), VERR_NOT_IMPLEMENTED);
+    }
+
+    /* Collect the eventfds locally so that a failure leaves aIrqInformation empty and activation can be retried. */
+    std::vector<InterruptInformation> aNewIrqInformation;
+    auto closeNewEventFds = [&aNewIrqInformation]() { for (const auto& info : aNewIrqInformation) { close(info.fd); } };
+    for (uint32_t i {0}; i < irqCount; ++i)
+    {
+        const int eventFd {eventfd(0, 0)};
+        if (eventFd <= 0) { closeNewEventFds(); }
+        AssertLogRelMsgReturn(eventFd > 0,("VFIO: could not request additional eventfds\n"), VERR_ACCESS_DENIED);
+        aNewIrqInformation.push_back({eventFd, i});
+        reinterpret_cast<int*>(irqSet.data)[i] = eventFd;
+    }
+
+    rc = deviceControl(pDevIns, VFIO_DEVICE_SET_IRQS, "VFIO: Could not set irq info\n", &irqSet);
+    if (RT_FAILURE(rc)) { closeNewEventFds(); }
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    /* Publish the new state only after the device accepted the eventfds. */
+    activeInterruptType = irqType;
+    aIrqInformation.swap(aNewIrqInformation);
+    return rc;
+}
+
+int VfioDevice::disableInterrupts(PPDMDEVINS pDevIns)
+{
+    LockGuard _ {irqDisable};
+
+    /* Nothing to tear down if no eventfd is registered or no interrupt type is active. */
+    if (aIrqInformation.empty() or activeInterruptType == IrqType::VFIO_NONE)
+    {
+        return VINF_SUCCESS;
+    }
+
+    /* A trigger request without data and with a count of zero is sent for the active irq index. */
+    std::vector<uint8_t> requestStorage(sizeof(vfio_irq_set));
+    auto* pIrqSet {reinterpret_cast<vfio_irq_set*>(requestStorage.data())};
+    pIrqSet->argsz = requestStorage.size();
+    pIrqSet->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+    pIrqSet->index = toUnderlying(activeInterruptType);
+    pIrqSet->start = 0;
+    pIrqSet->count = 0;
+
+    const int rc {deviceControl(pDevIns, VFIO_DEVICE_SET_IRQS, "VFIO: Could not set irq info for deactivation\n", pIrqSet)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    for (const auto& irq : aIrqInformation)
+    {
+        close(irq.fd);
+    }
+    aIrqInformation.clear();
+    return VINF_SUCCESS;
+}
+
+
+int VfioDevice::injectMsi(PPDMDEVINS pDevIns, InterruptInformation& irqInfo)
+{
+    /* Raise PDM irq 0 for a pending MSI, unless the MSI capability reports the vector as masked. */
+    AssertLogRelMsgReturn(msiCapabilityIterator, ("VFIO: Pending MSI, but the capability is not provided \n"), VERR_NOT_SUPPORTED);
+    MSICapabilityDescriptor msiCap(*msiCapabilityIterator);
+    AssertLogRelMsgReturn(msiCap.enabled(), ("VFIO: Pending MSI, but the capability is disabled \n"), VERR_NOT_SUPPORTED);
+
+    if (msiCap.isMasked(irqInfo.index))
+    {
+        /* A masked vector is not delivered; this is still reported as success. */
+        return VINF_SUCCESS;
+    }
+    PDMDevHlpPCISetIrqNoWait(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
+    return VINF_SUCCESS;
+}
+
+int VfioDevice::injectMsix(PPDMDEVINS pDevIns, InterruptInformation& irqInfo)
+{
+    /* Raise the PDM irq whose number equals the index stored with the signalled eventfd. */
+    AssertLogRelMsgReturn(msixCapabilityIterator, ("VFIO: Pending MSIX, but the capability is not provided \n"), VERR_NOT_SUPPORTED);
+    MSIXCapabilityDescriptor msixCap(*msixCapabilityIterator);
+    AssertLogRelMsgReturn(msixCap.enabled(), ("VFIO: Pending MSIX, but the capability is disabled \n"), VERR_NOT_SUPPORTED);
+    const auto vector {irqInfo.index};
+    PDMDevHlpPCISetIrqNoWait(pDevIns, vector, PDM_IRQ_LEVEL_HIGH);
+    return VINF_SUCCESS;
+}
+
+int VfioDevice::configSpaceWriteHandler(PPDMDEVINS pDevIns, uint32_t uAddress, unsigned cb, uint32_t u32Value)
+{
+    /* Capability writes are overlaid on a copy of the descriptor; the overlay is clamped so it never leaves the copy. */
+    int rc {VINF_SUCCESS};
+    if (uAddress == VBOX_PCI_COMMAND)
+    {
+        tryHandleBarInterception(pDevIns, u32Value);
+    }
+    else if (msiCapabilityIterator and  (uAddress >= msiCapabilityIterator->getOffset() and uAddress < (msiCapabilityIterator->getOffset() + sizeof(MSICapabilityDescriptor))))
+    {
+        MSICapabilityDescriptor lastCap {*msiCapabilityIterator};
+        MSICapabilityDescriptor updatedCap {*msiCapabilityIterator};
+        const auto capByteOffset = static_cast<size_t>(uAddress - msiCapabilityIterator->getOffset());
+        std::memcpy(reinterpret_cast<uint8_t*>(&updatedCap) + capByteOffset, &u32Value, RT_MIN(static_cast<size_t>(cb), sizeof(updatedCap) - capByteOffset));
+
+        if (not updatedCap.enabled() and lastCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_INTX);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (updatedCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_MSI, updatedCap.count());
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+    else if (msixCapabilityIterator and (uAddress >= msixCapabilityIterator->getOffset() and uAddress < (msixCapabilityIterator->getOffset() + sizeof(MSIXCapabilityDescriptor))))
+    {
+        MSIXCapabilityDescriptor lastCap {*msixCapabilityIterator};
+        MSIXCapabilityDescriptor updatedCap {lastCap};
+        const auto capByteOffset = static_cast<size_t>(uAddress - msixCapabilityIterator->getOffset());
+        std::memcpy(reinterpret_cast<uint8_t*>(&updatedCap) + capByteOffset, &u32Value, RT_MIN(static_cast<size_t>(cb), sizeof(updatedCap) - capByteOffset));
+
+        if (not updatedCap.enabled() and lastCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_INTX);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+        else if (updatedCap.enabled())
+        {
+            rc = disableInterrupts(pDevIns);
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+            rc = activateInterrupts(pDevIns, IrqType::VFIO_MSIX, updatedCap.tableSize());
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+    }
+
+    return writeConfigSpace(u32Value, cb, uAddress);
+}
+
+int VfioDevice::mmioAccessHandler(PPDMDEVINS /*pDevIns*/, PCIBarRegion& barRegion, RTGCPHYS barOffset, void* pv, unsigned cb, bool writeAccess)
+{
+    if (msixCapabilityIterator)
+    {
+        MSIXCapabilityDescriptor cap {*msixCapabilityIterator};
+        if (cap.getBarIndex() == barRegion.iRegion and barOffset >= cap.getTableOffset()
+            and barOffset < cap.getTableOffset() + (sizeof(MSIXTableEntry) * cap.tableSize()))
+        {
+            AssertLogRelMsgReturn(cap.tableSize() == aMsixTable.size(),
+                ("VFIO: The MSIX table size mismatches the hardware table size. Assumed table size: %zu Hardware table size: %hu\n",
+                    aMsixTable.size(),
+                    cap.tableSize()),
+                VERR_NOT_SUPPORTED);
+            uint64_t msixTableEntryOffset {barOffset - cap.getTableOffset()};
+            /* Clamp to the end of the shadow table: an access may start inside the table but extend past it. */
+            const auto shadowBytes = static_cast<size_t>(RT_MIN(static_cast<uint64_t>(cb), sizeof(MSIXTableEntry) * aMsixTable.size() - msixTableEntryOffset));
+            void* shadowMsixTableOffset {reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(aMsixTable.data()) + msixTableEntryOffset)};
+            if (writeAccess)
+            {
+                /*
+                 * We need to shadow the MSIX table as explained in the else path, but we need to provide the VFIO device
+                 * with data written to the VFIO device.
+                 * Because of this we need to write the data through.
+                 */
+                std::memcpy(shadowMsixTableOffset, pv, shadowBytes);
+            }
+            else
+            {
+                std::memcpy(pv, shadowMsixTableOffset, shadowBytes);
+                /**
+                 * The VFIO Device returns invalid data in case of a read from the MSIX table.
+                 * Because of this, we need to shadow the table and return early without reading
+                 * from the actual VFIO device here.
+                 */
+                return VINF_SUCCESS;
+            }
+        }
+    }
+
+    if (writeAccess)
+    {
+        return writeToDevice(pv, cb, barRegion.offset + barOffset);
+    }
+    else
+    {
+       return readFromDevice(pv, cb, barRegion.offset + barOffset);
+    }
+}
+
+/** Hooks the PCI config space of the device: reads are served by vfioConfigSpaceRead, writes first update
+ *  the config space shadow and are then passed to configSpaceWriteHandler. */
+int VfioDevice::interceptConfigSpaceAccesses(PPDMDEVINS pDevIns)
+{
+    auto writeHook = [](PPDMDEVINS pDev, PPDMPCIDEV pPciDev_, uint32_t uAddress, unsigned cb, uint32_t u32Value) -> VBOXSTRICTRC
+    {
+        /* Bring the shadow up to date before the device specific handling of the write runs. */
+        writePciConfigSpaceShadow(pPciDev_, uAddress, cb, u32Value);
+        PVFIODEV pVfio {PDMDEVINS_2_DATA(pDev, PVFIODEV)};
+        return pVfio->configSpaceWriteHandler(pDev, uAddress, cb, u32Value);
+    };
+
+    int rc {PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, vfioConfigSpaceRead, writeHook)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    return rc;
+}
+
+/**
+ * Runs the initialization steps in order (initializeVfio, initializePci, group and device info query,
+ * memory regions, interrupts, config space interception) and returns the status of the first failing step.
+ */
+int VfioDevice::init(PPDMDEVINS pDevIns, std::filesystem::path sysfsPath)
+{
+    int rc {initializeVfio(pDevIns, sysfsPath)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    rc = initializePci(pDevIns);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    /* The ioctl arguments are value-initialized so that no indeterminate bytes are passed on or logged. */
+    vfio_group_status groupStatus {};
+    groupStatus.argsz = sizeof(groupStatus);
+    rc = vfioControl(pDevIns, vfioGroupFd, VFIO_GROUP_GET_STATUS, "VFIO: Unable to retrieve VFIO group status\n" , &groupStatus);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    vfio_device_info deviceInfo {};
+    deviceInfo.argsz = sizeof(deviceInfo);
+    rc = deviceControl(pDevIns, VFIO_DEVICE_GET_INFO, "VFIO: Unable to retrieve VFIO Device information\n", &deviceInfo);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    LogRel(("VFIO: Successfully opened VFIO Device: Group Status Flags: %#x Device Flags: %#x, Num BARs: %u, Num IRQ's %u \n",
+           groupStatus.flags, deviceInfo.flags, deviceInfo.num_regions, deviceInfo.num_irqs));
+
+    rc = initializeMemoryRegions(pDevIns, deviceInfo);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    rc = initializeInterrupts(pDevIns);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    rc = interceptConfigSpaceAccesses(pDevIns);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    return rc;
+}
+
+/**
+ * Maps the guest physical pages from startGCPhys up to endGCPhys into the VFIO container for DMA.
+ *
+ * The range is walked page by page. Pages whose host addresses directly follow each other are collected
+ * and handed to VFIO_IOMMU_MAP_DMA as one mapping; pages for which PGMR3PhysTlbGCPhys2Ptr fails are left out.
+ *
+ * @param pVM          VM handle used to look up the host address of each guest page.
+ * @param startGCPhys  Start of the range; checked with RT_VALID_ALIGNED_PTR against PAGE_SIZE (0 is accepted).
+ * @param endGCPhys    End of the range; endGCPhys + 1 is checked with RT_VALID_ALIGNED_PTR against PAGE_SIZE.
+ * @returns VINF_SUCCESS, VERR_INVALID_POINTER if a bound check fails, VERR_NO_MEMORY if the mapping ioctl fails.
+ */
+int VfioDevice::registerDmaRange(PVM pVM, RTGCPHYS startGCPhys, RTGCPHYS endGCPhys)
+{
+    AssertLogRelReturn(RT_VALID_ALIGNED_PTR(startGCPhys, PAGE_SIZE) || startGCPhys == 0, VERR_INVALID_POINTER);
+    AssertLogRelReturn(RT_VALID_ALIGNED_PTR(endGCPhys + 1 , PAGE_SIZE), VERR_INVALID_POINTER);
+
+    auto mapForDma = [](uintptr_t hva, RTGCPHYS gpa, uint64_t size, int containerFd) -> int
+    {
+        struct vfio_iommu_type1_dma_map dma;
+        dma.argsz = sizeof(dma);
+        dma.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+        dma.vaddr = hva;
+        dma.iova = gpa;
+        dma.size = size;
+        int rc  {ioctl(containerFd, VFIO_IOMMU_MAP_DMA, &dma)};
+        AssertLogRelMsgReturn(rc == 0, ("VFIO: Could not acquire enough memory to map the Guest Physical address space. Adapt your ulimit\n"), VERR_NO_MEMORY);
+        return VINF_SUCCESS;
+    };
+
+    /* The range collected so far: host start, guest start, host address of its last page, size in bytes. */
+    uintptr_t rangeHostStart {0};
+    RTGCPHYS rangeGuestStart {0};
+    uintptr_t rangeHostLast {0};
+    uint64_t rangeBytes {0};
+
+    for (RTGCPHYS pageAddress {startGCPhys}; pageAddress < endGCPhys; pageAddress += PAGE_SIZE)
+    {
+        void* pvPage {nullptr};
+        const bool translated = RT_SUCCESS(PGMR3PhysTlbGCPhys2Ptr(pVM, pageAddress, true, &pvPage));
+        const uintptr_t hostPage {reinterpret_cast<uintptr_t>(pvPage)};
+
+        if (translated and rangeBytes > 0 and rangeHostLast + PAGE_SIZE == hostPage)
+        {
+            rangeHostLast = hostPage;
+            rangeBytes += PAGE_SIZE;
+            continue;
+        }
+
+        /* The page does not extend the collected range, so map what has been collected up to here. */
+        if (rangeBytes != 0)
+        {
+            int rc {mapForDma(rangeHostStart, rangeGuestStart, rangeBytes, vfioContainerFd)};
+            AssertLogRelReturn(RT_SUCCESS(rc), rc);
+        }
+
+        /* A translated page opens the next range, an untranslated one leaves the collected state empty. */
+        rangeHostStart = translated ? hostPage : 0;
+        rangeHostLast = rangeHostStart;
+        rangeGuestStart = translated ? pageAddress : 0;
+        rangeBytes = translated ? PAGE_SIZE : 0;
+    }
+
+    /* Map whatever is still collected after the last page. */
+    if (rangeBytes != 0)
+    {
+        int rc {mapForDma(rangeHostStart, rangeGuestStart, rangeBytes, vfioContainerFd)};
+        AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    }
+
+    return VINF_SUCCESS;
+}
+
+/** Registers every RAM range of the VM that is not flagged as MMIO for DMA; stops at the first failure. */
+int VfioDevice::initializeDma(PPDMDEVINS pDevIns)
+{
+    auto pVM {PDMDevHlpGetVM(pDevIns)};
+    const uint32_t cRanges {PGMR3PhysGetRamRangeCount(pVM)};
+
+    for (uint32_t iRange {0u}; iRange < cRanges; ++iRange)
+    {
+        RTGCPHYS GCPhysFirst, GCPhysLast;
+        bool fMmio;
+        if (RT_FAILURE(PGMR3PhysGetRange(pVM, iRange, &GCPhysFirst, &GCPhysLast, nullptr, &fMmio)) or fMmio)
+        {
+            continue; /* The range could not be queried or is MMIO: nothing to map. */
+        }
+        int rc {registerDmaRange(pVM, GCPhysFirst, GCPhysLast)};
+        AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    }
+    return VINF_SUCCESS;
+}
+
+/**
+ * Disables the interrupts, waits for the IRQ delivery thread, drops the MSI/MSI-X bookkeeping and closes the
+ * VFIO descriptors. Returns VINF_SUCCESS or the status of the first step that failed.
+ */
+int VfioDevice::terminate(PPDMDEVINS pDevIns)
+{
+    int rc {disableInterrupts(pDevIns)};
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+
+    exit = true;
+    rc = RTThreadWaitNoResume(hIrqDeliveryThread, RT_INDEFINITE_WAIT, nullptr);
+    AssertLogRelReturn(RT_SUCCESS(rc), rc);
+    exit = false;
+
+    aMsixTable.clear();
+    msiCapabilityIterator = std::nullopt;
+    msixCapabilityIterator = std::nullopt;
+
+    /* Close all three descriptors even if one close() fails and always forget the descriptor numbers: a
+     * failure must neither leak the remaining descriptors nor leave a stale number for a later close(). */
+    for (auto* pFd : {&vfioDeviceFd, &vfioGroupFd, &vfioContainerFd})
+    {
+        const int rcClose {close(*pFd)};
+        *pFd = -1;
+        AssertLogRel(RT_SUCCESS(rcClose));
+        rc = RT_SUCCESS(rc) ? rcClose : rc;
+    }
+
+    return rc;
+}
+
+/**
+ * Updates the IO/MEMORY decoding flags from a PCI command register value and (re-)registers the BARs of every
+ * decoding type whose flag ended up set.
+ */
+void VfioDevice::tryHandleBarInterception(PPDMDEVINS pDevIns, uint32_t pciConfigCommandValue)
+{
+    /*
+     * Each flag becomes true only if its decoding bit is set AND the flag was false before; in every other
+     * case it becomes false.
+     * NOTE(review): a repeated write that keeps a decoding bit set therefore clears the flag again, so the
+     * flags alternate instead of following the command register - confirm whether this is intended.
+     */
+    const bool ioBitSet {(pciConfigCommandValue & VBOX_PCI_COMMAND_IO) != 0};
+    const bool memoryBitSet {(pciConfigCommandValue & VBOX_PCI_COMMAND_MEMORY) != 0};
+    pciConfigIODecodingEnabled = ioBitSet and not pciConfigIODecodingEnabled;
+    pciConfigMemoryDecodingEnabled = memoryBitSet and not pciConfigMemoryDecodingEnabled;
+
+    if (not pciConfigIODecodingEnabled and not pciConfigMemoryDecodingEnabled)
+    {
+        return;
+    }
+
+    for (auto i {0u}; i < VBOX_PCI_MAX_BARS; ++i)
+    {
+        const auto barInfo {getBarInfo(i)};
+        if (pciBars[i].hRegion == NIL_IOMMMIOHANDLE or pciBars[i].hRegion == 0)
+        {
+            continue; /* No region handle is stored for this BAR. */
+        }
+        if (barInfo.isIoBar() and pciConfigIODecodingEnabled)
+        {
+            registerPCIBar<0>(PDMDevHlpIoPortMap, PDMDevHlpIoPortUnmap, pDevIns, pciBars[i], barInfo.getBarAddress());
+        }
+        else if (barInfo.isMmioBar() and pciConfigMemoryDecodingEnabled)
+        {
+            registerPCIBar<NIL_RTGCPHYS>(PDMDevHlpMmioMap, PDMDevHlpMmioUnmap, pDevIns, pciBars[i], barInfo.getBarAddress());
+        }
+    }
+}
+
+/** Reads BAR @a barNumber from the config space; a BAR that is not 64 bit wide is cut down to its low 32 bit. */
+const PCIBar VfioDevice::getBarInfo(unsigned barNumber)
+{
+    uint64_t barOffset { VBOX_PCI_BASE_ADDRESS_0 + barNumber * sizeof(uint32_t)};
+    /* 8 bytes are read so that a 64 bit BAR arrives in one piece. The value starts at 0 because the read
+     * status is not checked here and a failed read must not leave an indeterminate value behind. */
+    uint64_t barValue {0};
+    readConfigSpace(barValue, sizeof(barValue), barOffset);
+
+    PCIBar bar {barValue};
+    if (bar.is64BitBar()) {
+        return bar;
+    }
+    return {barValue & std::numeric_limits<uint32_t>::max()};
+}
diff --git a/src/VBox/Devices/Makefile.kmk b/src/VBox/Devices/Makefile.kmk
index e31080d7af..a7c089caa2 100644
--- a/src/VBox/Devices/Makefile.kmk
+++ b/src/VBox/Devices/Makefile.kmk
@@ -190,6 +190,8 @@ if !defined(VBOX_ONLY_EXTPACKS) && "$(intersects $(KBUILD_TARGET_ARCH),$(VBOX_SU
  	Input/UsbMouse.cpp \
  	Bus/DevPCI.cpp \
  	Bus/DevPciIch9.cpp \
+	$(if-expr defined(VBOX_WITH_KVM), Bus/DevVfio.cpp,) \
+	$(if-expr defined(VBOX_WITH_KVM), Bus/VfioDevice.cpp,) \
  	Bus/MsiCommon.cpp \
  	Bus/MsixCommon.cpp \
  	$(if $(VBOX_WITH_IOMMU_AMD),Bus/DevIommuAmd.cpp,) \
@@ -256,6 +258,10 @@ if !defined(VBOX_ONLY_EXTPACKS) && "$(intersects $(KBUILD_TARGET_ARCH),$(VBOX_SU
   VBoxDD_SOURCES += Storage/DrvHostFloppy.cpp
  endif
 
+ # VFIO
+ VBoxDD_LIBS.linux += $(LIB_VMM)
+ Bus/DevVfio.cpp_CXXFLAGS.linux += $(CYBERUS_CXX_FLAGS)
+ Bus/VfioDevice.cpp_CXXFLAGS.linux += $(CYBERUS_CXX_FLAGS)
 
  ifn1of ($(KBUILD_TARGET), darwin)
   VBoxDD_SOURCES += Storage/HBDMgmt-generic.cpp
diff --git a/src/VBox/Devices/build/VBoxDD.cpp b/src/VBox/Devices/build/VBoxDD.cpp
index 32a67a08f5..f355b992a6 100644
--- a/src/VBox/Devices/build/VBoxDD.cpp
+++ b/src/VBox/Devices/build/VBoxDD.cpp
@@ -218,6 +218,9 @@ extern "C" DECLEXPORT(int) VBoxDevicesRegister(PPDMDEVREGCB pCallbacks, uint32_t
     if (RT_FAILURE(rc))
         return rc;
 #endif
+    rc = pCallbacks->pfnRegister(pCallbacks, &g_DeviceVfioDev);
+    if (RT_FAILURE(rc))
+        return rc;
     rc = pCallbacks->pfnRegister(pCallbacks, &g_DeviceGIMDev);
     if (RT_FAILURE(rc))
         return rc;
diff --git a/src/VBox/Devices/build/VBoxDD.h b/src/VBox/Devices/build/VBoxDD.h
index 557d071213..ef70e457af 100644
--- a/src/VBox/Devices/build/VBoxDD.h
+++ b/src/VBox/Devices/build/VBoxDD.h
@@ -107,6 +107,7 @@ extern const PDMDEVREG g_DeviceEFI;
 #ifdef VBOX_WITH_PCI_PASSTHROUGH_IMPL
 extern const PDMDEVREG g_DevicePciRaw;
 #endif
+extern const PDMDEVREG g_DeviceVfioDev;
 extern const PDMDEVREG g_DeviceGIMDev;
 extern const PDMDEVREG g_DeviceLPC;
 #ifdef VBOX_WITH_VIRTUALKD
diff --git a/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp b/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
index e229f4119f..64bf2b1df7 100644
--- a/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
+++ b/src/VBox/Frontends/VBoxManage/VBoxManageInfo.cpp
@@ -2748,6 +2748,30 @@ HRESULT showVMInfo(ComPtr<IVirtualBox> pVirtualBox,
     /* Host PCI passthrough devices */
 #endif
 
+    /* VFIO devices assigned to the machine, one output line per device path. */
+    SafeArray<BSTR> vfioDevices;
+    hrc = machine->COMGETTER(VFIODeviceAssignments)(ComSafeArrayAsOutParam(vfioDevices));
+    if (SUCCEEDED(hrc))
+    {
+        if (vfioDevices.size() > 0 && (details != VMINFO_MACHINEREADABLE))
+        {
+            RTPrintf("\n Attached VFIO Devices: \n\n");
+        }
+        for (size_t i {0}; i < vfioDevices.size(); ++i)
+        {
+            Utf8Str devicePath {vfioDevices[i]};
+            if (details == VMINFO_MACHINEREADABLE)
+            {
+                /* i is a size_t, so it needs the z length modifier; %d would fetch an int from the varargs. */
+                RTPrintf("AttachedVFIO%zu=%s\n", i, devicePath.c_str());
+            }
+            else
+            {
+                RTPrintf("   VFIO Device %s is attached\n", devicePath.c_str());
+            }
+        }
+    }
+
     /*
      * Bandwidth groups
      */
diff --git a/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp b/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
index c433fc12d2..6c80d40e06 100644
--- a/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
+++ b/src/VBox/Frontends/VBoxManage/VBoxManageModifyVM.cpp
@@ -214,6 +214,8 @@ enum
     MODIFYVM_ATTACH_PCI,
     MODIFYVM_DETACH_PCI,
 #endif
+    MODIFYVM_ATTACH_VFIO,
+    MODIFYVM_DETACH_VFIO,
 #ifdef VBOX_WITH_USB_CARDREADER
     MODIFYVM_USBCARDREADER,
 #endif
@@ -466,6 +468,8 @@ static const RTGETOPTDEF g_aModifyVMOptions[] =
     OPT2("--pci-attach",                    "--pciattach",              MODIFYVM_ATTACH_PCI,                RTGETOPT_REQ_STRING),
     OPT2("--pci-detach",                    "--pcidetach",              MODIFYVM_DETACH_PCI,                RTGETOPT_REQ_STRING),
 #endif
+    { "--attachvfio",               MODIFYVM_ATTACH_VFIO,               RTGETOPT_REQ_STRING },
+    { "--detachvfio",               MODIFYVM_DETACH_VFIO,               RTGETOPT_REQ_STRING },
 #ifdef VBOX_WITH_USB_CARDREADER
     OPT2("--usb-card-reader",               "--usbcardreader",          MODIFYVM_USBCARDREADER,             RTGETOPT_REQ_BOOL_ONOFF),
 #endif
@@ -3593,6 +3597,17 @@ RTEXITCODE handleModifyVM(HandlerArg *a)
                 break;
             }
 #endif
+            case MODIFYVM_ATTACH_VFIO:
+            {
+                CHECK_ERROR(sessionMachine, AttachVFIODevice(Bstr(ValueUnion.psz).raw()));
+                break;
+            }
+
+            case MODIFYVM_DETACH_VFIO:
+            {
+                CHECK_ERROR(sessionMachine, DetachVFIODevice(Bstr(ValueUnion.psz).raw()));
+                break;
+            }
 
 #ifdef VBOX_WITH_USB_CARDREADER
             case MODIFYVM_USBCARDREADER:
diff --git a/src/VBox/Main/idl/VirtualBox.xidl b/src/VBox/Main/idl/VirtualBox.xidl
index b4472bb330..ec1e4974c7 100644
--- a/src/VBox/Main/idl/VirtualBox.xidl
+++ b/src/VBox/Main/idl/VirtualBox.xidl
@@ -8055,6 +8055,12 @@
       </desc>
     </attribute>
 
+    <attribute name="VFIODeviceAssignments" type="wstring" readonly="yes" safearray="yes">
+      <desc>
+        Array of VFIO Device paths, assigned to this machine.
+      </desc>
+    </attribute>
+
     <attribute name="bandwidthControl" type="IBandwidthControl" readonly="yes">
       <desc>
         Bandwidth control manager.
@@ -9207,6 +9213,24 @@
       </param>
     </method>
 
+    <method name="attachVFIODevice">
+      <desc>
+        Attach a host VFIO device to the virtual machine.
+      </desc>
+      <param name="hostFileName" type="wstring" dir="in">
+        <desc> Absolute path to the device file in sysfs.</desc>
+      </param>
+    </method>
+
+    <method name="detachVFIODevice">
+      <desc>
+        Detach a host VFIO device from the virtual machine.
+      </desc>
+      <param name="hostFileName" type="wstring" dir="in">
+        <desc> Absolute path to the device file in sysfs.</desc>
+      </param>
+    </method>
+
     <method name="getNetworkAdapter" const="yes">
       <rest request="get" path="/vms/{vmid}/configuration/"/>
       <desc>
diff --git a/src/VBox/Main/include/ConsoleImpl.h b/src/VBox/Main/include/ConsoleImpl.h
index 366f1b39ad..1d3e8155d6 100644
--- a/src/VBox/Main/include/ConsoleImpl.h
+++ b/src/VBox/Main/include/ConsoleImpl.h
@@ -847,6 +847,7 @@ private:
                                                      bool fForce);
 
     HRESULT i_attachRawPCIDevices(PUVM pUVM, BusAssignmentManager *BusMgr, PCFGMNODE pDevices);
+    HRESULT i_attachVfioDevices(BusAssignmentManager *BusMgr, PCFGMNODE pDevices, PCVMMR3VTABLE pVMM);
     struct LEDSET;
     typedef struct LEDSET *PLEDSET;
     PPDMLED volatile *i_getLedSet(uint32_t iLedSet);
diff --git a/src/VBox/Main/include/MachineImpl.h b/src/VBox/Main/include/MachineImpl.h
index dc11e96d59..3b66f995c5 100644
--- a/src/VBox/Main/include/MachineImpl.h
+++ b/src/VBox/Main/include/MachineImpl.h
@@ -339,6 +339,8 @@ public:
         typedef std::list<ComObjPtr<PCIDeviceAttachment> > PCIDeviceAssignmentList;
         PCIDeviceAssignmentList mPCIDeviceAssignments;
 
+        std::vector<Utf8Str> mVFIODeviceAssignments;
+
         settings::Debugging mDebugging;
         settings::Autostart mAutostart;
 
@@ -1011,6 +1013,7 @@ private:
     HRESULT getIOCacheSize(ULONG *aIOCacheSize);
     HRESULT setIOCacheSize(ULONG aIOCacheSize);
     HRESULT getPCIDeviceAssignments(std::vector<ComPtr<IPCIDeviceAttachment> > &aPCIDeviceAssignments);
+    HRESULT getVFIODeviceAssignments(std::vector<com::Utf8Str> &aVFIODeviceAssignments);
     HRESULT getBandwidthControl(ComPtr<IBandwidthControl> &aBandwidthControl);
     HRESULT getTracingEnabled(BOOL *aTracingEnabled);
     HRESULT setTracingEnabled(BOOL aTracingEnabled);
@@ -1110,6 +1113,8 @@ private:
                                 LONG aDesiredGuestAddress,
                                 BOOL aTryToUnbind);
     HRESULT detachHostPCIDevice(LONG aHostAddress);
+    HRESULT attachVFIODevice(const com::Utf8Str &aDevicePath);
+    HRESULT detachVFIODevice(const com::Utf8Str &aDevicePath);
     HRESULT getNetworkAdapter(ULONG aSlot,
                               ComPtr<INetworkAdapter> &aAdapter);
     HRESULT addStorageController(const com::Utf8Str &aName,
diff --git a/src/VBox/Main/src-client/BusAssignmentManager.cpp b/src/VBox/Main/src-client/BusAssignmentManager.cpp
index 9f87323810..dbdffead42 100644
--- a/src/VBox/Main/src-client/BusAssignmentManager.cpp
+++ b/src/VBox/Main/src-client/BusAssignmentManager.cpp
@@ -109,17 +109,18 @@ static const DeviceAssignmentRule g_aGenericRules[] =
 #endif
 
     /* Network controllers */
-    /* the first network card gets the PCI ID 3, the next 3 gets 8..10,
-     * next 4 get 16..19. In "VMWare compatibility" mode the IDs 3 and 17
-     * swap places, i.e. the first card goes to ID 17=0x11. */
+    /* the first network card gets the PCI ID 3, the next 3 gets 8..10 */
+
     {"nic",           0,  3,  0, 1},
     {"nic",           0,  8,  0, 1},
     {"nic",           0,  9,  0, 1},
     {"nic",           0, 10,  0, 1},
-    {"nic",           0, 16,  0, 1},
-    {"nic",           0, 17,  0, 1},
-    {"nic",           0, 18,  0, 1},
-    {"nic",           0, 19,  0, 1},
+
+    /* Vfio Devices */
+    {"vfio",           0, 16,  0, 1},
+    {"vfio",           0, 17,  0, 1},
+    {"vfio",           0, 18,  0, 1},
+    {"vfio",           0, 19,  0, 1},
 
     /* ISA/LPC controller */
     {"lpc",           0, 31,  0, 0},
diff --git a/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp b/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
index f0b2ee0f0f..6ebebb28cc 100644
--- a/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
+++ b/src/VBox/Main/src-client/ConsoleImplConfigCommon.cpp
@@ -536,7 +536,6 @@ int Console::i_configConstructorInner(PUVM pUVM, PVM pVM, PCVMMR3VTABLE pVMM, Au
     return VERR_PLATFORM_ARCH_NOT_SUPPORTED;
 }
 
-
 /**
  * Configures an audio driver via CFGM by getting (optional) values from extra data.
  *
diff --git a/src/VBox/Main/src-server/MachineImpl.cpp b/src/VBox/Main/src-server/MachineImpl.cpp
index 59fc285582..0fb5e25735 100644
--- a/src/VBox/Main/src-server/MachineImpl.cpp
+++ b/src/VBox/Main/src-server/MachineImpl.cpp
@@ -6423,6 +6423,80 @@ HRESULT Machine::getPCIDeviceAssignments(std::vector<ComPtr<IPCIDeviceAttachment
     return S_OK;
 }
 
+/**
+ * Adds @a aDevicePath to the VFIO device assignments of this machine.
+ *
+ * @returns S_OK, the failure of the mutability check or of the settings backup, or E_INVALIDARG if the path
+ *          is already assigned.
+ */
+HRESULT Machine::attachVFIODevice(const com::Utf8Str &aDevicePath)
+{
+    AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
+
+    HRESULT hrc = i_checkStateDependency(MutableStateDep);
+    if (not SUCCEEDED(hrc))
+    {
+        return hrc;
+    }
+
+    if (std::find(mHWData->mVFIODeviceAssignments.begin(), mHWData->mVFIODeviceAssignments.end(), aDevicePath)
+        != mHWData->mVFIODeviceAssignments.end())
+    {
+        /* The message is formatted printf-style: %s needs the C string, not the Utf8Str object itself. */
+        return setError(E_INVALIDARG, tr("The VFIO device %s is already attached"), aDevicePath.c_str());
+    }
+
+    hrc = mHWData.backupEx();
+    if (not SUCCEEDED(hrc)) {
+        return hrc;
+    }
+
+    mHWData->mVFIODeviceAssignments.emplace_back(aDevicePath);
+    return S_OK;
+}
+
+/**
+ * Removes @a aDevicePath from the VFIO device assignments of this machine.
+ *
+ * @returns S_OK, the failure of the mutability check or of the settings backup, or VBOX_E_OBJECT_NOT_FOUND
+ *          if the path is not assigned.
+ */
+HRESULT Machine::detachVFIODevice(const com::Utf8Str &aDevicePath)
+{
+    AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
+
+    HRESULT hrc = i_checkStateDependency(MutableStateDep);
+    if (not SUCCEEDED(hrc))
+    {
+        return hrc;
+    }
+
+    /* NOTE(review): the lookup is kept after backupEx(), presumably so that the iterator belongs to the
+     * container that is modified afterwards - confirm before reordering. */
+    hrc = mHWData.backupEx();
+    if (not SUCCEEDED(hrc)) {
+        return hrc;
+    }
+
+    auto &assignments {mHWData->mVFIODeviceAssignments};
+    auto it {std::find(assignments.begin(), assignments.end(), aDevicePath)};
+    if (it == assignments.end())
+    {
+        return setError(VBOX_E_OBJECT_NOT_FOUND, tr("No VFIO device %s attached"), aDevicePath.c_str());
+    }
+
+    assignments.erase(it);
+    return S_OK;
+}
+
+/** Appends the VFIO device paths assigned to this machine to @a aVFIODeviceAssignments. */
+HRESULT Machine::getVFIODeviceAssignments(std::vector<com::Utf8Str>& aVFIODeviceAssignments)
+{
+    AutoReadLock alock(this COMMA_LOCKVAL_SRC_POS);
+    aVFIODeviceAssignments.insert(aVFIODeviceAssignments.end(), mHWData->mVFIODeviceAssignments.begin(), mHWData->mVFIODeviceAssignments.end());
+    return S_OK;
+}
+
 HRESULT Machine::getBandwidthControl(ComPtr<IBandwidthControl> &aBandwidthControl)
 {
     mBandwidthControl.queryInterfaceTo(aBandwidthControl.asOutParam());
@@ -8853,6 +8927,12 @@ HRESULT Machine::i_loadHardware(const Guid *puuidRegistry,
             mHWData->mPCIDeviceAssignments.push_back(pda);
         }
 
+        // VFIO Devices: take over the device paths from the settings (by reference, no per-entry copy).
+        for (const auto &deviceAssignment : data.vfioAttachments)
+        {
+            mHWData->mVFIODeviceAssignments.push_back(deviceAssignment.strDevicePath);
+        }
+
         /*
          * (The following isn't really real hardware, but it lives in HWData
          * for reasons of convenience.)
@@ -10234,6 +10314,17 @@ HRESULT Machine::i_saveHardware(settings::Hardware &data, settings::Debugging *p
             data.pciAttachments.push_back(hpda);
         }
 
+        /* VFIO Devices: rebuild the settings list from the current assignments (iterated by reference). */
+        data.vfioAttachments.clear();
+        for (const auto &devStr : mHWData->mVFIODeviceAssignments)
+        {
+            settings::VFIODeviceAttachment vfioda;
+
+            vfioda.strDevicePath = devStr;
+
+            data.vfioAttachments.push_back(vfioda);
+        }
+
         // guest properties
         data.llGuestProperties.clear();
 #ifdef VBOX_WITH_GUEST_PROPS
diff --git a/src/VBox/Main/xml/Settings.cpp b/src/VBox/Main/xml/Settings.cpp
index ea46a45ab8..a75cde069c 100644
--- a/src/VBox/Main/xml/Settings.cpp
+++ b/src/VBox/Main/xml/Settings.cpp
@@ -3967,6 +3967,22 @@ bool HostPCIDeviceAttachment::operator==(const HostPCIDeviceAttachment &a) const
             && strDeviceName  == a.strDeviceName);
 }
 
+/**
+ * VFIODeviceAttachment Constructor.
+ */
+VFIODeviceAttachment::VFIODeviceAttachment() {}
+
+/**
+ * Equality check: two attachments are equal if they are the same object or carry the same device path.
+ * Hardware::operator== relies on it when it compares the vfioAttachments of two configurations.
+ */
+bool VFIODeviceAttachment::operator==(const VFIODeviceAttachment &a) const
+{
+    if (this == &a)
+        return true;
+    return strDevicePath == a.strDevicePath;
+}
+
 #ifdef VBOX_WITH_VIRT_ARMV8
 PlatformARM::PlatformARM()
 {
@@ -4214,6 +4230,7 @@ bool Hardware::operator==(const Hardware& h) const
             && llGuestProperties              == h.llGuestProperties
             && ioSettings                     == h.ioSettings
             && pciAttachments                 == h.pciAttachments
+            && vfioAttachments                == h.vfioAttachments
             && strDefaultFrontend             == h.strDefaultFrontend);
 }
 
@@ -6125,6 +6142,26 @@ void MachineConfigFile::readHardware(const xml::ElementNode &elmHardware,
                 }
             }
         }
+        else if (pelmHwChild->nameEquals("Vfio"))
+        {
+            const xml::ElementNode *pelmDevices;
+            if ((pelmDevices = pelmHwChild->findChildElement("Devices")))
+            {
+                xml::NodesLoop nl2(*pelmDevices, "Device");
+                const xml::ElementNode *pelmDevice;
+                while ((pelmDevice = nl2.forAllNodes()))
+                {
+                    VFIODeviceAttachment vfioda;
+
+                    if (!pelmDevice->getAttributeValue("devicePath", vfioda.strDevicePath))
+                    {
+                        throw ConfigFileError(this, pelmDevice, N_("Missing Device/@devicePath attribute"));
+                    }
+
+                    hw.vfioAttachments.push_back(vfioda);
+                }
+            }
+        }
         else if (pelmHwChild->nameEquals("EmulatedUSB"))
         {
             const xml::ElementNode *pelmCardReader;
@@ -8438,6 +8475,20 @@ void MachineConfigFile::buildHardwareXML(xml::ElementNode &elmParent,
         }
     }
 
+    /* VFIO attachments are written as Vfio/Devices/Device elements, from settings version 1.17 on only. */
+    if (   m->sv >= SettingsVersion_v1_17
+        && hw.vfioAttachments.size())
+    {
+        xml::ElementNode *pelmVFIO = pelmHardware->createChild("Vfio");
+        xml::ElementNode *pelmVFIODevices = pelmVFIO->createChild("Devices");
+
+        for (const auto &deviceAssignment : hw.vfioAttachments)
+        {
+            xml::ElementNode *pelmThis = pelmVFIODevices->createChild("Device");
+            pelmThis->setAttribute("devicePath",  deviceAssignment.strDevicePath);
+        }
+    }
+
     if (   m->sv >= SettingsVersion_v1_12
         && hw.fEmulatedUSBCardReader)
     {
@@ -9729,6 +9780,12 @@ void MachineConfigFile::bumpSettingsVersionIfNeeded()
                 return;
             }
         }
+
+        if (hardwareMachine.vfioAttachments.size() > 0)
+        {
+            m->sv = SettingsVersion_v1_17;
+            return;
+        }
     }
 
     if (m->sv < SettingsVersion_v1_16)
diff --git a/src/VBox/Runtime/VBox/log-vbox.cpp b/src/VBox/Runtime/VBox/log-vbox.cpp
index c1edf5a81b..522fd4c0cd 100644
--- a/src/VBox/Runtime/VBox/log-vbox.cpp
+++ b/src/VBox/Runtime/VBox/log-vbox.cpp
@@ -272,6 +272,7 @@ RTDECL(PRTLOGGER) RTLogDefaultInit(void)
     ASSERT_LOG_GROUP(DEV_SB16);
     ASSERT_LOG_GROUP(DEV_SERIAL);
     ASSERT_LOG_GROUP(DEV_SMC);
+    ASSERT_LOG_GROUP(DEV_VFIO);
     ASSERT_LOG_GROUP(DEV_VGA);
     ASSERT_LOG_GROUP(DEV_VIRTIO);
     ASSERT_LOG_GROUP(DEV_VIRTIO_NET);
-- 
2.45.0
openSUSE Build Service is sponsored by