File 20081-acs-filter.patch of Package xen
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1250683936 -3600
# Node ID 4a517458406ffdebf4f84d9f09a733de0bea6b22
# Parent c0576bd2ddfdfc7d6334b971490adfea60f6ee9f
xend: passthrough: check if a device is behind PCIe switch that lacks ACS
Imagine a PCIe switch that does not support ACS (Access Control
Services) and has two downstream ports, A and B. According to the PCIe
spec, the switch routes a transaction that comes from A and targets a
device under B directly, so the Root Complex and the IOMMU engine are
bypassed. This does not work at all for an hvm guest and can even
cause a potential security issue, so we should not allow this kind of
device assignment.
If all the intermediate PCIe switches between a device and the Root Complex
support and enable ACS, we can safely assign the device to a guest.
Cc: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Index: xen-3.3.1-testing/tools/python/xen/util/pci.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/util/pci.py
+++ xen-3.3.1-testing/tools/python/xen/util/pci.py
@@ -58,12 +58,18 @@ PCI_BRIDGE_CTL_BUS_RESET= 0x40
PCI_CAP_ID_EXP = 0x10
PCI_EXP_FLAGS = 0x2
PCI_EXP_FLAGS_TYPE = 0x00f0
+PCI_EXP_TYPE_DOWNSTREAM = 0x6
PCI_EXP_TYPE_PCI_BRIDGE = 0x7
PCI_EXP_DEVCAP = 0x4
PCI_EXP_DEVCAP_FLR = (0x1 << 28)
PCI_EXP_DEVCTL = 0x8
PCI_EXP_DEVCTL_FLR = (0x1 << 15)
+PCI_EXT_CAP_ID_ACS = 0x000d
+PCI_EXT_CAP_ACS_ENABLED = 0x1d # The bits V, R, C, U.
+PCI_EXT_ACS_CTRL = 0x06
+
+
PCI_CAP_ID_PM = 0x01
PCI_PM_CTRL = 4
PCI_PM_CTRL_NO_SOFT_RESET = 0x0004
@@ -382,10 +388,15 @@ class PciDevice:
self.subvendorname = ""
self.subdevicename = ""
self.dev_type = None
+ self.is_downstream_port = False
+ self.acs_enabled = False
self.has_non_page_aligned_bar = False
self.pcie_flr = False
self.pci_af_flr = False
self.detect_dev_info()
+ if (self.dev_type == DEV_TYPE_PCI_BRIDGE) or \
+ (self.dev_type == DEV_TYPE_PCIe_BRIDGE):
+ return
self.get_info_from_sysfs()
self.get_info_from_lspci()
@@ -588,6 +599,51 @@ class PciDevice:
(strerr, errno)))
return pos
+ def find_ext_cap(self, cap):
+ path = find_sysfs_mnt()+SYSFS_PCI_DEVS_PATH+'/'+ \
+ self.name+SYSFS_PCI_DEV_CONFIG_PATH
+
+ ttl = 480; # 3840 bytes, minimum 8 bytes per capability
+ pos = 0x100
+
+ try:
+ fd = os.open(path, os.O_RDONLY)
+ os.lseek(fd, pos, 0)
+ h = os.read(fd, 4)
+ if len(h) == 0: # MMCONF is not enabled?
+ return 0
+ header = struct.unpack('I', h)[0]
+ if header == 0 or header == -1:
+ return 0
+
+ while ttl > 0:
+ if (header & 0x0000ffff) == cap:
+ return pos
+ pos = (header >> 20) & 0xffc
+ if pos < 0x100:
+ break
+ os.lseek(fd, pos, 0)
+ header = struct.unpack('I', os.read(fd, 4))[0]
+ ttl = ttl - 1
+ os.close(fd)
+ except OSError, (errno, strerr):
+ raise PciDeviceParseError(('Error when accessing sysfs: %s (%d)' %
+ (strerr, errno)))
+ return 0
+
+ def is_behind_switch_lacking_acs(self):
+ # If any intermediate PCIe switch between the Root Complex and this
+ # function does not support ACS or does not have ACS enabled, return
+ # True, meaning the function must not be assigned to a guest because of
+ # the potential security issue.
+ parent = self.find_parent()
+ while parent is not None:
+ dev_parent = PciDevice(parent)
+ if dev_parent.is_downstream_port and not dev_parent.acs_enabled:
+ return True
+ parent = dev_parent.find_parent()
+ return False
+
def pci_conf_read8(self, pos):
fd = os.open(self.cfg_space_path, os.O_RDONLY)
os.lseek(fd, pos, 0)
@@ -641,11 +697,19 @@ class PciDevice:
self.dev_type = DEV_TYPE_PCI_BRIDGE
else:
creg = self.pci_conf_read16(pos + PCI_EXP_FLAGS)
- if ((creg & PCI_EXP_TYPE_PCI_BRIDGE) >> 4) == \
- PCI_EXP_TYPE_PCI_BRIDGE:
+ type = (creg & PCI_EXP_FLAGS_TYPE) >> 4
+ if type == PCI_EXP_TYPE_PCI_BRIDGE:
self.dev_type = DEV_TYPE_PCI_BRIDGE
else:
self.dev_type = DEV_TYPE_PCIe_BRIDGE
+ if type == PCI_EXP_TYPE_DOWNSTREAM:
+ self.is_downstream_port = True
+ pos = self.find_ext_cap(PCI_EXT_CAP_ID_ACS)
+ if pos != 0:
+ ctrl = self.pci_conf_read16(pos + PCI_EXT_ACS_CTRL)
+ if (ctrl & PCI_EXT_CAP_ACS_ENABLED) == \
+ PCI_EXT_CAP_ACS_ENABLED:
+ self.acs_enabled = True
else:
if pos != 0:
self.dev_type = DEV_TYPE_PCIe_ENDPOINT
Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -586,6 +586,14 @@ class XendDomainInfo:
log.error("Device model's pci dev num dismatch")
return
+ # Check if there is an intermediate PCIe switch lacking ACS between the
+ # device and the Root Complex.
+ if pci_device.is_behind_switch_lacking_acs():
+ err_msg = 'pci: to avoid potential security issue, %s is not'+\
+ ' allowed to be assigned to guest since it is behind'+\
+ ' PCIe switch that does not support or enable ACS.'
+ raise VmError(err_msg % pci_device.name)
+
#update the vslot info
count = 0;
for x in pci_devs:
Index: xen-3.3.1-testing/tools/python/xen/xend/server/pciif.py
===================================================================
--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/pciif.py
+++ xen-3.3.1-testing/tools/python/xen/xend/server/pciif.py
@@ -380,6 +380,15 @@ class PciController(DevController):
except Exception, e:
raise VmError("pci: failed to locate device and "+
"parse it's resources - "+str(e))
+
+ # Check if there is an intermediate PCIe switch lacking ACS between the
+ # device and the Root Complex.
+ if self.vm.info.is_hvm() and dev.is_behind_switch_lacking_acs():
+ err_msg = 'pci: to avoid potential security issue, %s is not'+\
+ ' allowed to be assigned to guest since it is behind'+\
+ ' PCIe switch that does not support or enable ACS.'
+ raise VmError(err_msg % dev.name)
+
if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr:
if dev.bus == 0:
# We cope with this case by using the Dstate transition