File 5d9ee312-AMD-IOMMU-prefill-all-DTEs.patch of Package xen.18780
# Commit 1b00c16bdfbec98887731a40ea9f377f7dcac405
# Date 2019-10-10 09:51:46 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD/IOMMU: pre-fill all DTEs right after table allocation
Make sure we don't leave any DTEs through which unexpected requests
would be passed through untranslated. Set V and IV right away (with
all other fields left as zero), relying on the V and/or IV bits
getting cleared only by amd_iommu_set_root_page_table() and
amd_iommu_set_intremap_table() under special pass-through circumstances.
Switch back to initial settings in amd_iommu_disable_domain_device().
Take the liberty to also make the latter function static, constifying
its first parameter at the same time, on this occasion.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
# Commit f06d11d5a8339bb5a525069fdffdb45ca811d6f8
# Date 2019-11-15 14:17:26 +0100
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD/IOMMU: restore DTE fields in amd_iommu_setup_domain_device()
Commit 1b00c16bdf ("AMD/IOMMU: pre-fill all DTEs right after table
allocation") moved ourselves into a more secure default state, but
didn't take sufficient care to also undo the effects when handing a
previously disabled device back to a(nother) domain. Put the fields
that may have been changed elsewhere back to their intended values
(some fields amd_iommu_disable_domain_device() touches don't
currently get written anywhere else, and hence don't need modifying
here).
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
# Commit 195b79a97e6721ba8830036f47d2454545f32e44
# Date 2019-11-27 11:28:24 +0000
# Author Igor Druzhinin <igor.druzhinin@citrix.com>
# Committer Andrew Cooper <andrew.cooper3@citrix.com>
AMD/IOMMU: honour IR setting while pre-filling DTEs
IV bit shouldn't be set in DTE if interrupt remapping is not
enabled. It's a regression in behavior of "iommu=no-intremap"
option which otherwise would keep interrupt requests untranslated
for all of the devices in the system regardless of whether it's
described as valid in IVRS or not.
Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -1271,12 +1271,40 @@ static int __init amd_iommu_setup_device
 
     if ( !dt )
     {
+        unsigned int size = dt_alloc_size();
+
         /* allocate 'device table' on a 4K boundary */
         dt = IVRS_MAPPINGS_DEVTAB(ivrs_mappings) =
-            allocate_buffer(dt_alloc_size(), "Device Table", true);
+            allocate_buffer(size, "Device Table", false);
+        if ( !dt )
+            return -ENOMEM;
+
+        /*
+         * Prefill every DTE such that all kinds of requests will get aborted.
+         * Besides the two bits set to true below this builds upon
+         * IOMMU_DEV_TABLE_SYS_MGT_DMA_ABORTED,
+         * IOMMU_DEV_TABLE_IO_CONTROL_ABORTED, as well as
+         * IOMMU_DEV_TABLE_INT_CONTROL_ABORTED all being zero, and us also
+         * wanting at least TV, GV, I, and EX set to false.
+         */
+        for ( bdf = 0, size /= IOMMU_DEV_TABLE_ENTRY_SIZE; bdf < size; ++bdf )
+        {
+            uint32_t *dte = dt + bdf * IOMMU_DEV_TABLE_ENTRY_SIZE;
+
+            set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
+                                 IOMMU_DEV_TABLE_VALID_MASK,
+                                 IOMMU_DEV_TABLE_VALID_SHIFT, &dte[0]);
+            dte[1] = 0;
+            dte[2] = 0;
+            dte[3] = 0;
+            set_field_in_reg_u32(iommu_intremap, 0,
+                                 IOMMU_DEV_TABLE_INT_VALID_MASK,
+                                 IOMMU_DEV_TABLE_INT_VALID_SHIFT, &dte[4]);
+            dte[5] = 0;
+            dte[6] = 0;
+            dte[7] = 0;
+        }
     }
-    if ( !dt )
-        return -ENOMEM;
 
     /* Add device table entries */
     for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -99,11 +99,60 @@ static void disable_translation(u32 *dte
 {
     u32 entry;
 
+    /* See the comment in amd_iommu_setup_device_table(). */
+    entry = dte[5];
+    set_field_in_reg_u32(IOMMU_DEV_TABLE_INT_CONTROL_ABORTED, entry,
+                         IOMMU_DEV_TABLE_INT_CONTROL_MASK,
+                         IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
+    dte[5] = entry;
+
+    smp_wmb();
+
     entry = dte[0];
     set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                          IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                          IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
+    dte[0] = entry;
+
+    entry = dte[1];
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_GV_MASK,
+                         IOMMU_DEV_TABLE_GV_SHIFT, &entry);
+    dte[1] = entry;
+
+    iommu_dte_set_iotlb(dte, IOMMU_CONTROL_DISABLED);
+
+    entry = dte[3];
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_SUPRESS_LOGGED_PAGES_MASK,
+                         IOMMU_DEV_TABLE_SUPRESS_LOGGED_PAGES_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_SUPRESS_ALL_PAGES_MASK,
+                         IOMMU_DEV_TABLE_SUPRESS_ALL_PAGES_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_DEV_TABLE_IO_CONTROL_ABORTED, entry,
+                         IOMMU_DEV_TABLE_IO_CONTROL_MASK,
+                         IOMMU_DEV_TABLE_IO_CONTROL_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_SNOOP_DISABLE_MASK,
+                         IOMMU_DEV_TABLE_SNOOP_DISABLE_SHIFT, &entry);
     set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
+                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
+    set_field_in_reg_u32(IOMMU_DEV_TABLE_SYS_MGT_DMA_ABORTED, entry,
+                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
+                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
+    dte[3] = entry;
+
+    entry = dte[4];
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+                         IOMMU_DEV_TABLE_INT_VALID_MASK,
+                         IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
+    dte[4] = entry;
+
+    smp_wmb();
+
+    entry = dte[0];
+    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                          IOMMU_DEV_TABLE_VALID_MASK,
                          IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
     dte[0] = entry;
@@ -113,7 +162,7 @@ static void amd_iommu_setup_domain_devic
     struct domain *domain, struct amd_iommu *iommu,
     u8 devfn, struct pci_dev *pdev)
 {
-    void *dte;
+    u32 *dte;
     unsigned long flags;
     int req_id, valid = 1;
     int dte_i = 0;
@@ -135,16 +184,53 @@ static void amd_iommu_setup_domain_devic
 
     spin_lock_irqsave(&iommu->lock, flags);
 
-    if ( !is_translation_valid((u32 *)dte) )
+    if ( !is_translation_valid(dte) )
     {
+        const struct ivrs_mappings *ivrs_dev;
+        u32 entry;
+
         /* bind DTE to domain page-tables */
         amd_iommu_set_root_page_table(
-            (u32 *)dte, page_to_maddr(hd->arch.root_table), domain->domain_id,
+            dte, page_to_maddr(hd->arch.root_table), domain->domain_id,
             hd->arch.paging_mode, valid);
 
+        /* Undo what disable_translation() may have done. */
+
+        entry = dte[5];
+        if ( get_field_from_reg_u32(dte[4],
+                                    IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK,
+                                    IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT) |
+             get_field_from_reg_u32(entry,
+                                    IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK,
+                                    IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_SHIFT) )
+        {
+            set_field_in_reg_u32(IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED, entry,
+                                 IOMMU_DEV_TABLE_INT_CONTROL_MASK,
+                                 IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
+            dte[5] = entry;
+            smp_wmb();
+        }
+
+        entry = dte[4];
+        set_field_in_reg_u32(iommu_intremap, entry,
+                             IOMMU_DEV_TABLE_INT_VALID_MASK,
+                             IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
+        dte[4] = entry;
+
+        ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
+        entry = dte[3];
+        set_field_in_reg_u32(ivrs_dev->dte_allow_exclusion, entry,
+                             IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
+                             IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
+        set_field_in_reg_u32(MASK_EXTR(ivrs_dev->device_flags,
+                                       ACPI_IVHD_SYSTEM_MGMT),
+                             entry, IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
+                             IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
+        dte[3] = entry;
+
         if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
              iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            iommu_dte_set_iotlb((u32 *)dte, dte_i);
+            iommu_dte_set_iotlb(dte, dte_i);
 
         amd_iommu_flush_device(iommu, req_id);
 
@@ -287,9 +373,9 @@ static void __hwdom_init amd_iommu_hwdom
     setup_hwdom_pci_devices(d, amd_iommu_add_device);
 }
 
-void amd_iommu_disable_domain_device(struct domain *domain,
-                                     struct amd_iommu *iommu,
-                                     u8 devfn, struct pci_dev *pdev)
+static void amd_iommu_disable_domain_device(const struct domain *domain,
+                                            struct amd_iommu *iommu,
+                                            uint8_t devfn, struct pci_dev *pdev)
 {
     void *dte;
     unsigned long flags;
@@ -305,10 +391,6 @@ void amd_iommu_disable_domain_device(str
     {
         disable_translation((u32 *)dte);
 
-        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
-             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
-            iommu_dte_set_iotlb((u32 *)dte, 0);
-
         amd_iommu_flush_device(iommu, req_id);
 
         AMD_IOMMU_DEBUG("Disable: device id = %#x, "