File 19415-vtd-dom0-s3.patch of Package xen
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1237541817 0
# Node ID 115c164721dce0edfec63db3ea0fff492829ab88
# Parent bbfcea821a0ded028c1a81101b5992cafca32f0b
vtd: fix Dom0 S3 when VT-d is enabled.
On some platforms that support Queued Invalidation and Interrupt
Remapping, Dom0 S3 doesn't work. The patch fixes the issue.
1) In device_power_down(), we should invoke iommu_suspend() after
ioapic_suspend(); in device_power_up(), we should invoke
iommu_resume() before ioapic_resume(). (The resulting functions are
sketched after the sign-off below.)
2) Add two functions, disable_qinval() and disable_intremap(); in
iommu_suspend(), we invoke them along with iommu_disable_translation()
(they share the disable pattern sketched after the sign-off below).
Rename qinval_setup() to enable_qinval() and rename
intremap_setup() to enable_intremap().
3) In iommu_resume(), remove the unnecessary
iommu_flush_{context, iotlb}_global() -- in fact we must not do that
if Queued Invalidation was enabled before S3, because at this point
in S3 resume Queued Invalidation has not been re-enabled yet.
4) Add a static global array ioapic_pin_to_intremap_index[] to
remember which intremap index each ioapic pin uses -- during S3
resume, ioapic_resume() re-writes all the ioapic RTEs, so we can use
the array to re-use the previously-allocated IRTEs.
5) Some cleanups:
a) Change some failure handling in enable_intremap() to panic().
b) Remove the unnecessary file-scope variable iec_cap used in
__iommu_flush_iec().
c) Add a dmar_writeq(iommu->reg, DMAR_IQT_REG, 0) in
enable_qinval().
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
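
For reference, here is a sketch of device_power_down() and
device_power_up() as they look once the power.c hunks below are applied
(reconstructed from the diff; this sketch is commentary, not part of the
patch itself):

    /* xen/arch/x86/acpi/power.c, after this patch */
    static int device_power_down(void)
    {
        console_suspend();
        time_suspend();
        i8259A_suspend();

        ioapic_suspend();

        iommu_suspend();   /* now after ioapic_suspend() */

        lapic_suspend();
        return 0;
    }

    static void device_power_up(void)
    {
        lapic_resume();

        iommu_resume();    /* now before ioapic_resume() */

        ioapic_resume();
        i8259A_resume();

        time_resume();
        console_resume();
    }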
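
Both new disable helpers follow the same VT-d disable protocol: clear the
enable bit in the Global Command register, then poll the Global Status
register until hardware acknowledges, panicking on timeout. A minimal
sketch of that shared pattern follows; the helper name vtd_clear_gcmd_bit
is hypothetical and for illustration only -- the patch open-codes it in
disable_qinval() and disable_intremap() below (the latter additionally
clears DMA_GCMD_SIRTP and DMA_GCMD_CFI before polling):

    /* Hypothetical helper (illustration only): the shared VT-d disable
     * pattern open-coded by disable_qinval() and disable_intremap(). */
    static void vtd_clear_gcmd_bit(struct iommu *iommu, u32 cmd_bit,
                                   u32 sts_bit, const char *what)
    {
        s_time_t start_time;

        /* Clear the enable bit in the cached Global Command value and
         * write it back to hardware. */
        iommu->gcmd &= ~cmd_bit;
        dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);

        /* Poll the Global Status register until hardware acknowledges by
         * clearing the status bit; give up after DMAR_OPERATION_TIMEOUT. */
        start_time = NOW();
        while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & sts_bit )
        {
            if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
                panic("Cannot clear %s\n", what);
            cpu_relax();
        }
    }

With such a helper, disable_qinval() would reduce to something like:

    vtd_clear_gcmd_bit(iommu, DMA_GCMD_QIE, DMA_GSTS_QIES,
                       "QIE field for queued invalidation");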
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -44,16 +44,16 @@ void do_suspend_lowlevel(void);
static int device_power_down(void)
{
- iommu_suspend();
-
console_suspend();
time_suspend();
i8259A_suspend();
-
+
ioapic_suspend();
-
+
+ iommu_suspend();
+
lapic_suspend();
return 0;
@@ -62,16 +62,16 @@ static int device_power_down(void)
static void device_power_up(void)
{
lapic_resume();
-
+
+ iommu_resume();
+
ioapic_resume();
i8259A_resume();
-
+
time_resume();
console_resume();
-
- iommu_resume();
}
static void freeze_domains(void)
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -30,8 +30,10 @@ void print_iommu_regs(struct acpi_drhd_u
void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
void dump_iommu_info(unsigned char key);
-int qinval_setup(struct iommu *iommu);
-int intremap_setup(struct iommu *iommu);
+int enable_qinval(struct iommu *iommu);
+void disable_qinval(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu);
+void disable_intremap(struct iommu *iommu);
int queue_invalidate_context(struct iommu *iommu,
u16 did, u16 source_id, u8 function_mask, u8 granu);
int queue_invalidate_iotlb(struct iommu *iommu,
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -30,6 +30,15 @@
#include "vtd.h"
#include "extern.h"
+/* The maximum number of IOAPIC (or IOSAPIC) pins. Typical values are 24 or
+ * 48 on x86 and Itanium platforms. Here we use the bigger number 256, which
+ * should be big enough. Note that IREMAP_ENTRY_NR is currently also 256.
+ */
+#define MAX_IOAPIC_PIN_NUM 256
+
+static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] =
+ { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 };
+
u16 apicid_to_bdf(int apic_id)
{
struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
@@ -90,7 +99,7 @@ static int remap_entry_to_ioapic_rte(
}
static int ioapic_rte_to_remap_entry(struct iommu *iommu,
- int apic_id, struct IO_APIC_route_entry *old_rte,
+ int apic_id, unsigned int ioapic_pin, struct IO_APIC_route_entry *old_rte,
unsigned int rte_upper, unsigned int value)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
@@ -104,13 +113,14 @@ static int ioapic_rte_to_remap_entry(str
remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- if ( remap_rte->format == 0 )
+ if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 )
{
ir_ctrl->iremap_index++;
index = ir_ctrl->iremap_index;
+ ioapic_pin_to_intremap_index[ioapic_pin] = index;
}
else
- index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
+ index = ioapic_pin_to_intremap_index[ioapic_pin];
if ( index > IREMAP_ENTRY_NR - 1 )
{
@@ -222,6 +232,7 @@ unsigned int io_apic_read_remap_rte(
void io_apic_write_remap_rte(
unsigned int apic, unsigned int reg, unsigned int value)
{
+ unsigned int ioapic_pin = (reg - 0x10) / 2;
struct IO_APIC_route_entry old_rte = { 0 };
struct IO_APIC_route_remap_entry *remap_rte;
unsigned int rte_upper = (reg & 1) ? 1 : 0;
@@ -279,8 +290,9 @@ void io_apic_write_remap_rte(
*(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
remap_rte->mask = saved_mask;
+ ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
if ( ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
- &old_rte, rte_upper, value) )
+ ioapic_pin, &old_rte, rte_upper, value) )
{
*IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
*(IO_APIC_BASE(apic)+4) = value;
@@ -467,13 +479,12 @@ void msi_msg_write_remap_rte(
msi_msg_to_remap_entry(iommu, pdev, msi_desc, msg);
}
-int intremap_setup(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu)
{
struct ir_ctrl *ir_ctrl;
s_time_t start_time;
- if ( !ecap_intr_remap(iommu->ecap) )
- return -ENODEV;
+ ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
ir_ctrl = iommu_ir_ctrl(iommu);
if ( ir_ctrl->iremap_maddr == 0 )
@@ -506,11 +517,7 @@ int intremap_setup(struct iommu *iommu)
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
{
if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set SIRTP field for interrupt remapping\n");
- return -ENODEV;
- }
+ panic("Cannot set SIRTP field for interrupt remapping\n");
cpu_relax();
}
@@ -522,11 +529,7 @@ int intremap_setup(struct iommu *iommu)
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
{
if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set CFI field for interrupt remapping\n");
- return -ENODEV;
- }
+ panic("Cannot set CFI field for interrupt remapping\n");
cpu_relax();
}
@@ -537,12 +540,8 @@ int intremap_setup(struct iommu *iommu)
start_time = NOW();
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES) )
{
- if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set IRE field for interrupt remapping\n");
- return -ENODEV;
- }
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot set IRE field for interrupt remapping\n");
cpu_relax();
}
@@ -551,3 +550,21 @@ int intremap_setup(struct iommu *iommu)
return 0;
}
+
+void disable_intremap(struct iommu *iommu)
+{
+ s_time_t start_time;
+
+ ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+
+ iommu->gcmd &= ~(DMA_GCMD_SIRTP | DMA_GCMD_CFI | DMA_GCMD_IRE);
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+ start_time = NOW();
+ while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES )
+ {
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot clear IRE field for interrupt remapping\n");
+ cpu_relax();
+ }
+}
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -682,7 +682,7 @@ static int iommu_enable_translation(stru
return 0;
}
-int iommu_disable_translation(struct iommu *iommu)
+static void iommu_disable_translation(struct iommu *iommu)
{
u32 sts;
unsigned long flags;
@@ -705,7 +705,6 @@ int iommu_disable_translation(struct iom
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return 0;
}
static struct iommu *vector_to_iommu[NR_VECTORS];
@@ -1751,7 +1750,7 @@ static int init_vtd_hw(void)
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
- if ( qinval_setup(iommu) != 0 )
+ if ( enable_qinval(iommu) != 0 )
{
dprintk(XENLOG_INFO VTDPREFIX,
"Failed to enable Queued Invalidation!\n");
@@ -1765,7 +1764,7 @@ static int init_vtd_hw(void)
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
- if ( intremap_setup(iommu) != 0 )
+ if ( enable_intremap(iommu) != 0 )
{
dprintk(XENLOG_INFO VTDPREFIX,
"Failed to enable Interrupt Remapping!\n");
@@ -1965,6 +1964,14 @@ int iommu_suspend(void)
(u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
iommu_state[i][DMAR_FEUADDR_REG] =
(u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
+
+ iommu_disable_translation(iommu);
+
+ if ( iommu_intremap )
+ disable_intremap(iommu);
+
+ if ( iommu_qinval )
+ disable_qinval(iommu);
}
return 0;
@@ -1979,7 +1986,11 @@ int iommu_resume(void)
if ( !vtd_enabled )
return 0;
- iommu_flush_all();
+ /* It is not clear whether the flush operation is required by the iommu
+ * specification. However, the BIOS also executes during S3 resume and the
+ * iommu may be touched again, so do the flush operation for safety.
+ */
+ wbinvd();
if ( init_vtd_hw() != 0 && force_iommu )
panic("IOMMU setup failed, crash Xen for security purpose!\n");
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -318,7 +318,6 @@ int queue_invalidate_iec(struct iommu *i
return ret;
}
-u64 iec_cap;
int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
{
int ret;
@@ -329,7 +328,7 @@ int __iommu_flush_iec(struct iommu *iomm
* reading vt-d architecture register will ensure
* draining happens in implementation independent way.
*/
- iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+ (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
return ret;
}
@@ -414,7 +413,7 @@ static int flush_iotlb_qi(
return ret;
}
-int qinval_setup(struct iommu *iommu)
+int enable_qinval(struct iommu *iommu)
{
s_time_t start_time;
struct qi_ctrl *qi_ctrl;
@@ -423,8 +422,7 @@ int qinval_setup(struct iommu *iommu)
qi_ctrl = iommu_qi_ctrl(iommu);
flush = iommu_get_flush(iommu);
- if ( !ecap_queued_inval(iommu->ecap) )
- return -ENODEV;
+ ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
if ( qi_ctrl->qinval_maddr == 0 )
{
@@ -448,6 +446,8 @@ int qinval_setup(struct iommu *iommu)
*/
dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
+ dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
+
/* enable queued invalidation hardware */
iommu->gcmd |= DMA_GCMD_QIE;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
@@ -463,3 +463,22 @@ int qinval_setup(struct iommu *iommu)
return 0;
}
+
+void disable_qinval(struct iommu *iommu)
+{
+ s_time_t start_time;
+
+ ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+
+ iommu->gcmd &= ~DMA_GCMD_QIE;
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+ /* Make sure hardware completes it */
+ start_time = NOW();
+ while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_QIES )
+ {
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot clear QIE field for queue invalidation\n");
+ cpu_relax();
+ }
+}