File qemu-0.9.0-kvm.patch of Package qemu

2007-03-13  Gwenole Beauchesne  <gbeauchesne@mandriva.com>

	* Merge in KVM rev 4486. Requires kernel 2.6.17 >= 12mdv.

================================================================================
--- qemu-0.9.0/Makefile.target
+++ qemu-0.9.0/Makefile.target
@@ -1,5 +1,9 @@
+CFLAGS=
+LDFLAGS=
+
 include config.mak
 
+LDFLAGS_BASE:=$(LDFLAGS)
 TARGET_BASE_ARCH:=$(TARGET_ARCH)
 ifeq ($(TARGET_ARCH), x86_64)
 TARGET_BASE_ARCH:=i386
@@ -227,8 +231,8 @@
 OBJS+= libqemu.a
 
 # cpu emulator library
-LIBOBJS=exec.o kqemu.o translate-op.o translate-all.o cpu-exec.o\
-        translate.o op.o 
+LIBOBJS=exec.o kqemu.o qemu-kvm.o translate-op.o translate-all.o cpu-exec.o\
+        translate.o op.o
 ifdef CONFIG_SOFTFLOAT
 LIBOBJS+=fpu/softfloat.o
 else
@@ -365,6 +369,13 @@
 # PCI network cards
 VL_OBJS+= ne2000.o rtl8139.o pcnet.o
 
+# KVM layer
+ifeq ($(USE_KVM), yes)
+VL_OBJS+= kvmctl.o
+# PCI Hypercall
+VL_OBJS+= hypercall.o
+endif
+
 ifeq ($(TARGET_BASE_ARCH), i386)
 # Hardware support
 VL_OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o $(AUDIODRV)
@@ -429,7 +440,7 @@
 VL_OBJS+=$(addprefix slirp/, $(SLIRP_OBJS))
 endif
 
-VL_LDFLAGS=
+VL_LDFLAGS=$(LDFLAGS_BASE)
 # specific flags are needed for non soft mmu emulator
 ifdef CONFIG_STATIC
 VL_LDFLAGS+=-static
@@ -440,7 +451,7 @@
 ifndef CONFIG_DARWIN
 ifndef CONFIG_WIN32
 ifndef CONFIG_SOLARIS
-VL_LIBS=-lutil -lrt
+VL_LIBS=-lutil -lrt -luuid
 endif
 endif
 endif
@@ -462,7 +473,7 @@
 SDL_LIBS := $(filter-out -mwindows, $(SDL_LIBS)) -mconsole
 endif
 
-$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a
+$(QEMU_SYSTEM): $(VL_OBJS) libqemu.a $(DEPLIBS)
 	$(CC) $(VL_LDFLAGS) -o $@ $^ $(LIBS) $(SDL_LIBS) $(COCOA_LIBS) $(VL_LIBS)
 
 cocoa.o: cocoa.m
@@ -521,6 +532,9 @@
 cpu-exec.o: cpu-exec.c
 	$(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
 
+qemu-kvm.o: qemu-kvm.c
+	$(CC) $(HELPER_CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) -c -o $@ $<
+
 # Note: this is a workaround. The real fix is to avoid compiling
 # cpu_signal_handler() in cpu-exec.c.
 signal.o: signal.c
--- qemu-0.9.0/configure
+++ qemu-0.9.0/configure
@@ -89,7 +89,9 @@
 bsd="no"
 linux="no"
 kqemu="no"
+kvm="no"
 profiler="no"
+kernel_path=""
 cocoa="no"
 check_gfx="yes"
 check_gcc="yes"
@@ -114,6 +116,7 @@
 oss="yes"
 if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
     kqemu="yes"
+    kvm="yes"
 fi
 ;;
 NetBSD)
@@ -141,6 +144,7 @@
 linux_user="yes"
 if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
     kqemu="yes"
+    kvm="yes"
 fi
 ;;
 esac
@@ -232,8 +236,12 @@
   ;;
   --disable-kqemu) kqemu="no"
   ;;
+  --enable-kvm) kvm="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
+  --kernel-path=*) kernel_path="$optarg"
+  ;;
   --enable-cocoa) cocoa="yes" ; coreaudio="yes" ; sdl="no"
   ;;
   --disable-gfx-check) check_gfx="no"
@@ -277,6 +285,8 @@
 echo ""
 echo "kqemu kernel acceleration support:"
 echo "  --disable-kqemu          disable kqemu support"
+echo "  --kernel-path=PATH       set the kernel path (configure probes it)"
+echo "  --enable-kvm             enable kernel virtual machine support"
 echo ""
 echo "Advanced options (experts only):"
 echo "  --source-path=PATH       path of source code [$source_path]"
@@ -623,6 +633,7 @@
 fi
 echo "FMOD support      $fmod $fmod_support"
 echo "kqemu support     $kqemu"
+echo "kvm support       $kvm"
 echo "Documentation     $build_docs"
 [ ! -z "$uname_release" ] && \
 echo "uname -r          $uname_release"
@@ -857,6 +868,13 @@
 interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
 echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
 
+configure_kvm() {
+  if test $kvm = "yes" -a "$target_softmmu" = "yes" -a $cpu = "$target_cpu" ; then
+    echo "#define USE_KVM 1" >> $config_h
+    echo "USE_KVM=yes" >> $config_mak
+  fi
+}
+
 if test "$target_cpu" = "i386" ; then
   echo "TARGET_ARCH=i386" >> $config_mak
   echo "#define TARGET_ARCH \"i386\"" >> $config_h
@@ -864,6 +882,7 @@
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "i386" ; then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "arm" -o "$target_cpu" = "armeb" ; then
   echo "TARGET_ARCH=arm" >> $config_mak
   echo "#define TARGET_ARCH \"arm\"" >> $config_h
@@ -895,6 +914,7 @@
   if test $kqemu = "yes" -a "$target_softmmu" = "yes" -a $cpu = "x86_64"  ; then
     echo "#define USE_KQEMU 1" >> $config_h
   fi
+  configure_kvm
 elif test "$target_cpu" = "mips" -o "$target_cpu" = "mipsel" ; then
   echo "TARGET_ARCH=mips" >> $config_mak
   echo "#define TARGET_ARCH \"mips\"" >> $config_h
--- qemu-0.9.0/cpu-all.h
+++ qemu-0.9.0/cpu-all.h
@@ -834,6 +834,7 @@
 extern int phys_ram_fd;
 extern uint8_t *phys_ram_base;
 extern uint8_t *phys_ram_dirty;
+extern uint8_t *bios_mem;
 
 /* physical memory access */
 #define TLB_INVALID_MASK   (1 << 3)
--- qemu-0.9.0/cpu-exec.c
+++ qemu-0.9.0/cpu-exec.c
@@ -35,6 +35,11 @@
 #include <sys/ucontext.h>
 #endif
 
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+extern int kvm_allowed;
+#endif
+
 int tb_invalidated_flag;
 
 //#define DEBUG_EXEC
@@ -401,6 +406,12 @@
             }
 #endif
 
+#ifdef USE_KVM
+            if (kvm_allowed) {
+                kvm_cpu_exec(env);
+                longjmp(env->jmp_env, 1);
+            }
+#endif
             T0 = 0; /* force lookup of first TB */
             for(;;) {
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
--- qemu-0.9.0/exec.c
+++ qemu-0.9.0/exec.c
@@ -69,6 +69,10 @@
 #define TARGET_PHYS_ADDR_SPACE_BITS 32
 #endif
 
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
+
 TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
 int nb_tbs;
@@ -82,6 +86,7 @@
 int phys_ram_fd;
 uint8_t *phys_ram_base;
 uint8_t *phys_ram_dirty;
+uint8_t *bios_mem;
 static int in_migration;
 
 CPUState *first_cpu;
@@ -1044,6 +1049,11 @@
     if (env->nb_breakpoints >= MAX_BREAKPOINTS)
         return -1;
     env->breakpoints[env->nb_breakpoints++] = pc;
+
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
     
     breakpoint_invalidate(env, pc);
     return 0;
@@ -1067,6 +1077,11 @@
     if (i < env->nb_breakpoints)
       env->breakpoints[i] = env->breakpoints[env->nb_breakpoints];
 
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
+    
     breakpoint_invalidate(env, pc);
     return 0;
 #else
@@ -1085,6 +1100,10 @@
         /* XXX: only flush what is necessary */
         tb_flush(env);
     }
+#ifdef USE_KVM
+    if (kvm_allowed)
+	kvm_update_debugger(env);
+#endif
 #endif
 }
 
@@ -1425,6 +1444,9 @@
 {
     int r=0;
 
+#ifdef USE_KVM
+    r = kvm_physical_memory_set_dirty_tracking(enable);
+#endif
     in_migration = enable;
     return r;
 }
--- qemu-0.9.0/hw/cirrus_vga.c
+++ qemu-0.9.0/hw/cirrus_vga.c
@@ -28,6 +28,9 @@
  */
 #include "vl.h"
 #include "vga_int.h"
+#ifndef _WIN32
+#include <sys/mman.h>
+#endif
 
 /*
  * TODO:
@@ -231,6 +234,10 @@
     int cirrus_linear_io_addr;
     int cirrus_linear_bitblt_io_addr;
     int cirrus_mmio_io_addr;
+#ifdef USE_KVM
+    unsigned long cirrus_lfb_addr;
+    unsigned long cirrus_lfb_end;
+#endif
     uint32_t cirrus_addr_mask;
     uint32_t linear_mmio_mask;
     uint8_t cirrus_shadow_gr0;
@@ -267,6 +274,10 @@
     int last_hw_cursor_y_end;
     int real_vram_size; /* XXX: suppress that */
     CPUWriteMemoryFunc **cirrus_linear_write;
+#ifdef USE_KVM
+    unsigned long map_addr;
+    unsigned long map_end;
+#endif
 } CirrusVGAState;
 
 typedef struct PCICirrusVGAState {
@@ -2525,6 +2536,48 @@
     cirrus_linear_bitblt_writel,
 };
 
+#ifdef USE_KVM
+
+#include "qemu-kvm.h"
+
+extern kvm_context_t kvm_context;
+
+static void *set_vram_mapping(unsigned long begin, unsigned long end)
+{
+    void *vram_pointer = NULL;
+
+    /* align begin and end address */
+    begin = begin & TARGET_PAGE_MASK;
+    end = begin + VGA_RAM_SIZE;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    vram_pointer = kvm_create_phys_mem(kvm_context, begin, end - begin, 1, 
+				       1, 1);
+
+    if (vram_pointer == NULL) {
+        printf("set_vram_mapping: cannot allocate memory: %m\n");
+        return NULL;
+    }
+
+    memset(vram_pointer, 0, end - begin);
+
+    return vram_pointer;
+}
+
+static int unset_vram_mapping(unsigned long begin, unsigned long end)
+{
+    /* align begin and end address */
+    end = begin + VGA_RAM_SIZE;
+    begin = begin & TARGET_PAGE_MASK;
+    end = (end + TARGET_PAGE_SIZE -1 ) & TARGET_PAGE_MASK;
+
+    kvm_destroy_phys_mem(kvm_context, begin, end - begin);
+
+    return 0;
+}
+
+#endif
+
 /* Compute the memory access functions */
 static void cirrus_update_memory_access(CirrusVGAState *s)
 {
@@ -2543,11 +2596,45 @@
         
 	mode = s->gr[0x05] & 0x7;
 	if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
+#ifdef USE_KVM
+            if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+		!s->map_addr) {
+                void *vram_pointer, *old_vram;
+
+                vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
+                                                s->cirrus_lfb_end);
+                if (!vram_pointer)
+                    fprintf(stderr, "NULL vram_pointer\n");
+                else {
+                    old_vram = vga_update_vram((VGAState *)s, vram_pointer,
+                                               VGA_RAM_SIZE);
+                    qemu_free(old_vram);
+                }
+                s->map_addr = s->cirrus_lfb_addr;
+                s->map_end = s->cirrus_lfb_end;
+            }
+#endif
             s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
             s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
         } else {
         generic_io:
+#ifdef USE_KVM
+            if (kvm_allowed && s->cirrus_lfb_addr && s->cirrus_lfb_end &&
+		s->map_addr) {
+		int error;
+                void *old_vram = NULL;
+
+		error = unset_vram_mapping(s->cirrus_lfb_addr,
+					   s->cirrus_lfb_end);
+		if (!error)
+		    old_vram = vga_update_vram((VGAState *)s, NULL,
+                                               VGA_RAM_SIZE);
+                if (old_vram)
+                    munmap(old_vram, s->map_addr - s->map_end);
+                s->map_addr = s->map_end = 0;
+            }
+#endif
             s->cirrus_linear_write[0] = cirrus_linear_writeb;
             s->cirrus_linear_write[1] = cirrus_linear_writew;
             s->cirrus_linear_write[2] = cirrus_linear_writel;
@@ -2946,6 +3033,13 @@
     qemu_put_be32s(f, &s->hw_cursor_y);
     /* XXX: we do not save the bitblt state - we assume we do not save
        the state when the blitter is active */
+
+#ifdef USE_KVM
+    if (kvm_allowed) { /* XXX: KVM images ought to be loadable in QEMU */
+	qemu_put_be32s(f, &s->real_vram_size);
+	qemu_put_buffer(f, s->vram_ptr, s->real_vram_size);
+    }
+#endif
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
@@ -2996,6 +3090,22 @@
     qemu_get_be32s(f, &s->hw_cursor_x);
     qemu_get_be32s(f, &s->hw_cursor_y);
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+        int real_vram_size;
+        qemu_get_be32s(f, &real_vram_size);
+        if (real_vram_size != s->real_vram_size) {
+            if (real_vram_size > s->real_vram_size)
+                real_vram_size = s->real_vram_size;
+            printf("%s: REAL_VRAM_SIZE MISMATCH !!!!!! SAVED=%d CURRENT=%d", 
+                   __FUNCTION__, real_vram_size, s->real_vram_size);
+        }
+        qemu_get_buffer(f, s->vram_ptr, real_vram_size);
+        cirrus_update_memory_access(s);
+    }
+#endif
+
+
     /* force refresh */
     s->graphic_mode = -1;
     cirrus_update_bank_ptr(s, 0);
@@ -3151,6 +3261,17 @@
     /* XXX: add byte swapping apertures */
     cpu_register_physical_memory(addr, s->vram_size,
 				 s->cirrus_linear_io_addr);
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	s->cirrus_lfb_addr = addr;
+	s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
+
+	if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
+	    (s->cirrus_lfb_end != s->map_end))
+	    printf("cirrus vga map change while on lfb mode\n");
+    }
+#endif
+
     cpu_register_physical_memory(addr + 0x1000000, 0x400000,
 				 s->cirrus_linear_bitblt_io_addr);
 }
--- qemu-0.9.0/hw/hypercall.c
+++ qemu-0.9.0/hw/hypercall.c
@@ -0,0 +1,302 @@
+/*
+ * QEMU-KVM Hypercall emulation
+ * 
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Qumranet
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "hypercall.h"
+#include <stddef.h>
+
+int use_hypercall_dev = 0;
+
+typedef struct VmChannelCharDriverState {
+    CharDriverState *vmchannel_hd;
+    uint32_t deviceid;
+} VmChannelCharDriverState;
+
+static VmChannelCharDriverState vmchannel_hds[MAX_VMCHANNEL_DEVICES];
+
+typedef struct HypercallState {
+    uint32_t hcr;
+    uint32_t hsr;
+    uint32_t txsize;
+    uint32_t txbuff;
+    uint32_t rxsize;
+    uint8_t  RxBuff[HP_MEM_SIZE];
+    uint8_t  txbufferaccu[HP_MEM_SIZE];
+    int      txbufferaccu_offset;
+    int      irq;
+    PCIDevice *pci_dev;
+    uint32_t index;
+} HypercallState;
+
+HypercallState *pHypercallStates[MAX_VMCHANNEL_DEVICES] = {NULL};
+
+//#define HYPERCALL_DEBUG 1
+
+static void hp_reset(HypercallState *s)
+{
+    s->hcr = 0;
+    s->hsr = 0;
+    s->txsize = 0;
+    s->txbuff = 0;
+    s->rxsize= 0;
+    s->txbufferaccu_offset = 0;
+}
+
+static void hypercall_update_irq(HypercallState *s);
+
+
+static void hp_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    HypercallState *s = opaque;
+
+#ifdef HYPERCALL_DEBUG
+    printf("%s: addr=0x%x, val=0x%x\n", __FUNCTION__, addr, val);
+#endif
+    addr &= 0xff;
+
+    switch(addr)
+    {
+        case HCR_REGISTER:
+        {
+            s->hcr = val;
+	    if (s->hcr & HCR_DI)
+                hypercall_update_irq(s);
+            if (val & HCR_GRS){
+                hp_reset(s);
+            }
+            break;
+        }
+
+        case HP_TXSIZE:
+        {
+            // handle the case where we are called while txsize is not yet 0
+            if (s->txsize != 0) {
+                printf("txsize is being set, but txsize is not 0!!!\n");
+            }
+            if (val > HP_MEM_SIZE) {
+                printf("txsize is larger than allowed by hw!!!\n");
+            }
+            s->txsize = val;
+            s->txbufferaccu_offset = 0;
+            break;
+        }
+
+        case HP_TXBUFF:
+        {
+            if (s->txsize == 0) {
+                printf("error with txbuff!!!\n");
+                break;
+            }
+
+            s->txbufferaccu[s->txbufferaccu_offset] = val;
+            s->txbufferaccu_offset++;
+            if (s->txbufferaccu_offset >= s->txsize) {
+                qemu_chr_write(vmchannel_hds[s->index].vmchannel_hd, s->txbufferaccu, s->txsize);
+                s->txbufferaccu_offset = 0;
+                s->txsize = 0;
+            }
+            break;
+        }
+        default:
+        {
+            printf("hp_ioport_write to unhandled address!!!\n");
+        }
+    }
+}
+
+static uint32_t hp_ioport_read(void *opaque, uint32_t addr)
+{
+    HypercallState *s = opaque;
+    int ret;
+
+    addr &= 0xff;
+#ifdef HYPERCALL_DEBUG
+    // Since HSR_REGISTER is being repeatedly read in the guest ISR we don't print it
+    if (addr != HSR_REGISTER)
+        printf("%s: addr=0x%x\n", __FUNCTION__, addr);
+#endif
+
+    if (addr >= offsetof(HypercallState, RxBuff) )
+    {
+        int RxBuffOffset = addr - (offsetof(HypercallState, RxBuff));
+        ret = s->RxBuff[RxBuffOffset];
+        return ret;
+    }
+
+    switch (addr)
+    {
+    case HSR_REGISTER:
+        ret = s->hsr;
+        if (ret & HSR_VDR) {
+            s->hsr &= ~HSR_VDR;
+        }
+        break;
+    case HP_RXSIZE:
+        ret = s->rxsize;
+        break;
+
+    default:
+        ret = 0x00;
+        break;
+    }
+
+    return ret;
+}
+
+/***********************************************************/
+/* PCI Hypercall definitions */
+
+typedef struct PCIHypercallState {
+    PCIDevice dev;
+    HypercallState hp;
+} PCIHypercallState;
+
+static void hp_map(PCIDevice *pci_dev, int region_num, 
+                       uint32_t addr, uint32_t size, int type)
+{
+    PCIHypercallState *d = (PCIHypercallState *)pci_dev;
+    HypercallState *s = &d->hp;
+
+    register_ioport_write(addr, 0x100, 1, hp_ioport_write, s);
+    register_ioport_read(addr, 0x100, 1, hp_ioport_read, s);
+
+}
+
+
+static void hypercall_update_irq(HypercallState *s)
+{
+    /* PCI irq */
+    pci_set_irq(s->pci_dev, 0, !(s->hcr & HCR_DI));
+}
+
+void pci_hypercall_single_init(PCIBus *bus, uint32_t deviceid, uint32_t index)
+{
+    PCIHypercallState *d;
+    HypercallState *s;
+    uint8_t *pci_conf;
+    char name[sizeof("HypercallX")];
+
+#ifdef HYPERCALL_DEBUG
+    printf("%s\n", __FUNCTION__);
+#endif
+
+    // If the vmchannel wasn't initialized, we don't want the Hypercall device in the guest
+    if (use_hypercall_dev == 0) {
+        return;
+    }
+
+    d = (PCIHypercallState *)pci_register_device(bus,
+                                                 name, sizeof(PCIHypercallState),
+                                                 -1,
+                                                 NULL, NULL);
+
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0x02; // Qumranet vendor ID 0x5002
+    pci_conf[0x01] = 0x50;
+    pci_conf[0x02] = deviceid & 0x00ff;
+    pci_conf[0x03] = (deviceid & 0xff00) >> 8;
+
+    pci_conf[0x09] = 0x00; // ProgIf
+    pci_conf[0x0a] = 0x00; // SubClass
+    pci_conf[0x0b] = 0x05; // BaseClass
+
+    pci_conf[0x0e] = 0x00; // header_type
+    pci_conf[0x3d] = 1; // interrupt pin 0
+
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_IO, hp_map);
+    s = &d->hp;
+    pHypercallStates[index] = s;
+    s->index = index;
+    s->irq = 16; /* PCI interrupt */
+    s->pci_dev = (PCIDevice *)d;
+
+    hp_reset(s);
+}
+
+void pci_hypercall_init(PCIBus *bus)
+{
+    int i;
+
+    // loop devices & call pci_hypercall_single_init with device id's
+    for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++){
+        if (vmchannel_hds[i].vmchannel_hd) {
+            pci_hypercall_single_init(bus, vmchannel_hds[i].deviceid, i);
+        }
+    }
+}
+
+static int vmchannel_can_read(void *opaque)
+{
+    return 128;
+}
+
+static void vmchannel_event(void *opaque, int event)
+{
+
+#ifdef HYPERCALL_DEBUG
+    // if index is to be used outside the printf, take it out of the #ifdef block!
+    long index = (long)opaque;
+    printf("%s index:%ld, got event %i\n", __FUNCTION__, index, event);
+#endif
+    
+    return;
+}
+
+// input from vmchannel outside caller
+static void vmchannel_read(void *opaque, const uint8_t *buf, int size)
+{
+    int i;
+    long index = (long)opaque;
+
+#ifdef HYPERCALL_DEBUG    
+    printf("vmchannel_read buf size:%d\n", size);
+#endif
+
+    // if the hypercall device is in interrupts disabled state, don't accept the data
+    if (pHypercallStates[index]->hcr & HCR_DI) {
+        return;
+    }
+
+    for(i = 0; i < size; i++) {
+        pHypercallStates[index]->RxBuff[i] = buf[i];
+    }
+    pHypercallStates[index]->rxsize = size;
+    pHypercallStates[index]->hsr = HSR_VDR;
+    hypercall_update_irq(pHypercallStates[index]);
+}
+
+void vmchannel_init(CharDriverState *hd, uint32_t deviceid, uint32_t index)
+{
+#ifdef HYPERCALL_DEBUG
+    printf("vmchannel_init, index=%d, deviceid=0x%x\n", index, deviceid);
+#endif
+
+    vmchannel_hds[index].deviceid = deviceid;
+    vmchannel_hds[index].vmchannel_hd = hd;
+   
+    use_hypercall_dev = 1;
+    qemu_chr_add_handlers(vmchannel_hds[index].vmchannel_hd, vmchannel_can_read, vmchannel_read,
+                          vmchannel_event, (void *)(long)index);
+}
--- qemu-0.9.0/hw/hypercall.h
+++ qemu-0.9.0/hw/hypercall.h
@@ -0,0 +1,45 @@
+/*
+ * QEMU-KVM Hypercall emulation
+ * 
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Qumranet
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define HCR_REGISTER    0x00  // Hypercall Command Register WR
+#define HSR_REGISTER    0x04  // Hypercall Status Register RD
+#define HP_TXSIZE       0x08
+#define HP_TXBUFF       0x0c
+#define HP_RXSIZE       0x10
+#define HP_RXBUFF       0x14
+
+// HCR_REGISTER commands
+#define HCR_DI		1 // disable interrupts
+#define HCR_EI		2 // enable interrupts
+#define HCR_GRS		4 // Global reset
+#define HCR_RESET	(HCR_GRS|HCR_DI)
+
+
+// Bits in HSR_REGISTER
+#define HSR_VDR		0x01  // vmchannel Data is ready to be read
+
+#define HP_MEM_SIZE    0xE0
+
+
--- qemu-0.9.0/hw/pc.c
+++ qemu-0.9.0/hw/pc.c
@@ -22,6 +22,10 @@
  * THE SOFTWARE.
  */
 #include "vl.h"
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+extern int kvm_allowed;
+#endif
 
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
@@ -444,6 +448,11 @@
     nb_ne2k++;
 }
 
+#ifdef USE_KVM
+extern kvm_context_t kvm_context;
+extern int kvm_allowed;
+#endif
+
 /* PC hardware initialisation */
 static void pc_init1(int ram_size, int vga_ram_size, int boot_device,
                      DisplayState *ds, const char **fd_filename, int snapshot,
@@ -511,6 +520,11 @@
     /* setup basic memory access */
     cpu_register_physical_memory(0xc0000, 0x10000, 
                                  vga_bios_offset | IO_MEM_ROM);
+#ifdef USE_KVM
+    if (kvm_allowed)
+	    memcpy(phys_ram_base + 0xc0000, phys_ram_base + vga_bios_offset,
+		   0x10000);
+#endif
 
     /* map the last 128KB of the BIOS in ISA space */
     isa_bios_size = bios_size;
@@ -522,6 +536,26 @@
                                  isa_bios_size, 
                                  (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
 
+#ifdef USE_KVM
+    if (kvm_allowed)
+	    memcpy(phys_ram_base + 0x100000 - isa_bios_size,
+		   phys_ram_base + (bios_offset + bios_size - isa_bios_size),
+		   isa_bios_size);
+#endif
+
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    bios_mem = kvm_create_phys_mem(kvm_context, (uint32_t)(-bios_size),
+					   bios_size, 2, 0, 1);
+	    if (!bios_mem)
+		    exit(1);
+	    memcpy(bios_mem, phys_ram_base + bios_offset, bios_size);
+
+	    cpu_register_physical_memory(phys_ram_size - KVM_EXTRA_PAGES * 4096, KVM_EXTRA_PAGES * 4096,
+					 (phys_ram_size - KVM_EXTRA_PAGES * 4096) | IO_MEM_ROM);
+    }
+#endif
+    
     option_rom_offset = 0;
     for (i = 0; i < nb_option_roms; i++) {
 	int offset = bios_offset + bios_size + option_rom_offset;
@@ -718,6 +752,11 @@
         }
     }
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+        pci_hypercall_init(pci_bus);
+	}
+#endif
     if (pci_enabled) {
         pci_piix3_ide_init(pci_bus, bs_table, piix3_devfn + 1);
     } else {
--- qemu-0.9.0/hw/vga.c
+++ qemu-0.9.0/hw/vga.c
@@ -1373,6 +1373,26 @@
     }
 }
 
+#ifdef USE_KVM
+
+#include "kvmctl.h"
+extern kvm_context_t kvm_context;
+
+static int bitmap_get_dirty(unsigned long *bitmap, unsigned nr)
+{
+    unsigned word = nr / ((sizeof bitmap[0]) * 8);
+    unsigned bit = nr % ((sizeof bitmap[0]) * 8);
+
+    //printf("%x -> %ld\n", nr, (bitmap[word] >> bit) & 1);
+    return (bitmap[word] >> bit) & 1;
+}
+
+#endif
+
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
+
 /* 
  * graphic modes
  */
@@ -1385,6 +1405,20 @@
     uint32_t v, addr1, addr;
     vga_draw_line_func *vga_draw_line;
     
+#ifdef USE_KVM
+
+    /* HACK ALERT */
+#define BITMAP_SIZE ((8*1024*1024) / 4096 / 8 / sizeof(long))
+    unsigned long bitmap[BITMAP_SIZE];
+    int r;
+
+    if (kvm_allowed) {
+	    r = kvm_get_dirty_pages(kvm_context, 1, &bitmap);
+	    if (r < 0)
+		    fprintf(stderr, "kvm: get_dirty_pages returned %d\n", r);
+    }
+#endif
+
     full_update |= update_basic_params(s);
 
     s->get_resolution(s, &width, &height);
@@ -1491,10 +1525,20 @@
         update = full_update | 
             cpu_physical_memory_get_dirty(page0, VGA_DIRTY_FLAG) |
             cpu_physical_memory_get_dirty(page1, VGA_DIRTY_FLAG);
+#ifdef USE_KVM
+	if (kvm_allowed) {
+		update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> TARGET_PAGE_BITS);
+		update |= bitmap_get_dirty(bitmap, (page1 - s->vram_offset) >> TARGET_PAGE_BITS);
+	}
+#endif
         if ((page1 - page0) > TARGET_PAGE_SIZE) {
             /* if wide line, can use another page */
             update |= cpu_physical_memory_get_dirty(page0 + TARGET_PAGE_SIZE, 
                                                     VGA_DIRTY_FLAG);
+#ifdef USE_KVM
+	    if (kvm_allowed)
+		    update |= bitmap_get_dirty(bitmap, (page0 - s->vram_offset) >> TARGET_PAGE_BITS);
+#endif
         }
         /* explicit invalidation for the hardware cursor */
         update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
@@ -1751,6 +1795,7 @@
     }
 }
 
+/* when used on xen/kvm environment, the vga_ram_base is not used */
 void vga_common_init(VGAState *s, DisplayState *ds, uint8_t *vga_ram_base, 
                      unsigned long vga_ram_offset, int vga_ram_size)
 {
@@ -1781,7 +1826,14 @@
 
     vga_reset(s);
 
+#ifndef USE_KVM
     s->vram_ptr = vga_ram_base;
+#else
+    if (kvm_allowed)
+	    s->vram_ptr = qemu_malloc(vga_ram_size);
+    else
+	    s->vram_ptr = vga_ram_base;
+#endif
     s->vram_offset = vga_ram_offset;
     s->vram_size = vga_ram_size;
     s->ds = ds;
@@ -1909,6 +1961,31 @@
     return 0;
 }
 
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size)
+{
+    uint8_t *old_pointer;
+
+    if (s->vram_size != vga_ram_size) {
+        fprintf(stderr, "No support to change vga_ram_size\n");
+        return NULL;
+    }
+
+    if (!vga_ram_base) {
+        vga_ram_base = qemu_malloc(vga_ram_size);
+        if (!vga_ram_base) {
+            fprintf(stderr, "reallocate error\n");
+            return NULL;
+        }
+    }
+
+    /* XXX lock needed? */
+    memcpy(vga_ram_base, s->vram_ptr, vga_ram_size);
+    old_pointer = s->vram_ptr;
+    s->vram_ptr = vga_ram_base;
+
+    return old_pointer;
+}
+
 /********************************************************/
 /* vga screen dump */
 
--- qemu-0.9.0/hw/vga_int.h
+++ qemu-0.9.0/hw/vga_int.h
@@ -174,5 +174,6 @@
                              unsigned int color0, unsigned int color1,
                              unsigned int color_xor);
 
+void *vga_update_vram(VGAState *s, void *vga_ram_base, int vga_ram_size);
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
--- qemu-0.9.0/kvm.h
+++ qemu-0.9.0/kvm.h
@@ -0,0 +1,247 @@
+#ifndef __LINUX_KVM_H
+#define __LINUX_KVM_H
+
+/*
+ * Userspace interface for /dev/kvm - kernel based virtual machine
+ *
+ * Note: this interface is considered experimental and may change without
+ *       notice.
+ */
+
+#include <asm/types.h>
+#include <linux/ioctl.h>
+
+#define KVM_API_VERSION 4
+
+/*
+ * Architectural interrupt line count, and the size of the bitmap needed
+ * to hold them.
+ */
+#define KVM_NR_INTERRUPTS 256
+#define KVM_IRQ_BITMAP_SIZE_BYTES    ((KVM_NR_INTERRUPTS + 7) / 8)
+#define KVM_IRQ_BITMAP_SIZE(type)    (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type))
+
+
+/* for KVM_CREATE_MEMORY_REGION */
+struct kvm_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+};
+
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+
+
+#define KVM_EXIT_TYPE_FAIL_ENTRY 1
+#define KVM_EXIT_TYPE_VM_EXIT    2
+
+enum kvm_exit_reason {
+	KVM_EXIT_UNKNOWN          = 0,
+	KVM_EXIT_EXCEPTION        = 1,
+	KVM_EXIT_IO               = 2,
+	KVM_EXIT_CPUID            = 3,
+	KVM_EXIT_DEBUG            = 4,
+	KVM_EXIT_HLT              = 5,
+	KVM_EXIT_MMIO             = 6,
+	KVM_EXIT_IRQ_WINDOW_OPEN  = 7,
+	KVM_EXIT_SHUTDOWN         = 8,
+};
+
+/* for KVM_RUN */
+struct kvm_run {
+	/* in */
+	__u32 emulated;  /* skip current instruction */
+	__u32 mmio_completed; /* mmio request completed */
+	__u8 request_interrupt_window;
+	__u8 padding1[7];
+
+	/* out */
+	__u32 exit_type;
+	__u32 exit_reason;
+	__u32 instruction_length;
+	__u8 ready_for_interrupt_injection;
+	__u8 if_flag;
+	__u16 padding2;
+
+	/* in (pre_kvm_run), out (post_kvm_run) */
+	__u64 cr8;
+	__u64 apic_base;
+
+	union {
+		/* KVM_EXIT_UNKNOWN */
+		struct {
+			__u32 hardware_exit_reason;
+		} hw;
+		/* KVM_EXIT_EXCEPTION */
+		struct {
+			__u32 exception;
+			__u32 error_code;
+		} ex;
+		/* KVM_EXIT_IO */
+		struct {
+#define KVM_EXIT_IO_IN  0
+#define KVM_EXIT_IO_OUT 1
+			__u8 direction;
+			__u8 size; /* bytes */
+			__u8 string;
+			__u8 string_down;
+			__u8 rep;
+			__u8 pad;
+			__u16 port;
+			__u64 count;
+			union {
+				__u64 address;
+				__u32 value;
+			};
+		} io;
+		struct {
+		} debug;
+		/* KVM_EXIT_MMIO */
+		struct {
+			__u64 phys_addr;
+			__u8  data[8];
+			__u32 len;
+			__u8  is_write;
+		} mmio;
+	};
+};
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
+
+struct kvm_segment {
+	__u64 base;
+	__u32 limit;
+	__u16 selector;
+	__u8  type;
+	__u8  present, dpl, db, s, l, g, avl;
+	__u8  unusable;
+	__u8  padding;
+};
+
+struct kvm_dtable {
+	__u64 base;
+	__u16 limit;
+	__u16 padding[3];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
+
+/* for KVM_GET_MSR_INDEX_LIST */
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+/* for KVM_TRANSLATE */
+struct kvm_translation {
+	/* in */
+	__u64 linear_address;
+
+	/* out */
+	__u64 physical_address;
+	__u8  valid;
+	__u8  writeable;
+	__u8  usermode;
+	__u8  pad[5];
+};
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+	/* in */
+	__u32 irq;
+};
+
+struct kvm_breakpoint {
+	__u32 enabled;
+	__u32 padding;
+	__u64 address;
+};
+
+/* for KVM_DEBUG_GUEST */
+struct kvm_debug_guest {
+	/* in */
+	__u32 enabled;
+	__u32 pad;
+	struct kvm_breakpoint breakpoints[4];
+	__u32 singlestep;
+};
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+	__u32 slot;
+	__u32 padding;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+#define KVMIO 0xAE
+
+/*
+ * ioctls for /dev/kvm fds:
+ */
+#define KVM_GET_API_VERSION       _IO(KVMIO, 1)
+#define KVM_CREATE_VM             _IO(KVMIO, 2) /* returns a VM fd */
+#define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 15, struct kvm_msr_list)
+
+/*
+ * ioctls for VM fds
+ */
+#define KVM_SET_MEMORY_REGION     _IOW(KVMIO, 10, struct kvm_memory_region)
+/*
+ * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
+ * a vcpu fd.
+ */
+#define KVM_CREATE_VCPU           _IOW(KVMIO, 11, int)
+#define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 12, struct kvm_dirty_log)
+
+/*
+ * ioctls for vcpu fds
+ */
+#define KVM_RUN                   _IOWR(KVMIO, 2, struct kvm_run)
+#define KVM_GET_REGS              _IOR(KVMIO, 3, struct kvm_regs)
+#define KVM_SET_REGS              _IOW(KVMIO, 4, struct kvm_regs)
+#define KVM_GET_SREGS             _IOR(KVMIO, 5, struct kvm_sregs)
+#define KVM_SET_SREGS             _IOW(KVMIO, 6, struct kvm_sregs)
+#define KVM_TRANSLATE             _IOWR(KVMIO, 7, struct kvm_translation)
+#define KVM_INTERRUPT             _IOW(KVMIO, 8, struct kvm_interrupt)
+#define KVM_DEBUG_GUEST           _IOW(KVMIO, 9, struct kvm_debug_guest)
+#define KVM_GET_MSRS              _IOWR(KVMIO, 13, struct kvm_msrs)
+#define KVM_SET_MSRS              _IOW(KVMIO, 14, struct kvm_msrs)
+
+#endif
--- qemu-0.9.0/kvmctl.c
+++ qemu-0.9.0/kvmctl.c
@@ -0,0 +1,809 @@
+/*
+ * Kernel-based Virtual Machine control library
+ *
+ * This library provides an API to control the kvm hardware virtualization
+ * module.
+ *
+ * Copyright (C) 2006 Qumranet
+ *
+ * Authors:
+ *
+ *  Avi Kivity <avi@qumranet.com>
+ *  Yaniv Kamay <yaniv@qumranet.com>
+ *
+ * This work is licensed under the GNU LGPL license, version 2.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include "kvmctl.h"
+
+#define EXPECTED_KVM_API_VERSION 4
+
+#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
+#error libkvm: userspace and kernel version mismatch
+#endif
+
+#define PAGE_SIZE 4096ul
+
+/* FIXME: share this number with kvm */
+/* FIXME: or dynamically alloc/realloc regions */
+#define KVM_MAX_NUM_MEM_REGIONS 4u
+
+/**
+ * \brief The KVM context
+ *
+ * The verbose KVM context
+ */
+struct kvm_context {
+	/// File descriptor for /dev/kvm
+	int fd;
+	int vm_fd;
+	int vcpu_fd[1];
+	/// Callbacks that KVM uses to emulate various unvirtualizable functionality
+	struct kvm_callbacks *callbacks;
+	void *opaque;
+	/// A pointer to the memory used as the physical memory for the guest
+	void *physical_memory;
+	/// is dirty pages logging enabled for all regions or not
+	int dirty_pages_log_all;
+	/// memory regions parameters
+	struct kvm_memory_region mem_regions[KVM_MAX_NUM_MEM_REGIONS];
+};
+
+struct translation_cache {
+	unsigned long linear;
+	void *physical;
+};
+
+static void translation_cache_init(struct translation_cache *tr)
+{
+	tr->physical = 0;
+}
+
+static int translate(kvm_context_t kvm, int vcpu, struct translation_cache *tr,
+		     unsigned long linear, void **physical)
+{
+	unsigned long page = linear & ~(PAGE_SIZE-1);
+	unsigned long offset = linear & (PAGE_SIZE-1);
+
+	if (!(tr->physical && tr->linear == page)) {
+		struct kvm_translation kvm_tr;
+		int r;
+
+		kvm_tr.linear_address = page;
+		
+		r = ioctl(kvm->vcpu_fd[vcpu], KVM_TRANSLATE, &kvm_tr);
+		if (r == -1)
+			return -errno;
+
+		if (!kvm_tr.valid)
+			return -EFAULT;
+
+		tr->linear = page;
+		tr->physical = kvm->physical_memory + kvm_tr.physical_address;
+	}
+	*physical = tr->physical + offset;
+	return 0;
+}
+
+/*
+ * memory regions parameters
+ */
+static void kvm_memory_region_save_params(kvm_context_t kvm, 
+					 struct kvm_memory_region *mem)
+{
+	if (!mem || (mem->slot >= KVM_MAX_NUM_MEM_REGIONS)) {
+		fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
+		return;
+	}
+	kvm->mem_regions[mem->slot] = *mem;
+}
+
+static void kvm_memory_region_clear_params(kvm_context_t kvm, int regnum)
+{
+	if (regnum >= KVM_MAX_NUM_MEM_REGIONS) {
+		fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
+		return;
+	}
+	kvm->mem_regions[regnum].memory_size = 0;
+}
+
+/* 
+ * dirty pages logging control 
+ */
+static int kvm_dirty_pages_log_change(kvm_context_t kvm, int regnum, __u32 flag)
+{
+	int r;
+	struct kvm_memory_region *mem;
+
+	if (regnum >= KVM_MAX_NUM_MEM_REGIONS) {
+		fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__);
+		return 1;
+	}
+	mem = &kvm->mem_regions[regnum];
+	if (mem->memory_size == 0) /* not used */
+		return 0;
+	if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) /* log already enabled */
+		return 0;
+	mem->flags |= flag;  /* temporarily turn on flag */
+	r = ioctl(kvm->vm_fd, KVM_SET_MEMORY_REGION, mem);
+	mem->flags &= ~flag; /* back to previous value */
+	if (r == -1) {
+		fprintf(stderr, "%s: %m\n", __FUNCTION__);
+	}
+	return r;
+}
+
+static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, __u32 flag)
+{
+	int i, r;
+
+	for (i=r=0; i<KVM_MAX_NUM_MEM_REGIONS && r==0; i++) {
+		r = kvm_dirty_pages_log_change(kvm, i, flag);
+	}
+	return r;
+}
+
+/**
+ * Enable dirty page logging for all memory regions
+ */
+int kvm_dirty_pages_log_enable_all(kvm_context_t kvm)
+{
+	if (kvm->dirty_pages_log_all)
+		return 0;
+	kvm->dirty_pages_log_all = 1;
+	return kvm_dirty_pages_log_change_all(kvm, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+/**
+ * Enable dirty page logging only for memory regions that were created with
+ *     dirty logging enabled (disable for all other memory regions).
+ */
+int kvm_dirty_pages_log_reset(kvm_context_t kvm)
+{
+	if (!kvm->dirty_pages_log_all)
+		return 0;
+	kvm->dirty_pages_log_all = 0;
+	return kvm_dirty_pages_log_change_all(kvm, 0);
+}
+
+
+kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
+		       void *opaque)
+{
+	int fd;
+	kvm_context_t kvm;
+	int r;
+
+	fd = open("/dev/kvm", O_RDWR);
+	if (fd == -1) {
+		perror("open /dev/kvm");
+		return NULL;
+	}
+	r = ioctl(fd, KVM_GET_API_VERSION, 0);
+	if (r == -1) {
+	    fprintf(stderr, "kvm kernel version too old\n");
+	    goto out_close;
+	}
+	if (r < EXPECTED_KVM_API_VERSION) {
+	    fprintf(stderr, "kvm kernel version too old\n");
+	    goto out_close;
+	}
+	if (r > EXPECTED_KVM_API_VERSION) {
+	    fprintf(stderr, "kvm userspace version too old\n");
+	    goto out_close;
+	}
+	kvm = malloc(sizeof(*kvm));
+	kvm->fd = fd;
+	kvm->vm_fd = -1;
+	kvm->callbacks = callbacks;
+	kvm->opaque = opaque;
+	kvm->dirty_pages_log_all = 0;
+	memset(&kvm->mem_regions, 0, sizeof(kvm->mem_regions));
+
+	return kvm;
+ out_close:
+	close(fd);
+	return NULL;
+}
+
+void kvm_finalize(kvm_context_t kvm)
+{
+    	if (kvm->vcpu_fd[0] != -1)
+		close(kvm->vcpu_fd[0]);
+    	if (kvm->vm_fd != -1)
+		close(kvm->vm_fd);
+	close(kvm->fd);
+	free(kvm);
+}
+
+int kvm_create(kvm_context_t kvm, unsigned long memory, void **vm_mem)
+{
+	unsigned long dosmem = 0xa0000;
+	unsigned long exmem = 0xc0000;
+	int fd = kvm->fd;
+	int r;
+	struct kvm_memory_region low_memory = {
+		.slot = 3,
+		.memory_size = memory  < dosmem ? memory : dosmem,
+		.guest_phys_addr = 0,
+	};
+	struct kvm_memory_region extended_memory = {
+		.slot = 0,
+		.memory_size = memory < exmem ? 0 : memory - exmem,
+		.guest_phys_addr = exmem,
+	};
+
+	kvm->vcpu_fd[0] = -1;
+
+	fd = ioctl(fd, KVM_CREATE_VM, 0);
+	if (fd == -1) {
+		fprintf(stderr, "kvm_create_vm: %m\n");
+		return -1;
+	}
+	kvm->vm_fd = fd;
+
+	/* 640K should be enough. */
+	r = ioctl(fd, KVM_SET_MEMORY_REGION, &low_memory);
+	if (r == -1) {
+		fprintf(stderr, "kvm_create_memory_region: %m\n");
+		return -1;
+	}
+	if (extended_memory.memory_size) {
+		r = ioctl(fd, KVM_SET_MEMORY_REGION, &extended_memory);
+		if (r == -1) {
+			fprintf(stderr, "kvm_create_memory_region: %m\n");
+			return -1;
+		}
+	}
+
+	kvm_memory_region_save_params(kvm, &low_memory);
+	kvm_memory_region_save_params(kvm, &extended_memory);
+
+	*vm_mem = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+	if (*vm_mem == MAP_FAILED) {
+		fprintf(stderr, "mmap: %m\n");
+		return -1;
+	}
+	kvm->physical_memory = *vm_mem;
+
+	r = ioctl(fd, KVM_CREATE_VCPU, 0);
+	if (r == -1) {
+		fprintf(stderr, "kvm_create_vcpu: %m\n");
+		return -1;
+	}
+	kvm->vcpu_fd[0] = r;
+	return 0;
+}
+
+void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start, 
+			  unsigned long len, int slot, int log, int writable)
+{
+	void *ptr;
+	int r;
+	int fd = kvm->vm_fd;
+	int prot = PROT_READ;
+	struct kvm_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+
+	r = ioctl(fd, KVM_SET_MEMORY_REGION, &memory);
+	if (r == -1)
+	    return 0;
+
+	kvm_memory_region_save_params(kvm, &memory);
+
+	if (writable)
+		prot |= PROT_WRITE;
+
+	ptr = mmap(0, len, prot, MAP_SHARED, fd, phys_start);
+	if (ptr == MAP_FAILED)
+		return 0;
+	return ptr;
+}
+
+void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start, 
+			  unsigned long len)
+{
+	//for each memory region in (phys_start, phys_start+len) do
+	//    kvm_memory_region_clear_params(kvm, region);
+	kvm_memory_region_clear_params(kvm, 0); /* avoid compiler warning */
+	printf("kvm_destroy_phys_mem: implement me\n");
+	exit(1);
+}
+
+
+int kvm_get_dirty_pages(kvm_context_t kvm, int slot, void *buf)
+{
+	int r;
+	struct kvm_dirty_log log = {
+		.slot = slot,
+	};
+
+	log.dirty_bitmap = buf;
+
+	r = ioctl(kvm->vm_fd, KVM_GET_DIRTY_LOG, &log);
+	if (r == -1)
+		return -errno;
+	return 0;
+}
+
+static int more_io(struct kvm_run *run, int first_time)
+{
+	if (!run->io.rep)
+		return first_time;
+	else
+		return run->io.count != 0;
+}
+
+static int handle_io(kvm_context_t kvm, struct kvm_run *run, int vcpu)
+{
+	uint16_t addr = run->io.port;
+	struct kvm_regs regs;
+	int first_time = 1;
+	int delta;
+	struct translation_cache tr;
+	int _in = (run->io.direction == KVM_EXIT_IO_IN);
+	int r;
+
+	translation_cache_init(&tr);
+
+	if (run->io.string || _in) {
+		r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, &regs);
+		if (r == -1)
+			return -errno;
+	}
+
+	delta = run->io.string_down ? -run->io.size : run->io.size;
+
+	while (more_io(run, first_time)) {
+		void *value_addr;
+
+		if (!run->io.string) {
+			if (_in)
+				value_addr = &regs.rax;
+			else
+				value_addr = &run->io.value;
+		} else {
+			r = translate(kvm, vcpu, &tr, run->io.address, 
+				      &value_addr);
+			if (r) {
+				fprintf(stderr, "failed translating I/O address %llx\n",
+					run->io.address);
+				return r;
+			}
+		}
+
+		switch (run->io.direction) {
+		case KVM_EXIT_IO_IN: {
+			switch (run->io.size) {
+			case 1: {
+				uint8_t value;
+				r = kvm->callbacks->inb(kvm->opaque, addr, &value);
+				*(uint8_t *)value_addr = value;
+				break;
+			}
+			case 2: {
+				uint16_t value;
+				r = kvm->callbacks->inw(kvm->opaque, addr, &value);
+				*(uint16_t *)value_addr = value;
+				break;
+			}
+			case 4: {
+				uint32_t value;
+				r = kvm->callbacks->inl(kvm->opaque, addr, &value);
+				*(uint32_t *)value_addr = value;
+				break;
+			}
+			default:
+				fprintf(stderr, "bad I/O size %d\n", run->io.size);
+				return -EMSGSIZE;
+			}
+			break;
+		}
+		case KVM_EXIT_IO_OUT:
+			switch (run->io.size) {
+			case 1:
+				r = kvm->callbacks->outb(kvm->opaque, addr,
+						     *(uint8_t *)value_addr);
+				break;
+			case 2:
+				r = kvm->callbacks->outw(kvm->opaque, addr,
+						     *(uint16_t *)value_addr);
+				break;
+			case 4:
+				r = kvm->callbacks->outl(kvm->opaque, addr,
+						     *(uint32_t *)value_addr);
+				break;
+			default:
+				fprintf(stderr, "bad I/O size %d\n", run->io.size);
+				return -EMSGSIZE;
+			}
+			break;
+		default:
+			fprintf(stderr, "bad I/O direction %d\n", run->io.direction);
+			return -EPROTO;
+		}
+		if (run->io.string) {
+			run->io.address += delta;
+			switch (run->io.direction) {
+			case KVM_EXIT_IO_IN:  regs.rdi += delta; break;
+			case KVM_EXIT_IO_OUT: regs.rsi += delta; break;
+			}
+			if (run->io.rep) {
+				--regs.rcx;
+				--run->io.count;
+			}
+		}
+		first_time = 0;
+		if (r) {
+			int savedret = r;
+			r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, &regs);
+			if (r == -1)
+				return -errno;
+
+			return savedret;
+		}
+	}
+
+	if (run->io.string || _in) {
+		r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, &regs);
+		if (r == -1)
+			return -errno;
+
+	}
+
+	run->emulated = 1;
+	return 0;
+}
+
+int handle_debug(kvm_context_t kvm, struct kvm_run *run, int vcpu)
+{
+	return kvm->callbacks->debug(kvm->opaque, vcpu);
+}
+
+int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
+{
+    return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_REGS, regs);
+}
+
+int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs)
+{
+    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_REGS, regs);
+}
+
+int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
+{
+    return ioctl(kvm->vcpu_fd[vcpu], KVM_GET_SREGS, sregs);
+}
+
+int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *sregs)
+{
+    return ioctl(kvm->vcpu_fd[vcpu], KVM_SET_SREGS, sregs);
+}
+
+/*
+ * Returns available msr list.  User must free.
+ */
+struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
+{
+    struct kvm_msr_list sizer, *msrs;
+    int r, e;
+
+    sizer.nmsrs = 0;
+    r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, &sizer);
+    if (r == -1 && errno != E2BIG)
+	return 0;
+    msrs = malloc(sizeof *msrs + sizer.nmsrs * sizeof *msrs->indices);
+    if (!msrs) {
+	errno = ENOMEM;
+	return 0;
+    }
+    msrs->nmsrs = sizer.nmsrs;
+    r = ioctl(kvm->fd, KVM_GET_MSR_INDEX_LIST, msrs);
+    if (r == -1) {
+	e = errno;
+	free(msrs);
+	errno = e;
+	return 0;
+    }
+    return msrs;
+}
+
+int kvm_get_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
+		 int n)
+{
+    struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
+    int r, e;
+
+    if (!kmsrs) {
+	errno = ENOMEM;
+	return -1;
+    }
+    kmsrs->nmsrs = n;
+    memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
+    r = ioctl(kvm->vcpu_fd[vcpu], KVM_GET_MSRS, kmsrs);
+    e = errno;
+    memcpy(msrs, kmsrs->entries, n * sizeof *msrs);
+    free(kmsrs);
+    errno = e;
+    return r;
+}
+
+int kvm_set_msrs(kvm_context_t kvm, int vcpu, struct kvm_msr_entry *msrs,
+		 int n)
+{
+    struct kvm_msrs *kmsrs = malloc(sizeof *kmsrs + n * sizeof *msrs);
+    int r, e;
+
+    if (!kmsrs) {
+	errno = ENOMEM;
+	return -1;
+    }
+    kmsrs->nmsrs = n;
+    memcpy(kmsrs->entries, msrs, n * sizeof *msrs);
+    r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_MSRS, kmsrs);
+    e = errno;
+    free(kmsrs);
+    errno = e;
+    return r;
+}
+
+static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
+{
+    	fprintf(stderr,
+		"%s %04x (%08llx/%08x p %d dpl %d db %d s %d type %x l %d"
+		" g %d avl %d)\n",
+		name, seg->selector, seg->base, seg->limit, seg->present,
+		seg->dpl, seg->db, seg->s, seg->type, seg->l, seg->g,
+		seg->avl);
+}
+
+static void print_dt(FILE *file, const char *name, struct kvm_dtable *dt)
+{
+    	fprintf(stderr, "%s %llx/%x\n", name, dt->base, dt->limit);
+}
+
+void kvm_show_regs(kvm_context_t kvm, int vcpu)
+{
+	int fd = kvm->vcpu_fd[vcpu];
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int r;
+
+	r = ioctl(fd, KVM_GET_REGS, &regs);
+	if (r == -1) {
+		perror("KVM_GET_REGS");
+		return;
+	}
+	fprintf(stderr,
+		"rax %016llx rbx %016llx rcx %016llx rdx %016llx\n"
+		"rsi %016llx rdi %016llx rsp %016llx rbp %016llx\n"
+		"r8  %016llx r9  %016llx r10 %016llx r11 %016llx\n"
+		"r12 %016llx r13 %016llx r14 %016llx r15 %016llx\n"
+		"rip %016llx rflags %08llx\n",
+		regs.rax, regs.rbx, regs.rcx, regs.rdx,
+		regs.rsi, regs.rdi, regs.rsp, regs.rbp,
+		regs.r8,  regs.r9,  regs.r10, regs.r11,
+		regs.r12, regs.r13, regs.r14, regs.r15,
+		regs.rip, regs.rflags);
+	r = ioctl(fd, KVM_GET_SREGS, &sregs);
+	if (r == -1) {
+		perror("KVM_GET_SREGS");
+		return;
+	}
+	print_seg(stderr, "cs", &sregs.cs);
+	print_seg(stderr, "ds", &sregs.ds);
+	print_seg(stderr, "es", &sregs.es);
+	print_seg(stderr, "ss", &sregs.ss);
+	print_seg(stderr, "fs", &sregs.fs);
+	print_seg(stderr, "gs", &sregs.gs);
+	print_seg(stderr, "tr", &sregs.tr);
+	print_seg(stderr, "ldt", &sregs.ldt);
+	print_dt(stderr, "gdt", &sregs.gdt);
+	print_dt(stderr, "idt", &sregs.idt);
+	fprintf(stderr, "cr0 %llx cr2 %llx cr3 %llx cr4 %llx cr8 %llx"
+		" efer %llx\n",
+		sregs.cr0, sregs.cr2, sregs.cr3, sregs.cr4, sregs.cr8,
+		sregs.efer);
+}
+
+static int handle_cpuid(kvm_context_t kvm, struct kvm_run *run, int vcpu)
+{
+	struct kvm_regs regs;
+	uint32_t orig_eax;
+	uint64_t rax, rbx, rcx, rdx;
+	int r;
+
+	kvm_get_regs(kvm, vcpu, &regs);
+	orig_eax = regs.rax;
+	rax = regs.rax;
+	rbx = regs.rbx;
+	rcx = regs.rcx;
+	rdx = regs.rdx;
+	r = kvm->callbacks->cpuid(kvm->opaque, &rax, &rbx, &rcx, &rdx);
+	regs.rax = rax;
+	regs.rbx = rbx;
+	regs.rcx = rcx;
+	regs.rdx = rdx;
+	if (orig_eax == 1)
+		regs.rdx &= ~(1ull << 12); /* disable mtrr support */
+	kvm_set_regs(kvm, vcpu, &regs);
+	run->emulated = 1;
+	return r;
+}
+
+static int handle_mmio(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	unsigned long addr = kvm_run->mmio.phys_addr;
+	void *data = kvm_run->mmio.data;
+	int r = -1;
+
+	if (kvm_run->mmio.is_write) {
+		switch (kvm_run->mmio.len) {
+		case 1:
+			r = kvm->callbacks->writeb(kvm->opaque, addr, *(uint8_t *)data);
+			break;
+		case 2:
+			r = kvm->callbacks->writew(kvm->opaque, addr, *(uint16_t *)data);
+			break;
+		case 4:
+			r = kvm->callbacks->writel(kvm->opaque, addr, *(uint32_t *)data);
+			break;
+		case 8:
+			r = kvm->callbacks->writeq(kvm->opaque, addr, *(uint64_t *)data);
+			break;
+		}
+	} else {
+		switch (kvm_run->mmio.len) {
+		case 1:
+			r = kvm->callbacks->readb(kvm->opaque, addr, (uint8_t *)data);
+			break;
+		case 2:
+			r = kvm->callbacks->readw(kvm->opaque, addr, (uint16_t *)data);
+			break;
+		case 4:
+			r = kvm->callbacks->readl(kvm->opaque, addr, (uint32_t *)data);
+			break;
+		case 8:
+			r = kvm->callbacks->readq(kvm->opaque, addr, (uint64_t *)data);
+			break;
+		}
+		kvm_run->mmio_completed = 1;
+	}
+	return r;
+}
+
+static int handle_io_window(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	return kvm->callbacks->io_window(kvm->opaque);
+}
+
+static int handle_halt(kvm_context_t kvm, struct kvm_run *kvm_run, int vcpu)
+{
+	return kvm->callbacks->halt(kvm->opaque, vcpu);
+}
+
+static int handle_shutdown(kvm_context_t kvm, struct kvm_run *kvm_run,
+			   int vcpu)
+{
+	return kvm->callbacks->shutdown(kvm->opaque, vcpu);
+}
+
+int try_push_interrupts(kvm_context_t kvm)
+{
+	return kvm->callbacks->try_push_interrupts(kvm->opaque);
+}
+
+static void post_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	kvm->callbacks->post_kvm_run(kvm->opaque, kvm_run);
+}
+
+static void pre_kvm_run(kvm_context_t kvm, struct kvm_run *kvm_run)
+{
+	kvm->callbacks->pre_kvm_run(kvm->opaque, kvm_run);
+}
+
+int kvm_run(kvm_context_t kvm, int vcpu)
+{
+	int r;
+	int fd = kvm->vcpu_fd[vcpu];
+	struct kvm_run kvm_run = {
+		.emulated = 0,
+		.mmio_completed = 0,
+	};
+
+again:
+	kvm_run.request_interrupt_window = try_push_interrupts(kvm);
+	pre_kvm_run(kvm, &kvm_run);
+	r = ioctl(fd, KVM_RUN, &kvm_run);
+	post_kvm_run(kvm, &kvm_run);
+
+	kvm_run.emulated = 0;
+	kvm_run.mmio_completed = 0;
+	if (r == -1 && errno != EINTR) {
+		r = -errno;
+		printf("kvm_run: %m\n");
+		return r;
+	}
+	if (r == -1) {
+		r = handle_io_window(kvm, &kvm_run);
+		goto more;
+	}
+	switch (kvm_run.exit_type) {
+	case KVM_EXIT_TYPE_FAIL_ENTRY:
+		fprintf(stderr, "kvm_run: failed entry, reason %u\n", 
+			kvm_run.exit_reason & 0xffff);
+		return -ENOEXEC;
+		break;
+	case KVM_EXIT_TYPE_VM_EXIT:
+		switch (kvm_run.exit_reason) {
+		case KVM_EXIT_UNKNOWN:
+			fprintf(stderr, "unhandled vm exit:  0x%x\n", 
+			       kvm_run.hw.hardware_exit_reason);
+			kvm_show_regs(kvm, vcpu);
+			abort();
+			break;
+		case KVM_EXIT_EXCEPTION:
+			fprintf(stderr, "exception %d (%x)\n", 
+			       kvm_run.ex.exception,
+			       kvm_run.ex.error_code);
+			kvm_show_regs(kvm, vcpu);
+			abort();
+			break;
+		case KVM_EXIT_IO:
+			r = handle_io(kvm, &kvm_run, vcpu);
+			break;
+		case KVM_EXIT_CPUID:
+			r = handle_cpuid(kvm, &kvm_run, vcpu);
+			break;
+		case KVM_EXIT_DEBUG:
+			r = handle_debug(kvm, &kvm_run, vcpu);
+			break;
+		case KVM_EXIT_MMIO:
+			r = handle_mmio(kvm, &kvm_run);
+			break;
+		case KVM_EXIT_HLT:
+			r = handle_halt(kvm, &kvm_run, vcpu);
+			break;
+		case KVM_EXIT_IRQ_WINDOW_OPEN:
+			break;
+		case KVM_EXIT_SHUTDOWN:
+			r = handle_shutdown(kvm, &kvm_run, vcpu);
+			break;
+		default:
+			fprintf(stderr, "unhandled vm exit: 0x%x\n", kvm_run.exit_reason);
+			kvm_show_regs(kvm, vcpu);
+			abort();
+			break;
+		}
+	}
+more:
+	if (!r)
+		goto again;
+	return r;
+}
+
+int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq)
+{
+	struct kvm_interrupt intr;
+
+	intr.irq = irq;
+	return ioctl(kvm->vcpu_fd[vcpu], KVM_INTERRUPT, &intr);
+}
+
+int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
+{
+	return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
+}
--- qemu-0.9.0/kvmctl.h
+++ qemu-0.9.0/kvmctl.h
@@ -0,0 +1,269 @@
+/** \file kvmctl.h
+ * libkvm API
+ */
+
+#ifndef KVMCTL_H
+#define KVMCTL_H
+
+#define __user /* temporary, until installed via make headers_install */
+#include "kvm.h"
+#include <stdint.h>
+
+struct kvm_context;
+
+typedef struct kvm_context *kvm_context_t;
+
+/*!
+ * \brief KVM callbacks structure
+ *
+ * This structure holds pointers to various functions that KVM will call
+ * when it encounters something that cannot be virtualized, such as
+ * accessing hardware devices via MMIO or regular IO.
+ */
+struct kvm_callbacks {
+    int (*cpuid)(void *opaque, 
+		  uint64_t *rax, uint64_t *rbx, uint64_t *rcx, uint64_t *rdx);
+	/// For 8bit IO reads from the guest (Usually when executing 'inb')
+    int (*inb)(void *opaque, uint16_t addr, uint8_t *data);
+	/// For 16bit IO reads from the guest (Usually when executing 'inw')
+    int (*inw)(void *opaque, uint16_t addr, uint16_t *data);
+	/// For 32bit IO reads from the guest (Usually when executing 'inl')
+    int (*inl)(void *opaque, uint16_t addr, uint32_t *data);
+	/// For 8bit IO writes from the guest (Usually when executing 'outb')
+    int (*outb)(void *opaque, uint16_t addr, uint8_t data);
+	/// For 16bit IO writes from the guest (Usually when executing 'outw')
+    int (*outw)(void *opaque, uint16_t addr, uint16_t data);
+	/// For 32bit IO writes from the guest (Usually when executing 'outl')
+    int (*outl)(void *opaque, uint16_t addr, uint32_t data);
+	/// For 8bit memory reads from unmapped memory (For MMIO devices)
+    int (*readb)(void *opaque, uint64_t addr, uint8_t *data);
+	/// For 16bit memory reads from unmapped memory (For MMIO devices)
+    int (*readw)(void *opaque, uint64_t addr, uint16_t *data);
+	/// For 32bit memory reads from unmapped memory (For MMIO devices)
+    int (*readl)(void *opaque, uint64_t addr, uint32_t *data);
+	/// For 64bit memory reads from unmapped memory (For MMIO devices)
+    int (*readq)(void *opaque, uint64_t addr, uint64_t *data);
+	/// For 8bit memory writes to unmapped memory (For MMIO devices)
+    int (*writeb)(void *opaque, uint64_t addr, uint8_t data);
+	/// For 16bit memory writes to unmapped memory (For MMIO devices)
+    int (*writew)(void *opaque, uint64_t addr, uint16_t data);
+	/// For 32bit memory writes to unmapped memory (For MMIO devices)
+    int (*writel)(void *opaque, uint64_t addr, uint32_t data);
+	/// For 64bit memory writes to unmapped memory (For MMIO devices)
+    int (*writeq)(void *opaque, uint64_t addr, uint64_t data);
+    int (*debug)(void *opaque, int vcpu);
+	/*!
+	 * \brief Called when the VCPU issues an 'hlt' instruction.
+	 *
+	 * Typically, you should yield here to prevent 100% CPU utilization
+	 * on the host CPU.
+	 */
+    int (*halt)(void *opaque, int vcpu);
+    int (*shutdown)(void *opaque, int vcpu);
+    int (*io_window)(void *opaque);
+    int (*try_push_interrupts)(void *opaque);
+    void (*post_kvm_run)(void *opaque, struct kvm_run *kvm_run);
+    void (*pre_kvm_run)(void *opaque, struct kvm_run *kvm_run);
+};
+
+/*!
+ * \brief Create new KVM context
+ *
+ * This creates a new kvm_context. A KVM context is a small area of data that
+ * holds information about the KVM instance that gets created by this call.\n
+ * This should always be your first call to KVM.
+ *
+ * \param callbacks Pointer to a valid kvm_callbacks structure
+ * \param opaque Opaque pointer that is passed back to every callback
+ * \return NULL on failure
+ */
+kvm_context_t kvm_init(struct kvm_callbacks *callbacks,
+		       void *opaque);
+
+/*!
+ * \brief Cleanup the KVM context
+ *
+ * Should always be called when closing down KVM.\n
+ * Exception: If kvm_init() fails, this function should not be called, as the
+ * context would be invalid
+ *
+ * \param kvm Pointer to the kvm_context that is to be freed
+ */
+void kvm_finalize(kvm_context_t kvm);
+
+/*!
+ * \brief Create new virtual machine
+ *
+ * This creates a new virtual machine, maps physical RAM to it, and creates a
+ * virtual CPU for it.\n
+ * \n
+ * Memory gets mapped for addresses 0->0xA0000, 0xC0000->phys_mem_bytes
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param phys_mem_bytes The amount of physical ram you want the VM to have
+ * \param phys_mem This pointer will be set to point to the memory that
+ * kvm_create allocates for physical RAM
+ * \return 0 on success
+ */
+int kvm_create(kvm_context_t kvm,
+	       unsigned long phys_mem_bytes,
+	       void **phys_mem);
+
+/*!
+ * \brief Start the VCPU
+ *
+ * This starts the VCPU and virtualization is started.\n
+ * \n
+ * This function will not return until any of these conditions are met:
+ * - An IO/MMIO handler does not return "0"
+ * - An exception that neither the guest OS, nor KVM can handle occurs
+ *
+ * \note This function will call the callbacks registered in kvm_init()
+ * to emulate those functions
+ * \note If you at any point want to interrupt the VCPU, kvm_run() will
+ * listen to the EINTR signal. This allows you to simulate external interrupts
+ * and asynchronous IO.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should be started
+ * \return 0 on success, but you really shouldn't expect this function to
+ * return except for when an error has occurred, or when you have sent it
+ * an EINTR signal.
+ */
+int kvm_run(kvm_context_t kvm, int vcpu);
+
+/*!
+ * \brief Read VCPU registers
+ *
+ * This gets the GP registers from the VCPU and outputs them
+ * into a kvm_regs structure
+ *
+ * \note This function returns a \b copy of the VCPUs registers.\n
+ * If you wish to modify the VCPUs GP registers, you should call kvm_set_regs()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * \param regs Pointer to a kvm_regs which will be populated with the VCPUs
+ * registers values
+ * \return 0 on success
+ */
+int kvm_get_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs);
+
+/*!
+ * \brief Write VCPU registers
+ *
+ * This sets the GP registers on the VCPU from a kvm_regs structure
+ *
+ * \note When this function returns, the regs pointer and the data it points to
+ * can be discarded
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should have its registers set
+ * \param regs Pointer to a kvm_regs holding the register values to be
+ * written to the VCPU
+ * \return 0 on success
+ */
+int kvm_set_regs(kvm_context_t kvm, int vcpu, struct kvm_regs *regs);
+
+/*!
+ * \brief Read VCPU system registers
+ *
+ * This gets the non-GP registers from the VCPU and outputs them
+ * into a kvm_sregs structure
+ *
+ * \note This function returns a \b copy of the VCPUs registers.\n
+ * If you wish to modify the VCPUs non-GP registers, you should call
+ * kvm_set_sregs()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * \param regs Pointer to a kvm_sregs which will be populated with the VCPUs
+ * registers values
+ * \return 0 on success
+ */
+int kvm_get_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs);
+
+/*!
+ * \brief Write VCPU system registers
+ *
+ * This sets the non-GP registers on the VCPU from a kvm_sregs structure
+ *
+ * \note When this function returns, the regs pointer and the data it points to
+ * can be discarded
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should have its system registers set
+ * \param regs Pointer to a kvm_sregs holding the register values to be
+ * written to the VCPU
+ * \return 0 on success
+ */
+int kvm_set_sregs(kvm_context_t kvm, int vcpu, struct kvm_sregs *regs);
+
+struct kvm_msr_list *kvm_get_msr_list(kvm_context_t);
+int kvm_get_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
+int kvm_set_msrs(kvm_context_t, int vcpu, struct kvm_msr_entry *msrs, int n);
+
+/*!
+ * \brief Simulate an external vectored interrupt
+ *
+ * This allows you to simulate an external vectored interrupt.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should receive the interrupt
+ * \param irq Vector number
+ * \return 0 on success
+ */
+int kvm_inject_irq(kvm_context_t kvm, int vcpu, unsigned irq);
+int kvm_guest_debug(kvm_context_t, int vcpu, struct kvm_debug_guest *dbg);
+
+/*!
+ * \brief Dump all VCPU information
+ *
+ * This dumps \b all the information that KVM has about a virtual CPU, namely:
+ * - GP Registers
+ * - System registers (selectors, descriptors, etc)
+ * - VMCS Data
+ * - MSRS
+ * - Pending interrupts
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * \return 0 on success
+ */
+int kvm_dump_vcpu(kvm_context_t kvm, int vcpu);
+
+/*!
+ * \brief Dump VCPU registers
+ *
+ * This dumps some of the information that KVM has about a virtual CPU, namely:
+ * - GP Registers
+ *
+ * A much more verbose version of this is available as kvm_dump_vcpu()
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should get dumped
+ * Output is written to stderr.
+ */
+void kvm_show_regs(kvm_context_t kvm, int vcpu);
+
+void *kvm_create_phys_mem(kvm_context_t, unsigned long phys_start, 
+			  unsigned long len, int slot, int log, int writable);
+void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
+			  unsigned long len);
+int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
+
+/*!
+ * \brief Enable dirty-pages-logging for all memory regions
+ *
+ * \param kvm Pointer to the current kvm_context
+ */
+int kvm_dirty_pages_log_enable_all(kvm_context_t kvm);
+
+/*!
+ * \brief Disable dirty-page-logging for some memory regions
+ *
+ * Disable dirty-pages-logging for those memory regions that were
+ * created with dirty-page-logging disabled.
+ *
+ * \param kvm Pointer to the current kvm_context
+ */
+int kvm_dirty_pages_log_reset(kvm_context_t kvm);
+#endif
--- qemu-0.9.0/migration.c
+++ qemu-0.9.0/migration.c
@@ -24,6 +24,9 @@
 
 #include "vl.h"
 #include "qemu_socket.h"
+#ifdef USE_KVM
+#include "qemu-kvm.h"
+#endif
 
 #include <sys/wait.h>
 
@@ -172,6 +175,10 @@
     int dirty_count = 0;
 
     for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
 	if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
 	    dirty_count++;
     }
@@ -186,6 +193,11 @@
     if (migrate_write_buffer(s))
 	return;
 
+#ifdef USE_KVM
+    if (kvm_allowed && !*s->has_error)
+        *s->has_error = kvm_update_dirty_pages_log();
+#endif
+
     if (migrate_check_convergence(s) || *s->has_error) {
 	qemu_del_timer(s->timer);
 	qemu_free_timer(s->timer);
@@ -195,6 +207,11 @@
     }	
 
     while (s->addr < phys_ram_size) {
+#ifdef USE_KVM
+        if (kvm_allowed && (s->addr>=0xa0000) && (s->addr<0xc0000)) /* do not access video-addresses */
+            s->addr = 0xc0000;
+#endif
+
 	if (cpu_physical_memory_get_dirty(s->addr, MIGRATION_DIRTY_FLAG)) {
 	    uint32_t value = cpu_to_be32(s->addr);
 
@@ -254,6 +271,10 @@
     fcntl(s->fd, F_SETFL, O_NONBLOCK);
 
     for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
 	if (!cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG))
 	    cpu_physical_memory_set_dirty(addr);
     }
@@ -723,6 +744,10 @@
     unsigned int sum;
 
     for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
         sum = calc_page_checksum(addr);
         qemu_put_be32(f, addr);
         qemu_put_be32(f, sum);
@@ -737,6 +762,10 @@
     int num_errors = 0;
     
     for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
         sum = calc_page_checksum(addr);
         raddr = qemu_get_be32(f);
         rsum  = qemu_get_be32(f);
--- qemu-0.9.0/qemu-kvm.c
+++ qemu-0.9.0/qemu-kvm.c
@@ -0,0 +1,793 @@
+
+#include "config.h"
+#include "config-host.h"
+
+#ifdef USE_KVM
+
+#include "exec.h"
+
+#include "qemu-kvm.h"
+#include <kvmctl.h>
+#include <string.h>
+
+#define MSR_IA32_TSC		0x10
+
+extern void perror(const char *s);
+
+int kvm_allowed = 1;
+kvm_context_t kvm_context;
+static struct kvm_msr_list *kvm_msr_list;
+static int kvm_has_msr_star;
+
+#define NR_CPU 16
+static CPUState *saved_env[NR_CPU];
+
+static void set_msr_entry(struct kvm_msr_entry *entry, uint32_t index, 
+                          uint64_t data)
+{
+    entry->index = index;
+    entry->data  = data;
+}
+
+/* returns 0 on success, non-0 on failure */
+static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env)
+{
+        switch (entry->index) {
+        case MSR_IA32_SYSENTER_CS:  
+            env->sysenter_cs  = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = entry->data;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = entry->data;
+            break;
+        case MSR_STAR:
+            env->star         = entry->data;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_CSTAR:
+            env->cstar        = entry->data;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = entry->data;
+            break;
+        case MSR_FMASK:
+            env->fmask        = entry->data;
+            break;
+        case MSR_LSTAR:
+            env->lstar        = entry->data;
+            break;
+#endif
+        case MSR_IA32_TSC:
+            env->tsc          = entry->data;
+            break;
+        default:
+            printf("Warning unknown msr index 0x%x\n", entry->index);
+            return 1;
+        }
+        return 0;
+}
+
+#ifdef TARGET_X86_64
+#define MSR_COUNT 9
+#else
+#define MSR_COUNT 5
+#endif
+
+static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = 3;
+    lhs->present = 1;
+    lhs->dpl = 3;
+    lhs->db = 0;
+    lhs->s = 1;
+    lhs->l = 0;
+    lhs->g = 0;
+    lhs->avl = 0;
+    lhs->unusable = 0;
+}
+
+static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
+{
+    unsigned flags = rhs->flags;
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
+    lhs->present = (flags & DESC_P_MASK) != 0;
+    lhs->dpl = rhs->selector & 3;
+    lhs->db = (flags >> DESC_B_SHIFT) & 1;
+    lhs->s = (flags & DESC_S_MASK) != 0;
+    lhs->l = (flags >> DESC_L_SHIFT) & 1;
+    lhs->g = (flags & DESC_G_MASK) != 0;
+    lhs->avl = (flags & DESC_AVL_MASK) != 0;
+    lhs->unusable = 0;
+}
+
+static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+    lhs->flags =
+	(rhs->type << DESC_TYPE_SHIFT)
+	| (rhs->present * DESC_P_MASK)
+	| (rhs->dpl << DESC_DPL_SHIFT)
+	| (rhs->db << DESC_B_SHIFT)
+	| (rhs->s * DESC_S_MASK)
+	| (rhs->l << DESC_L_SHIFT)
+	| (rhs->g * DESC_G_MASK)
+	| (rhs->avl * DESC_AVL_MASK);
+}
+
+/* the reset values of qemu are not compatible with SVM;
+ * this function is used to fix the segment descriptor values */
+static void fix_realmode_dataseg(struct kvm_segment *seg)
+{
+	seg->type = 0x02;
+	seg->present = 1;
+	seg->s = 1;
+}
+
+static void load_regs(CPUState *env)
+{
+    struct kvm_regs regs;
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    int rc, n;
+
+    /* hack: save env */
+    if (!saved_env[0])
+	saved_env[0] = env;
+
+    regs.rax = env->regs[R_EAX];
+    regs.rbx = env->regs[R_EBX];
+    regs.rcx = env->regs[R_ECX];
+    regs.rdx = env->regs[R_EDX];
+    regs.rsi = env->regs[R_ESI];
+    regs.rdi = env->regs[R_EDI];
+    regs.rsp = env->regs[R_ESP];
+    regs.rbp = env->regs[R_EBP];
+#ifdef TARGET_X86_64
+    regs.r8 = env->regs[8];
+    regs.r9 = env->regs[9];
+    regs.r10 = env->regs[10];
+    regs.r11 = env->regs[11];
+    regs.r12 = env->regs[12];
+    regs.r13 = env->regs[13];
+    regs.r14 = env->regs[14];
+    regs.r15 = env->regs[15];
+#endif
+    
+    regs.rflags = env->eflags;
+    regs.rip = env->eip;
+
+    kvm_set_regs(kvm_context, 0, &regs);
+
+    memcpy(sregs.interrupt_bitmap, env->kvm_interrupt_bitmap, sizeof(sregs.interrupt_bitmap));
+
+    if ((env->eflags & VM_MASK)) {
+	    set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_v8086_seg(&sregs.es, &env->segs[R_ES]);
+	    set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
+    } else {
+	    set_seg(&sregs.cs, &env->segs[R_CS]);
+	    set_seg(&sregs.ds, &env->segs[R_DS]);
+	    set_seg(&sregs.es, &env->segs[R_ES]);
+	    set_seg(&sregs.fs, &env->segs[R_FS]);
+	    set_seg(&sregs.gs, &env->segs[R_GS]);
+	    set_seg(&sregs.ss, &env->segs[R_SS]);
+
+	    if (env->cr[0] & CR0_PE_MASK) {
+		/* force ss cpl to cs cpl */
+		sregs.ss.selector = (sregs.ss.selector & ~3) | 
+			(sregs.cs.selector & 3);
+		sregs.ss.dpl = sregs.ss.selector & 3;
+	    }
+
+	    if (!(env->cr[0] & CR0_PG_MASK)) {
+		    fix_realmode_dataseg(&sregs.ds);
+		    fix_realmode_dataseg(&sregs.es);
+		    fix_realmode_dataseg(&sregs.fs);
+		    fix_realmode_dataseg(&sregs.gs);
+		    fix_realmode_dataseg(&sregs.ss);
+	    }
+    }
+
+    set_seg(&sregs.tr, &env->tr);
+    set_seg(&sregs.ldt, &env->ldt);
+
+    sregs.idt.limit = env->idt.limit;
+    sregs.idt.base = env->idt.base;
+    sregs.gdt.limit = env->gdt.limit;
+    sregs.gdt.base = env->gdt.base;
+
+    sregs.cr0 = env->cr[0];
+    sregs.cr2 = env->cr[2];
+    sregs.cr3 = env->cr[3];
+    sregs.cr4 = env->cr[4];
+
+    sregs.apic_base = cpu_get_apic_base(env);
+    sregs.efer = env->efer;
+    sregs.cr8 = cpu_get_apic_tpr(env);
+
+    kvm_set_sregs(kvm_context, 0, &sregs);
+
+    /* msrs */
+    n = 0;
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_CS,  env->sysenter_cs);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    set_msr_entry(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    if (kvm_has_msr_star)
+	set_msr_entry(&msrs[n++], MSR_STAR,              env->star);
+    set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    set_msr_entry(&msrs[n++], MSR_CSTAR,             env->cstar);
+    set_msr_entry(&msrs[n++], MSR_KERNELGSBASE,      env->kernelgsbase);
+    set_msr_entry(&msrs[n++], MSR_FMASK,             env->fmask);
+    set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
+#endif
+
+    rc = kvm_set_msrs(kvm_context, 0, msrs, n);
+    if (rc == -1)
+        perror("kvm_set_msrs FAILED");
+}
+
+
+static void save_regs(CPUState *env)
+{
+    struct kvm_regs regs;
+    struct kvm_sregs sregs;
+    struct kvm_msr_entry msrs[MSR_COUNT];
+    uint32_t hflags;
+    uint32_t i, n, rc;
+
+    kvm_get_regs(kvm_context, 0, &regs);
+
+    env->regs[R_EAX] = regs.rax;
+    env->regs[R_EBX] = regs.rbx;
+    env->regs[R_ECX] = regs.rcx;
+    env->regs[R_EDX] = regs.rdx;
+    env->regs[R_ESI] = regs.rsi;
+    env->regs[R_EDI] = regs.rdi;
+    env->regs[R_ESP] = regs.rsp;
+    env->regs[R_EBP] = regs.rbp;
+#ifdef TARGET_X86_64
+    env->regs[8] = regs.r8;
+    env->regs[9] = regs.r9;
+    env->regs[10] = regs.r10;
+    env->regs[11] = regs.r11;
+    env->regs[12] = regs.r12;
+    env->regs[13] = regs.r13;
+    env->regs[14] = regs.r14;
+    env->regs[15] = regs.r15;
+#endif
+
+    env->eflags = regs.rflags;
+    env->eip = regs.rip;
+
+    kvm_get_sregs(kvm_context, 0, &sregs);
+
+    memcpy(env->kvm_interrupt_bitmap, sregs.interrupt_bitmap, sizeof(env->kvm_interrupt_bitmap));
+
+    get_seg(&env->segs[R_CS], &sregs.cs);
+    get_seg(&env->segs[R_DS], &sregs.ds);
+    get_seg(&env->segs[R_ES], &sregs.es);
+    get_seg(&env->segs[R_FS], &sregs.fs);
+    get_seg(&env->segs[R_GS], &sregs.gs);
+    get_seg(&env->segs[R_SS], &sregs.ss);
+
+    get_seg(&env->tr, &sregs.tr);
+    get_seg(&env->ldt, &sregs.ldt);
+    
+    env->idt.limit = sregs.idt.limit;
+    env->idt.base = sregs.idt.base;
+    env->gdt.limit = sregs.gdt.limit;
+    env->gdt.base = sregs.gdt.base;
+
+    env->cr[0] = sregs.cr0;
+    env->cr[2] = sregs.cr2;
+    env->cr[3] = sregs.cr3;
+    env->cr[4] = sregs.cr4;
+
+    cpu_set_apic_base(env, sregs.apic_base);
+
+    env->efer = sregs.efer;
+    cpu_set_apic_tpr(env, sregs.cr8);
+
+#define HFLAG_COPY_MASK ~( \
+			HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+			HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+			HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+			HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
+
+
+
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & 
+	    (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); 
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << 
+	    (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> 
+		(DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> 
+		(DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) || 
+                   (env->eflags & VM_MASK) ||
+                   !(hflags & HF_CS32_MASK)) {
+                hflags |= HF_ADDSEG_MASK;
+            } else {
+                hflags |= ((env->segs[R_DS].base | 
+                                env->segs[R_ES].base |
+                                env->segs[R_SS].base) != 0) << 
+                    HF_ADDSEG_SHIFT;
+            }
+    }
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((env->eflags >> 10) & 1));
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+
+    tlb_flush(env, 1);
+
+    /* msrs */    
+    n = 0;
+    msrs[n++].index = MSR_IA32_SYSENTER_CS;
+    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
+    if (kvm_has_msr_star)
+	msrs[n++].index = MSR_STAR;
+    msrs[n++].index = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    msrs[n++].index = MSR_CSTAR;
+    msrs[n++].index = MSR_KERNELGSBASE;
+    msrs[n++].index = MSR_FMASK;
+    msrs[n++].index = MSR_LSTAR;
+#endif
+    rc = kvm_get_msrs(kvm_context, 0, msrs, n);
+    if (rc == -1) {
+        perror("kvm_get_msrs FAILED");
+    }
+    else {
+        n = rc; /* actual number of MSRs */
+        for (i=0 ; i<n; i++) {
+            if (get_msr_entry(&msrs[i], env))
+                return;
+        }
+    }
+}
+
+#include <signal.h>
+
+
+static int try_push_interrupts(void *opaque)
+{
+    CPUState **envs = opaque, *env;
+    env = envs[0];
+
+    if (env->ready_for_interrupt_injection &&
+        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+        (env->eflags & IF_MASK)) {
+            env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+            // for now using cpu 0
+            kvm_inject_irq(kvm_context, 0, cpu_get_pic_interrupt(env));
+    }
+
+    return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
+}
+
+static void post_kvm_run(void *opaque, struct kvm_run *kvm_run)
+{
+    CPUState **envs = opaque, *env;
+    env = envs[0];
+
+    env->eflags = (kvm_run->if_flag) ? env->eflags | IF_MASK:env->eflags & ~IF_MASK;
+    env->ready_for_interrupt_injection = kvm_run->ready_for_interrupt_injection;
+    cpu_set_apic_tpr(env, kvm_run->cr8);
+    cpu_set_apic_base(env, kvm_run->apic_base);
+}
+
+static void pre_kvm_run(void *opaque, struct kvm_run *kvm_run)
+{
+    CPUState **envs = opaque, *env;
+    env = envs[0];
+
+    kvm_run->cr8 = cpu_get_apic_tpr(env);
+}
+
+void kvm_load_registers(CPUState *env)
+{
+    load_regs(env);
+}
+
+void kvm_save_registers(CPUState *env)
+{
+    save_regs(env);
+}
+
+int kvm_cpu_exec(CPUState *env)
+{
+    int r;
+    int pending = (!env->ready_for_interrupt_injection ||
+                   ((env->interrupt_request & CPU_INTERRUPT_HARD) &&
+		   (env->eflags & IF_MASK)));
+
+    if (!pending && (env->interrupt_request & CPU_INTERRUPT_EXIT)) {
+        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
+        env->exception_index = EXCP_INTERRUPT;
+        cpu_loop_exit();
+    }
+
+    
+    if (!saved_env[0])
+	saved_env[0] = env;
+
+    r = kvm_run(kvm_context, 0);
+    if (r < 0) {
+        printf("kvm_run returned %d\n", r);
+        exit(1);
+    }
+
+    return 0;
+}
+
+
+static int kvm_cpuid(void *opaque, uint64_t *rax, uint64_t *rbx, 
+		      uint64_t *rcx, uint64_t *rdx)
+{
+    CPUState **envs = opaque;
+    CPUState *saved_env;
+    uint32_t eax = *rax;
+
+    saved_env = env;
+    env = envs[0];
+
+    env->regs[R_EAX] = *rax;
+    env->regs[R_EBX] = *rbx;
+    env->regs[R_ECX] = *rcx;
+    env->regs[R_EDX] = *rdx;
+    helper_cpuid();
+    *rdx = env->regs[R_EDX];
+    *rcx = env->regs[R_ECX];
+    *rbx = env->regs[R_EBX];
+    *rax = env->regs[R_EAX];
+    // don't report long mode/syscall/nx if no native support
+    if (eax == 0x80000001) {
+	unsigned long h_eax = eax, h_edx;
+
+
+	// push/pop hack to work around gcc 3 register pressure trouble
+	asm (
+#ifdef __x86_64__
+	     "push %%rbx; push %%rcx; cpuid; pop %%rcx; pop %%rbx"
+#else
+	     "push %%ebx; push %%ecx; cpuid; pop %%ecx; pop %%ebx"
+#endif
+	     : "+a"(h_eax), "=d"(h_edx));
+
+	// long mode
+	if ((h_edx & 0x20000000) == 0)
+	    *rdx &= ~0x20000000ull;
+	// syscall
+	if ((h_edx & 0x00000800) == 0)
+	    *rdx &= ~0x00000800ull;
+	// nx
+	if ((h_edx & 0x00100000) == 0)
+	    *rdx &= ~0x00100000ull;
+    }
+    env = saved_env;
+    return 0;
+}
+
+static int kvm_debug(void *opaque, int vcpu)
+{
+    CPUState **envs = opaque;
+
+    env = envs[0];
+    env->exception_index = EXCP_DEBUG;
+    return 1;
+}
+
+static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
+{
+    *data = cpu_inb(0, addr);
+    return 0;
+}
+
+static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
+{
+    *data = cpu_inw(0, addr);
+    return 0;
+}
+
+static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
+{
+    *data = cpu_inl(0, addr);
+    return 0;
+}
+
+static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
+{
+    if (addr == 0xb2 && data == 0) {
+	struct kvm_regs regs;
+
+	kvm_get_regs(kvm_context, 0, &regs);
+
+	/* hack around smm entry: kvm doesn't emulate smm at this time */
+	if (regs.rip == 0x409f4)
+	    regs.rip += 0x4b;
+	kvm_set_regs(kvm_context, 0, &regs);
+	
+	return 0;
+    }
+    cpu_outb(0, addr, data);
+    return 0;
+}
+
+static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
+{
+    cpu_outw(0, addr, data);
+    return 0;
+}
+
+static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
+{
+    cpu_outl(0, addr, data);
+    return 0;
+}
+
+static int kvm_readb(void *opaque, uint64_t addr, uint8_t *data)
+{
+    *data = ldub_phys(addr);
+    return 0;
+}
+ 
+static int kvm_readw(void *opaque, uint64_t addr, uint16_t *data)
+{
+    *data = lduw_phys(addr);
+    return 0;
+}
+
+static int kvm_readl(void *opaque, uint64_t addr, uint32_t *data)
+{
+    *data = ldl_phys(addr);
+    return 0;
+}
+
+static int kvm_readq(void *opaque, uint64_t addr, uint64_t *data)
+{
+    *data = ldq_phys(addr);
+    return 0;
+}
+
+static int kvm_writeb(void *opaque, uint64_t addr, uint8_t data)
+{
+    stb_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writew(void *opaque, uint64_t addr, uint16_t data)
+{
+    stw_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writel(void *opaque, uint64_t addr, uint32_t data)
+{
+    stl_phys(addr, data);
+    return 0;
+}
+
+static int kvm_writeq(void *opaque, uint64_t addr, uint64_t data)
+{
+    stq_phys(addr, data);
+    return 0;
+}
+
+static int kvm_io_window(void *opaque)
+{
+    return 1;
+}
+
+ 
+static int kvm_halt(void *opaque, int vcpu)
+{
+    CPUState **envs = opaque, *env;
+
+    env = envs[0];
+    if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
+	  (env->eflags & IF_MASK))) {
+	    env->hflags |= HF_HALTED_MASK;
+	    env->exception_index = EXCP_HLT;
+    }
+
+    return 1;
+}
+
+static int kvm_shutdown(void *opaque, int vcpu)
+{
+    qemu_system_reset_request();
+    return 1;
+}
+ 
+static struct kvm_callbacks qemu_kvm_ops = {
+    .cpuid = kvm_cpuid,
+    .debug = kvm_debug,
+    .inb   = kvm_inb,
+    .inw   = kvm_inw,
+    .inl   = kvm_inl,
+    .outb  = kvm_outb,
+    .outw  = kvm_outw,
+    .outl  = kvm_outl,
+    .readb = kvm_readb,
+    .readw = kvm_readw,
+    .readl = kvm_readl,
+    .readq = kvm_readq,
+    .writeb = kvm_writeb,
+    .writew = kvm_writew,
+    .writel = kvm_writel,
+    .writeq = kvm_writeq,
+    .halt  = kvm_halt,
+    .shutdown = kvm_shutdown,
+    .io_window = kvm_io_window,
+    .try_push_interrupts = try_push_interrupts,
+    .post_kvm_run = post_kvm_run,
+    .pre_kvm_run = pre_kvm_run,
+};
+
+int kvm_qemu_init()
+{
+    /* Try to initialize kvm */
+    kvm_context = kvm_init(&qemu_kvm_ops, saved_env);
+    if (!kvm_context) {
+      	return -1;
+    }
+
+    return 0;
+}
+
+int kvm_qemu_create_context(void)
+{
+    int i;
+
+    if (kvm_create(kvm_context, phys_ram_size, (void**)&phys_ram_base) < 0) {
+	kvm_qemu_destroy();
+	return -1;
+    }
+    kvm_msr_list = kvm_get_msr_list(kvm_context);
+    if (!kvm_msr_list) {
+	kvm_qemu_destroy();
+	return -1;
+    }
+    for (i = 0; i < kvm_msr_list->nmsrs; ++i)
+	if (kvm_msr_list->indices[i] == MSR_STAR)
+	    kvm_has_msr_star = 1;
+    return 0;
+}
+
+void kvm_qemu_destroy(void)
+{
+    kvm_finalize(kvm_context);
+}
+
+int kvm_update_debugger(CPUState *env)
+{
+    struct kvm_debug_guest dbg;
+    int i;
+
+    dbg.enabled = 0;
+    if (env->nb_breakpoints || env->singlestep_enabled) {
+	dbg.enabled = 1;
+	for (i = 0; i < 4 && i < env->nb_breakpoints; ++i) {
+	    dbg.breakpoints[i].enabled = 1;
+	    dbg.breakpoints[i].address = env->breakpoints[i];
+	}
+	dbg.singlestep = env->singlestep_enabled;
+    }
+    return kvm_guest_debug(kvm_context, 0, &dbg);
+}
+
+
+/*
+ * dirty pages logging
+ */
+/* FIXME: use unsigned long pointer instead of unsigned char */
+unsigned char *kvm_dirty_bitmap = NULL;
+int kvm_physical_memory_set_dirty_tracking(int enable)
+{
+    int r = 0;
+
+    if (!kvm_allowed)
+        return 0;
+
+    if (enable) {
+        if (!kvm_dirty_bitmap) {
+            unsigned bitmap_size = BITMAP_SIZE(phys_ram_size);
+            kvm_dirty_bitmap = qemu_malloc(bitmap_size);
+            if (kvm_dirty_bitmap == NULL) {
+                perror("Failed to allocate dirty pages bitmap");
+                r=-1;
+            }
+            else {
+                r = kvm_dirty_pages_log_enable_all(kvm_context);
+            }
+        }
+    }
+    else {
+        if (kvm_dirty_bitmap) {
+            r = kvm_dirty_pages_log_reset(kvm_context);
+            qemu_free(kvm_dirty_bitmap);
+            kvm_dirty_bitmap = NULL;
+        }
+    }
+    return r;
+}
+
+/* get kvm's dirty pages bitmap and update qemu's */
+int kvm_get_dirty_pages_log_slot(int slot, 
+                                 unsigned char *bitmap,
+                                 unsigned int offset,
+                                 unsigned int len)
+{
+    int r;
+    unsigned int i, j, n=0;
+    unsigned char c;
+    unsigned page_number, addr, addr1;
+
+    memset(bitmap, 0, len);
+    r = kvm_get_dirty_pages(kvm_context, slot, bitmap);
+    if (r)
+        return r;
+
+    /* 
+     * traversing the bitmap is faster than traversing the memory (by addr),
+     * especially when most of the memory is not dirty.
+     */
+    for (i=0; i<len; i++) {
+        c = bitmap[i];
+        while (c>0) {
+            j = ffsl(c) - 1;
+            c &= ~(1u<<j);
+            page_number = i * 8 + j;
+            addr1 = page_number * TARGET_PAGE_SIZE;
+            addr  = offset + addr1;
+            cpu_physical_memory_set_dirty(addr);
+            n++;
+        }
+    }
+    return 0;
+}
+
+/* 
+ * get kvm's dirty pages bitmap and update qemu's
+ * we only care about physical ram, which resides in slots 0 and 3
+ */
+int kvm_update_dirty_pages_log(void)
+{
+    int r = 0, len;
+
+    len = BITMAP_SIZE(0xa0000);
+    r =      kvm_get_dirty_pages_log_slot(3, kvm_dirty_bitmap, 0      , len);
+    len = BITMAP_SIZE(phys_ram_size - 0xc0000);
+    r = r || kvm_get_dirty_pages_log_slot(0, kvm_dirty_bitmap, 0xc0000, len);
+    return r;
+}
+#endif
--- qemu-0.9.0/qemu-kvm.h
+++ qemu-0.9.0/qemu-kvm.h
@@ -0,0 +1,19 @@
+#ifndef QEMU_KVM_H
+#define QEMU_KVM_H
+
+#include "kvmctl.h"
+
+int kvm_qemu_init(void);
+int kvm_qemu_create_context(void);
+void kvm_qemu_destroy(void);
+void kvm_load_registers(CPUState *env);
+void kvm_save_registers(CPUState *env);
+int kvm_cpu_exec(CPUState *env);
+int kvm_update_debugger(CPUState *env);
+
+int kvm_physical_memory_set_dirty_tracking(int enable);
+int kvm_update_dirty_pages_log(void);
+
+#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
+#define BITMAP_SIZE(m) (ALIGN(((m)>>TARGET_PAGE_BITS), HOST_LONG_BITS) / 8)
+#endif
--- qemu-0.9.0/sdl.c
+++ qemu-0.9.0/sdl.c
@@ -214,6 +214,11 @@
 {
     char buf[1024];
     strcpy(buf, "QEMU");
+#if USE_KVM
+    if (kvm_allowed) {
+        strcat(buf, "/KVM");
+    }
+#endif
     if (!vm_running) {
         strcat(buf, " [Stopped]");
     }
--- qemu-0.9.0/target-i386/cpu.h
+++ qemu-0.9.0/target-i386/cpu.h
@@ -161,12 +161,17 @@
 #define HF_MP_MASK           (1 << HF_MP_SHIFT)
 #define HF_EM_MASK           (1 << HF_EM_SHIFT)
 #define HF_TS_MASK           (1 << HF_TS_SHIFT)
+#define HF_IOPL_MASK         (3 << HF_IOPL_SHIFT)
 #define HF_LMA_MASK          (1 << HF_LMA_SHIFT)
 #define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_VM_MASK           (1 << HF_VM_SHIFT)
 #define HF_HALTED_MASK       (1 << HF_HALTED_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 
+#define CR0_PE_SHIFT 0
+#define CR0_MP_SHIFT 1
+
 #define CR0_PE_MASK  (1 << 0)
 #define CR0_MP_MASK  (1 << 1)
 #define CR0_EM_MASK  (1 << 2)
@@ -185,7 +190,8 @@
 #define CR4_PAE_MASK  (1 << 5)
 #define CR4_PGE_MASK  (1 << 7)
 #define CR4_PCE_MASK  (1 << 8)
-#define CR4_OSFXSR_MASK (1 << 9)
+#define CR4_OSFXSR_SHIFT 9
+#define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1 << 10)
 
 #define PG_PRESENT_BIT	0
@@ -496,6 +502,10 @@
     target_ulong kernelgsbase;
 #endif
 
+#ifdef USE_KVM
+    uint64_t tsc; /* time stamp counter */
+    uint8_t ready_for_interrupt_injection;
+#endif
     uint64_t pat;
 
     /* temporary data for USE_CODE_COPY mode */
@@ -534,6 +544,13 @@
     int kqemu_enabled;
     int last_io_time;
 #endif
+
+#ifdef USE_KVM
+#define BITS_PER_LONG (8 * sizeof (long))
+#define NR_IRQ_WORDS (256/ BITS_PER_LONG)
+    unsigned long kvm_interrupt_bitmap[NR_IRQ_WORDS];
+#endif
+
     /* in order to simplify APIC support, we leave this pointer to the
        user */
     struct APICState *apic_state;
--- qemu-0.9.0/target-i386/helper.c
+++ qemu-0.9.0/target-i386/helper.c
@@ -18,7 +18,9 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include "exec.h"
-
+#ifdef USE_KVM
+extern int kvm_allowed;
+#endif
 //#define DEBUG_PCALL
 
 #if 0
@@ -839,6 +841,13 @@
     uint32_t e1, e2, e3, ss;
     target_ulong old_eip, esp, offset;
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    printf("%s: unexpect\n", __FUNCTION__);
+	    exit(-1);
+    }
+#endif
+
     has_error_code = 0;
     if (!is_int && !is_hw) {
         switch(intno) {
@@ -1122,6 +1131,12 @@
     int dpl, cpl;
     uint32_t e2;
 
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	    printf("%s: unexpect\n", __FUNCTION__);
+	    exit(-1);
+    }
+#endif
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
     e2 = ldl_kernel(ptr + 4);
@@ -1147,6 +1162,12 @@
 void do_interrupt(int intno, int is_int, int error_code, 
                   target_ulong next_eip, int is_hw)
 {
+#ifdef USE_KVM
+    if (kvm_allowed) {
+	printf("%s: unexpect\n", __FUNCTION__);
+	exit(-1);
+    }
+#endif
     if (loglevel & CPU_LOG_INT) {
         if ((env->cr[0] & CR0_PE_MASK)) {
             static int count;
@@ -1958,6 +1979,12 @@
         cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
                        get_seg_base(e1, e2), limit, e2);
         EIP = new_eip;
+#ifdef USE_KVM
+        if (kvm_allowed && (e2 & DESC_L_MASK)) {
+            env->exception_index = -1;
+            cpu_loop_exit();   
+        }       
+#endif
     } else {
         /* jump to call or task gate */
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
--- qemu-0.9.0/target-i386/helper2.c
+++ qemu-0.9.0/target-i386/helper2.c
@@ -143,6 +143,9 @@
 #ifdef USE_KQEMU
     kqemu_init(env);
 #endif
+#ifdef USE_KVM
+    env->ready_for_interrupt_injection = 1;
+#endif
     return env;
 }
 
--- qemu-0.9.0/vl.c
+++ qemu-0.9.0/vl.c
@@ -88,6 +88,10 @@
 
 #include "exec-all.h"
 
+#if USE_KVM
+#include "qemu-kvm.h"
+#endif
+
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #ifdef __sun__
 #define SMBD_COMMAND "/usr/sfw/sbin/smbd"
@@ -149,6 +153,9 @@
 int graphic_depth = 15;
 int full_screen = 0;
 int no_quit = 0;
+#ifdef USE_KVM
+CharDriverState *vmchannel_hds[MAX_VMCHANNEL_DEVICES];
+#endif
 CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 #ifdef TARGET_I386
@@ -5407,6 +5414,15 @@
     /* XXX: compute hflags from scratch, except for CPL and IIF */
     env->hflags = hflags;
     tlb_flush(env, 1);
+#ifdef USE_KVM
+    if (kvm_allowed) {
+        for (i = 0; i < NR_IRQ_WORDS ; i++) {
+            qemu_get_betls(f, &env->kvm_interrupt_bitmap[i]);
+        }
+        qemu_get_be64s(f, &env->tsc);
+        kvm_load_registers(env);
+    }
+#endif
     return 0;
 }
 
@@ -5555,6 +5571,10 @@
     if (qemu_get_be32(f) != phys_ram_size)
         return -EINVAL;
     for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
         ret = ram_get_page(f, phys_ram_base + i, TARGET_PAGE_SIZE);
         if (ret)
             return ret;
@@ -5689,6 +5709,10 @@
     target_ulong addr;
 
     for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) {
+#ifdef USE_KVM
+        if (kvm_allowed && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */
+            continue;
+#endif
 	if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
 	    qemu_put_be32(f, addr);
 	    qemu_put_buffer(f, phys_ram_base + addr, TARGET_PAGE_SIZE);
@@ -6237,6 +6261,10 @@
             if (reset_requested) {
                 reset_requested = 0;
                 qemu_system_reset();
+#ifdef USE_KVM
+		if (kvm_allowed)
+			kvm_load_registers(env);
+#endif
                 ret = EXCP_INTERRUPT;
             }
             if (powerdown_requested) {
@@ -6354,6 +6382,9 @@
            "\n"
            "Debug/Expert options:\n"
            "-monitor dev    redirect the monitor to char device 'dev'\n"
+#ifdef USE_KVM
+           "-vmchannel di:DI,dev  redirect the hypercall device with device id DI, to char device 'dev'\n"
+#endif
            "-serial dev     redirect the serial port to char device 'dev'\n"
            "-parallel dev   redirect the parallel port to char device 'dev'\n"
            "-pidfile file   Write PID to 'file'\n"
@@ -6368,6 +6399,9 @@
            "-kernel-kqemu   enable KQEMU full virtualization (default is user mode only)\n"
            "-no-kqemu       disable KQEMU kernel module usage\n"
 #endif
+#ifdef USE_KVM
+	   "-no-kvm         disable KVM hardware virtualization\n"
+#endif
 #ifdef USE_CODE_COPY
            "-no-code-copy   disable code copy acceleration\n"
 #endif
@@ -6448,6 +6482,9 @@
     QEMU_OPTION_g,
     QEMU_OPTION_std_vga,
     QEMU_OPTION_monitor,
+#ifdef USE_KVM
+    QEMU_OPTION_vmchannel,
+#endif
     QEMU_OPTION_serial,
     QEMU_OPTION_parallel,
     QEMU_OPTION_loadvm,
@@ -6462,6 +6499,7 @@
     QEMU_OPTION_smp,
     QEMU_OPTION_vnc,
     QEMU_OPTION_no_acpi,
+    QEMU_OPTION_no_kvm,
     QEMU_OPTION_no_reboot,
     QEMU_OPTION_daemonize,
     QEMU_OPTION_option_rom,
@@ -6524,12 +6562,18 @@
     { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
     { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
 #endif
+#ifdef USE_KVM
+    { "no-kvm", 0, QEMU_OPTION_no_kvm },
+#endif
 #if defined(TARGET_PPC) || defined(TARGET_SPARC)
     { "g", 1, QEMU_OPTION_g },
 #endif
     { "localtime", 0, QEMU_OPTION_localtime },
     { "std-vga", 0, QEMU_OPTION_std_vga },
     { "monitor", 1, QEMU_OPTION_monitor },
+#ifdef USE_KVM
+    { "vmchannel", 1, QEMU_OPTION_vmchannel },
+#endif
     { "serial", 1, QEMU_OPTION_serial },
     { "parallel", 1, QEMU_OPTION_parallel },
     { "loadvm", HAS_ARG, QEMU_OPTION_loadvm },
@@ -6787,6 +6831,10 @@
     const char *r, *optarg;
     CharDriverState *monitor_hd;
     char monitor_device[128];
+#ifdef USE_KVM
+    char vmchannel_devices[MAX_VMCHANNEL_DEVICES][128];
+    int vmchannel_device_index;
+#endif
     char serial_devices[MAX_SERIAL_PORTS][128];
     int serial_device_index;
     char parallel_devices[MAX_PARALLEL_PORTS][128];
@@ -6858,6 +6906,12 @@
     translation = BIOS_ATA_TRANSLATION_AUTO;
     pstrcpy(monitor_device, sizeof(monitor_device), "vc");
 
+#ifdef USE_KVM
+    for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++)
+        vmchannel_devices[i][0] = '\0';
+    vmchannel_device_index = 0;
+#endif
+
     pstrcpy(serial_devices[0], sizeof(serial_devices[0]), "vc");
     for(i = 1; i < MAX_SERIAL_PORTS; i++)
         serial_devices[i][0] = '\0';
@@ -7145,6 +7199,17 @@
             case QEMU_OPTION_monitor:
                 pstrcpy(monitor_device, sizeof(monitor_device), optarg);
                 break;
+#ifdef USE_KVM
+            case QEMU_OPTION_vmchannel:
+                if (vmchannel_device_index >= MAX_VMCHANNEL_DEVICES) {
+                    fprintf(stderr, "qemu: too many vmchannel devices\n");
+                    exit(1);
+                }
+                pstrcpy(vmchannel_devices[vmchannel_device_index], 
+                        sizeof(vmchannel_devices[0]), optarg);
+                vmchannel_device_index++;
+                break;
+#endif
             case QEMU_OPTION_serial:
                 if (serial_device_index >= MAX_SERIAL_PORTS) {
                     fprintf(stderr, "qemu: too many serial ports\n");
@@ -7193,6 +7258,11 @@
                 kqemu_allowed = 2;
                 break;
 #endif
+#ifdef USE_KVM
+	    case QEMU_OPTION_no_kvm:
+		kvm_allowed = 0;
+		break;
+#endif
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
                 break;
@@ -7283,6 +7353,15 @@
     }
 #endif
 
+#if USE_KVM
+    if (kvm_allowed) {
+	if (kvm_qemu_init() < 0) {
+	    fprintf(stderr, "Could not initialize KVM, will disable KVM support\n");
+	    kvm_allowed = 0;
+	}
+    }
+#endif
+
 #ifdef USE_KQEMU
     if (smp_cpus > 1)
         kqemu_allowed = 0;
@@ -7362,11 +7441,28 @@
 	phys_ram_size += ret;
     }
 
+#if USE_KVM
+    /* Initialize kvm */
+    if (kvm_allowed) {
+	    phys_ram_size += KVM_EXTRA_PAGES * 4096;
+	    if (kvm_qemu_create_context() < 0) {
+		    fprintf(stderr, "Could not create KVM context\n");
+		    exit(1);
+	    }
+    } else {
+	    phys_ram_base = qemu_vmalloc(phys_ram_size);
+	    if (!phys_ram_base) {
+		    fprintf(stderr, "Could not allocate physical memory\n");
+		    exit(1);
+	    }
+    }
+#else
     phys_ram_base = qemu_vmalloc(phys_ram_size);
     if (!phys_ram_base) {
         fprintf(stderr, "Could not allocate physical memory\n");
         exit(1);
     }
+#endif
 
     /* we always create the cdrom drive, even if no disk is there */
     bdrv_init();
@@ -7445,6 +7541,33 @@
     }
     monitor_init(monitor_hd, !nographic);
 
+#ifdef USE_KVM
+    for(i = 0; i < MAX_VMCHANNEL_DEVICES; i++) {
+        const char *devname = vmchannel_devices[i];
+        if (devname[0] != '\0' && strcmp(devname, "none")) {
+            int devid;
+            char *termn;
+
+            if (strstart(devname, "di:", &devname)) {
+                devid = strtol(devname, &termn, 16);
+                devname = termn + 1;
+            }
+            else {
+                fprintf(stderr, "qemu: could not find vmchannel device id '%s'\n", 
+                        devname);
+                exit(1);
+            }
+            vmchannel_hds[i] = qemu_chr_open(devname);
+            if (!vmchannel_hds[i]) {
+                fprintf(stderr, "qemu: could not open vmchannel device '%s'\n", 
+                        devname);
+                exit(1);
+            }
+            vmchannel_init(vmchannel_hds[i], devid, i);
+        }
+    }
+#endif
+
     for(i = 0; i < MAX_SERIAL_PORTS; i++) {
         const char *devname = serial_devices[i];
         if (devname[0] != '\0' && strcmp(devname, "none")) {
--- qemu-0.9.0/vl.h
+++ qemu-0.9.0/vl.h
@@ -157,6 +157,7 @@
 extern int graphic_depth;
 extern const char *keyboard_layout;
 extern int kqemu_allowed;
+extern int kvm_allowed;
 extern int win2k_install_hack;
 extern int usb_enabled;
 extern int smp_cpus;
@@ -177,6 +178,10 @@
 #define BIOS_SIZE ((256 + 64) * 1024)
 #endif
 
+#ifdef USE_KVM
+#define KVM_EXTRA_PAGES 3
+#endif
+
 /* keyboard/mouse support */
 
 #define MOUSE_EVENT_LBUTTON 0x01
@@ -342,6 +347,10 @@
 CharDriverState *text_console_init(DisplayState *ds);
 void console_select(unsigned int index);
 
+/* vmchannel devices */
+
+#define MAX_VMCHANNEL_DEVICES 4
+
 /* serial ports */
 
 #define MAX_SERIAL_PORTS 4
@@ -1220,6 +1229,11 @@
 
 typedef struct ADBDevice ADBDevice;
 
+/* hypercall.c */
+
+void pci_hypercall_init(PCIBus *bus);
+void vmchannel_init(CharDriverState *hd, uint32_t deviceid, uint32_t index);
+
 /* buf = NULL means polling */
 typedef int ADBDeviceRequest(ADBDevice *d, uint8_t *buf_out,
                               const uint8_t *buf, int len);