File 47e5b5ae-lxc-keep-caps.patch of Package libvirt.11695

From 47e5b5ae3262f140955abd57bbb13337c65a3497 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Bosdonnat?= <cbosdonnat@suse.com>
Date: Fri, 18 Jul 2014 10:02:29 +0200
Subject: [PATCH] lxc: allow to keep or drop capabilities

Added <capabilities> in the <features> section of LXC domains
configuration. This section can contain elements named after the
capabilities like:

  <mknod state="on"/>, keep CAP_MKNOD capability
  <sys_chroot state="off"/> drop CAP_SYS_CHROOT capability

Users can restrict or give more capabilities than the default using
this mechanism.

Squashed commits 5acbb8f9, 0e6cacc4 and 251d75a8 into this one.
---
 docs/drvlxc.html.in                             |  47 +++++
 docs/schemas/domaincommon.rng                   | 207 ++++++++++++++++++++
 src/conf/domain_conf.c                          | 126 ++++++++++++-
 src/conf/domain_conf.h                          |  56 ++++++
 src/libvirt_private.syms                        |   3 +
 src/lxc/lxc_cgroup.c                            |   8 +
 src/lxc/lxc_container.c                         | 241 ++++++++++++++++++++++--
 src/util/vircgroup.c                            |  57 +++++-
 src/util/vircgroup.h                            |   2 +
 tests/domainschemadata/domain-caps-features.xml |  28 +++
 10 files changed, 755 insertions(+), 20 deletions(-)
 create mode 100644 tests/domainschemadata/domain-caps-features.xml

Index: libvirt-1.2.5/docs/drvlxc.html.in
===================================================================
--- libvirt-1.2.5.orig/docs/drvlxc.html.in
+++ libvirt-1.2.5/docs/drvlxc.html.in
@@ -540,6 +540,53 @@ debootstrap, whatever) under /opt/vm-1-r
 &lt;/domain&gt;
 </pre>
 
+<h2><a name="capabilities">Altering the available capabilities</a></h2>
+
+<p>
+By default the libvirt LXC driver drops some capabilities, including CAP_MKNOD.
+However <span class="since">since 1.2.6</span> libvirt can be told to keep or
+drop some capabilities using a domain configuration like the following:
+</p>
+<pre>
+...
+&lt;features&gt;
+  &lt;capabilities policy='default'&gt;
+    &lt;mknod state='on'/&gt;
+    &lt;sys_chroot state='off'/&gt;
+  &lt;/capabilities&gt;
+&lt;/features&gt;
+...
+</pre>
+<p>
+The child elements of <code>capabilities</code> are named after the capabilities as defined in
+<code>man 7 capabilities</code>. An <code>off</code> state tells libvirt to drop the
+capability, while an <code>on</code> state forces libvirt to keep the capability even if
+it is dropped by default.
+</p>
+<p>
+The <code>policy</code> attribute can be one of <code>default</code>, <code>allow</code>
+or <code>deny</code>. It defines the default rules for capabilities: either keep the
+default behavior that is dropping a few selected capabilities, or keep all capabilities
+or drop all capabilities. The advantage of <code>allow</code> and <code>deny</code> is that
+they guarantee that all capabilities will be kept (or removed) even if new ones are added
+later.
+</p>
+<p>
+The following example drops all capabilities except CAP_MKNOD:
+</p>
+<pre>
+...
+&lt;features&gt;
+  &lt;capabilities policy='deny'&gt;
+    &lt;mknod state='on'/&gt;
+  &lt;/capabilities&gt;
+&lt;/features&gt;
+...
+</pre>
+<p>
+Note that allowing capabilities that are normally dropped by default can seriously
+affect the security of the container and the host.
+</p>
 
 <h2><a name="usage">Container usage / management</a></h2>
 
Index: libvirt-1.2.5/docs/schemas/domaincommon.rng
===================================================================
--- libvirt-1.2.5.orig/docs/schemas/domaincommon.rng
+++ libvirt-1.2.5/docs/schemas/domaincommon.rng
@@ -3744,6 +3744,9 @@
               <empty/>
             </element>
           </optional>
+          <optional>
+            <ref name="capabilities"/>
+          </optional>
         </interleave>
       </element>
     </optional>
@@ -4290,6 +4293,200 @@
     </element>
   </define>
 
+  <!-- Optional capabilities features -->
+  <define name="capabilities">
+    <element name="capabilities">
+      <ref name="capabilitiespolicy"/>
+      <interleave>
+        <optional>
+          <element name="audit_control">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="audit_write">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="block_suspend">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="chown">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="dac_override">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="dac_read_search">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="fowner">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="fsetid">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="ipc_lock">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="ipc_owner">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="kill">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="lease">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="linux_immutable">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="mac_admin">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="mac_override">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="mknod">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="net_admin">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="net_bind_service">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="net_broadcast">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="net_raw">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="setgid">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="setfcap">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="setpcap">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="setuid">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_admin">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_boot">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_chroot">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_module">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_nice">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_pacct">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_ptrace">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_rawio">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_resource">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_time">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="sys_tty_config">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="syslog">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+        <optional>
+          <element name="wake_alarm">
+            <ref name="featurestate"/>
+          </element>
+        </optional>
+      </interleave>
+    </element>
+  </define>
+
   <define name="featurestate">
     <attribute name="state">
       <choice>
@@ -4298,6 +4495,16 @@
       </choice>
     </attribute>
   </define>
+
+  <define name="capabilitiespolicy">
+    <attribute name="policy">
+      <choice>
+        <value>default</value>
+        <value>allow</value>
+        <value>deny</value>
+      </choice>
+    </attribute>
+  </define>
 
   <!--
        Optional hypervisor extensions in their own namespace:
Index: libvirt-1.2.5/src/conf/domain_conf.c
===================================================================
--- libvirt-1.2.5.orig/src/conf/domain_conf.c
+++ libvirt-1.2.5/src/conf/domain_conf.c
@@ -147,18 +147,63 @@ VIR_ENUM_IMPL(virDomainFeature, VIR_DOMA
               "viridian",
               "privnet",
               "hyperv",
-              "pvspinlock")
+              "pvspinlock",
+              "capabilities")
 
 VIR_ENUM_IMPL(virDomainFeatureState, VIR_DOMAIN_FEATURE_STATE_LAST,
               "default",
               "on",
               "off")
 
+VIR_ENUM_IMPL(virDomainCapabilitiesPolicy, VIR_DOMAIN_CAPABILITIES_POLICY_LAST,
+              "default",
+              "allow",
+              "deny")
+
 VIR_ENUM_IMPL(virDomainHyperv, VIR_DOMAIN_HYPERV_LAST,
               "relaxed",
               "vapic",
               "spinlocks")
 
+VIR_ENUM_IMPL(virDomainCapsFeature, VIR_DOMAIN_CAPS_FEATURE_LAST,
+              "audit_control",
+              "audit_write",
+              "block_suspend",
+              "chown",
+              "dac_override",
+              "dac_read_search",
+              "fowner",
+              "fsetid",
+              "ipc_lock",
+              "ipc_owner",
+              "kill",
+              "lease",
+              "linux_immutable",
+              "mac_admin",
+              "mac_override",
+              "mknod",
+              "net_admin",
+              "net_bind_service",
+              "net_broadcast",
+              "net_raw",
+              "setgid",
+              "setfcap",
+              "setpcap",
+              "setuid",
+              "sys_admin",
+              "sys_boot",
+              "sys_chroot",
+              "sys_module",
+              "sys_nice",
+              "sys_pacct",
+              "sys_ptrace",
+              "sys_rawio",
+              "sys_resource",
+              "sys_time",
+              "sys_tty_config",
+              "syslog",
+              "wake_alarm")
+
 VIR_ENUM_IMPL(virDomainLifecycle, VIR_DOMAIN_LIFECYCLE_LAST,
               "destroy",
               "restart",
@@ -11835,6 +11880,22 @@ virDomainDefParseXML(xmlDocPtr xml,
             def->features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
             break;
 
+        case VIR_DOMAIN_FEATURE_CAPABILITIES:
+            node = ctxt->node;
+            ctxt->node = nodes[i];
+            if ((tmp = virXPathString("string(./@policy)", ctxt))) {
+                if ((def->features[val] = virDomainCapabilitiesPolicyTypeFromString(tmp)) == -1) {
+                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                   _("unknown policy attribute '%s' of feature '%s'"),
+                                   tmp, virDomainFeatureTypeToString(val));
+                    goto error;
+                }
+                VIR_FREE(tmp);
+            } else {
+                def->features[val] = VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT;
+            }
+            ctxt->node = node;
+            break;
         case VIR_DOMAIN_FEATURE_PVSPINLOCK:
             node = ctxt->node;
             ctxt->node = nodes[i];
@@ -11943,6 +12004,37 @@ virDomainDefParseXML(xmlDocPtr xml,
         ctxt->node = node;
     }
 
+    if ((n = virXPathNodeSet("./features/capabilities/*", ctxt, &nodes)) < 0)
+        goto error;
+
+    for (i = 0; i < n; i++) {
+        int val = virDomainCapsFeatureTypeFromString((const char *)nodes[i]->name);
+        if (val < 0) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("unexpected capability feature '%s'"), nodes[i]->name);
+            goto error;
+        }
+
+        if (val >= 0 && val < VIR_DOMAIN_CAPS_FEATURE_LAST) {
+            node = ctxt->node;
+            ctxt->node = nodes[i];
+
+            if ((tmp = virXPathString("string(./@state)", ctxt))) {
+                if ((def->caps_features[val] = virDomainFeatureStateTypeFromString(tmp)) == -1) {
+                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                                   _("unknown state attribute '%s' of feature capability '%s'"),
+                                   tmp, virDomainCapsFeatureTypeToString(val));
+                    goto error;
+                }
+                VIR_FREE(tmp);
+            } else {
+                def->caps_features[val] = VIR_DOMAIN_FEATURE_STATE_ON;
+            }
+            ctxt->node = node;
+        }
+    }
+    VIR_FREE(nodes);
+
     if (virDomainEventActionParseXML(ctxt, "on_reboot",
                                      "string(./on_reboot[1])",
                                      &def->onReboot,
@@ -17125,6 +17217,19 @@ verify(((VIR_DOMAIN_XML_INTERNAL_STATUS
          VIR_DOMAIN_XML_INTERNAL_CLOCK_ADJUST)
         & DUMPXML_FLAGS) == 0);
 
+static bool
+virDomainDefHasCapabilitiesFeatures(virDomainDefPtr def)
+{
+    size_t i;
+
+    for (i = 0; i < VIR_DOMAIN_CAPS_FEATURE_LAST; i++) {
+        if (def->caps_features[i] != VIR_DOMAIN_FEATURE_STATE_DEFAULT)
+            return true;
+    }
+
+    return false;
+}
+
 /* This internal version can accept VIR_DOMAIN_XML_INTERNAL_*,
  * whereas the public version cannot.  Also, it appends to an existing
  * buffer (possibly with auto-indent), rather than flattening to string.
@@ -17550,7 +17655,8 @@ virDomainDefFormatInternal(virDomainDefP
             break;
     }
 
-    if (i != VIR_DOMAIN_FEATURE_LAST) {
+    if (i != VIR_DOMAIN_FEATURE_LAST ||
+        virDomainDefHasCapabilitiesFeatures(def)) {
         virBufferAddLit(buf, "<features>\n");
         virBufferAdjustIndent(buf, 2);
 
@@ -17655,6 +17761,25 @@ virDomainDefFormatInternal(virDomainDefP
                 virBufferAddLit(buf, "</hyperv>\n");
                 break;
 
+            case VIR_DOMAIN_FEATURE_CAPABILITIES:
+                if (def->features[i] == VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT &&
+                        !virDomainDefHasCapabilitiesFeatures(def))
+                    break;
+
+                virBufferAsprintf(buf, "<capabilities policy='%s'>\n",
+                                  virDomainCapabilitiesPolicyTypeToString(def->features[i]));
+                virBufferAdjustIndent(buf, 2);
+                for (j = 0; j < VIR_DOMAIN_CAPS_FEATURE_LAST; j++) {
+                    if (def->caps_features[j] != VIR_DOMAIN_FEATURE_STATE_DEFAULT)
+                        virBufferAsprintf(buf, "<%s state='%s'/>\n",
+                                          virDomainCapsFeatureTypeToString(j),
+                                          virDomainFeatureStateTypeToString(
+                                              def->caps_features[j]));
+                }
+                virBufferAdjustIndent(buf, -2);
+                virBufferAddLit(buf, "</capabilities>\n");
+                break;
+
             case VIR_DOMAIN_FEATURE_LAST:
                 break;
             }
Index: libvirt-1.2.5/src/conf/domain_conf.h
===================================================================
--- libvirt-1.2.5.orig/src/conf/domain_conf.h
+++ libvirt-1.2.5/src/conf/domain_conf.h
@@ -1527,6 +1527,7 @@ enum virDomainFeature {
     VIR_DOMAIN_FEATURE_PRIVNET,
     VIR_DOMAIN_FEATURE_HYPERV,
     VIR_DOMAIN_FEATURE_PVSPINLOCK,
+    VIR_DOMAIN_FEATURE_CAPABILITIES,
 
     VIR_DOMAIN_FEATURE_LAST
 };
@@ -1547,6 +1548,56 @@ enum virDomainHyperv {
     VIR_DOMAIN_HYPERV_LAST
 };
 
+typedef enum {
+    VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT = 0,
+    VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW,
+    VIR_DOMAIN_CAPABILITIES_POLICY_DENY,
+
+    VIR_DOMAIN_CAPABILITIES_POLICY_LAST
+} virDomainCapabilitiesPolicy;
+
+/* The capabilities are ordered alphabetically to help check for new ones */
+typedef enum {
+    VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL = 0,
+    VIR_DOMAIN_CAPS_FEATURE_AUDIT_WRITE,
+    VIR_DOMAIN_CAPS_FEATURE_BLOCK_SUSPEND,
+    VIR_DOMAIN_CAPS_FEATURE_CHOWN,
+    VIR_DOMAIN_CAPS_FEATURE_DAC_OVERRIDE,
+    VIR_DOMAIN_CAPS_FEATURE_DAC_READ_SEARCH,
+    VIR_DOMAIN_CAPS_FEATURE_FOWNER,
+    VIR_DOMAIN_CAPS_FEATURE_FSETID,
+    VIR_DOMAIN_CAPS_FEATURE_IPC_LOCK,
+    VIR_DOMAIN_CAPS_FEATURE_IPC_OWNER,
+    VIR_DOMAIN_CAPS_FEATURE_KILL,
+    VIR_DOMAIN_CAPS_FEATURE_LEASE,
+    VIR_DOMAIN_CAPS_FEATURE_LINUX_IMMUTABLE,
+    VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN,
+    VIR_DOMAIN_CAPS_FEATURE_MAC_OVERRIDE,
+    VIR_DOMAIN_CAPS_FEATURE_MKNOD,
+    VIR_DOMAIN_CAPS_FEATURE_NET_ADMIN,
+    VIR_DOMAIN_CAPS_FEATURE_NET_BIND_SERVICE,
+    VIR_DOMAIN_CAPS_FEATURE_NET_BROADCAST,
+    VIR_DOMAIN_CAPS_FEATURE_NET_RAW,
+    VIR_DOMAIN_CAPS_FEATURE_SETGID,
+    VIR_DOMAIN_CAPS_FEATURE_SETFCAP,
+    VIR_DOMAIN_CAPS_FEATURE_SETPCAP,
+    VIR_DOMAIN_CAPS_FEATURE_SETUID,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_ADMIN,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_CHROOT,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_NICE,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_PACCT,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_PTRACE,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_RAWIO,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_RESOURCE,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_TIME,
+    VIR_DOMAIN_CAPS_FEATURE_SYS_TTY_CONFIG,
+    VIR_DOMAIN_CAPS_FEATURE_SYSLOG,
+    VIR_DOMAIN_CAPS_FEATURE_WAKE_ALARM,
+    VIR_DOMAIN_CAPS_FEATURE_LAST
+} virDomainCapsFeature;
+
 enum virDomainLifecycleAction {
     VIR_DOMAIN_LIFECYCLE_DESTROY,
     VIR_DOMAIN_LIFECYCLE_RESTART,
@@ -1916,6 +1967,9 @@ struct _virDomainDef {
     int hyperv_features[VIR_DOMAIN_HYPERV_LAST];
     unsigned int hyperv_spinlocks;
 
+    /* These options are of type virDomainFeatureState: ON = keep, OFF = drop */
+    int caps_features[VIR_DOMAIN_CAPS_FEATURE_LAST];
+
     virDomainClockDef clock;
 
     size_t ngraphics;
@@ -2535,6 +2589,8 @@ VIR_ENUM_DECL(virDomainBoot)
 VIR_ENUM_DECL(virDomainBootMenu)
 VIR_ENUM_DECL(virDomainFeature)
 VIR_ENUM_DECL(virDomainFeatureState)
+VIR_ENUM_DECL(virDomainCapabilitiesPolicy)
+VIR_ENUM_DECL(virDomainCapsFeature)
 VIR_ENUM_DECL(virDomainLifecycle)
 VIR_ENUM_DECL(virDomainLifecycleCrash)
 VIR_ENUM_DECL(virDomainPMState)
Index: libvirt-1.2.5/src/libvirt_private.syms
===================================================================
--- libvirt-1.2.5.orig/src/libvirt_private.syms
+++ libvirt-1.2.5/src/libvirt_private.syms
@@ -129,6 +129,8 @@ virDomainBlockedReasonTypeFromString;
 virDomainBlockedReasonTypeToString;
 virDomainBootMenuTypeFromString;
 virDomainBootMenuTypeToString;
+virDomainCapabilitiesPolicyTypeToString;
+virDomainCapsFeatureTypeToString;
 virDomainChrConsoleTargetTypeFromString;
 virDomainChrConsoleTargetTypeToString;
 virDomainChrDefForeach;
@@ -1013,6 +1015,7 @@ virBufferVasprintf;
 # util/vircgroup.h
 virCgroupAddTask;
 virCgroupAddTaskController;
+virCgroupAllowAllDevices;
 virCgroupAllowDevice;
 virCgroupAllowDeviceMajor;
 virCgroupAllowDevicePath;
Index: libvirt-1.2.5/src/lxc/lxc_cgroup.c
===================================================================
--- libvirt-1.2.5.orig/src/lxc/lxc_cgroup.c
+++ libvirt-1.2.5/src/lxc/lxc_cgroup.c
@@ -351,6 +351,7 @@ virLXCTeardownHostUSBDeviceCgroup(virUSB
 static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                       virCgroupPtr cgroup)
 {
+    int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
     int ret = -1;
     size_t i;
     static virLXCCgroupDevicePolicy devices[] = {
@@ -367,6 +368,13 @@ static int virLXCCgroupSetupDeviceACL(vi
     if (virCgroupDenyAllDevices(cgroup) < 0)
         goto cleanup;
 
+    /* white list mknod if CAP_MKNOD has to be kept */
+    if (capMknod == VIR_DOMAIN_FEATURE_STATE_ON) {
+        if (virCgroupAllowAllDevices(cgroup,
+                                    VIR_CGROUP_DEVICE_MKNOD) < 0)
+            goto cleanup;
+    }
+
     for (i = 0; devices[i].type != 0; i++) {
         virLXCCgroupDevicePolicyPtr dev = &devices[i];
         if (virCgroupAllowDevice(cgroup,
Index: libvirt-1.2.5/src/lxc/lxc_container.c
===================================================================
--- libvirt-1.2.5.orig/src/lxc/lxc_container.c
+++ libvirt-1.2.5/src/lxc/lxc_container.c
@@ -1787,25 +1787,233 @@ static int lxcContainerResolveAllSymlink
  * host system, since they are not currently "containerized"
  */
 #if WITH_CAPNG
-static int lxcContainerDropCapabilities(bool keepReboot)
+
+/* Define capabilities to -1 if those aren't defined in the kernel:
+ * this will help us ignore them. */
+# ifndef CAP_AUDIT_CONTROL
+#  define CAP_AUDIT_CONTROL -1
+# endif
+# ifndef CAP_AUDIT_WRITE
+#  define CAP_AUDIT_WRITE -1
+# endif
+# ifndef CAP_BLOCK_SUSPEND
+#  define CAP_BLOCK_SUSPEND -1
+# endif
+# ifndef CAP_CHOWN
+#  define CAP_CHOWN -1
+# endif
+# ifndef CAP_DAC_OVERRIDE
+#  define CAP_DAC_OVERRIDE -1
+# endif
+# ifndef CAP_DAC_READ_SEARCH
+#  define CAP_DAC_READ_SEARCH -1
+# endif
+# ifndef CAP_FOWNER
+#  define CAP_FOWNER -1
+# endif
+# ifndef CAP_FSETID
+#  define CAP_FSETID -1
+# endif
+# ifndef CAP_IPC_LOCK
+#  define CAP_IPC_LOCK -1
+# endif
+# ifndef CAP_IPC_OWNER
+#  define CAP_IPC_OWNER -1
+# endif
+# ifndef CAP_KILL
+#  define CAP_KILL -1
+# endif
+# ifndef CAP_LEASE
+#  define CAP_LEASE -1
+# endif
+# ifndef CAP_LINUX_IMMUTABLE
+#  define CAP_LINUX_IMMUTABLE -1
+# endif
+# ifndef CAP_MAC_ADMIN
+#  define CAP_MAC_ADMIN -1
+# endif
+# ifndef CAP_MAC_OVERRIDE
+#  define CAP_MAC_OVERRIDE -1
+# endif
+# ifndef CAP_MKNOD
+#  define CAP_MKNOD -1
+# endif
+# ifndef CAP_NET_ADMIN
+#  define CAP_NET_ADMIN -1
+# endif
+# ifndef CAP_NET_BIND_SERVICE
+#  define CAP_NET_BIND_SERVICE -1
+# endif
+# ifndef CAP_NET_BROADCAST
+#  define CAP_NET_BROADCAST -1
+# endif
+# ifndef CAP_NET_RAW
+#  define CAP_NET_RAW -1
+# endif
+# ifndef CAP_SETGID
+#  define CAP_SETGID -1
+# endif
+# ifndef CAP_SETFCAP
+#  define CAP_SETFCAP -1
+# endif
+# ifndef CAP_SETPCAP
+#  define CAP_SETPCAP -1
+# endif
+# ifndef CAP_SETUID
+#  define CAP_SETUID -1
+# endif
+# ifndef CAP_SYS_ADMIN
+#  define CAP_SYS_ADMIN -1
+# endif
+# ifndef CAP_SYS_BOOT
+#  define CAP_SYS_BOOT -1
+# endif
+# ifndef CAP_SYS_CHROOT
+#  define CAP_SYS_CHROOT -1
+# endif
+# ifndef CAP_SYS_MODULE
+#  define CAP_SYS_MODULE -1
+# endif
+# ifndef CAP_SYS_NICE
+#  define CAP_SYS_NICE -1
+# endif
+# ifndef CAP_SYS_PACCT
+#  define CAP_SYS_PACCT -1
+# endif
+# ifndef CAP_SYS_PTRACE
+#  define CAP_SYS_PTRACE -1
+# endif
+# ifndef CAP_SYS_RAWIO
+#  define CAP_SYS_RAWIO -1
+# endif
+# ifndef CAP_SYS_RESOURCE
+#  define CAP_SYS_RESOURCE -1
+# endif
+# ifndef CAP_SYS_TIME
+#  define CAP_SYS_TIME -1
+# endif
+# ifndef CAP_SYS_TTY_CONFIG
+#  define CAP_SYS_TTY_CONFIG -1
+# endif
+# ifndef CAP_SYSLOG
+#  define CAP_SYSLOG -1
+# endif
+# ifndef CAP_WAKE_ALARM
+#  define CAP_WAKE_ALARM -1
+# endif
+
+static int lxcContainerDropCapabilities(virDomainDefPtr def,
+                                        bool keepReboot)
 {
     int ret;
+    size_t i;
+    int policy = def->features[VIR_DOMAIN_FEATURE_CAPABILITIES];
+
+    /* Maps virDomainCapsFeature to CAP_* */
+    static int capsMapping[] = {CAP_AUDIT_CONTROL,
+                                CAP_AUDIT_WRITE,
+                                CAP_BLOCK_SUSPEND,
+                                CAP_CHOWN,
+                                CAP_DAC_OVERRIDE,
+                                CAP_DAC_READ_SEARCH,
+                                CAP_FOWNER,
+                                CAP_FSETID,
+                                CAP_IPC_LOCK,
+                                CAP_IPC_OWNER,
+                                CAP_KILL,
+                                CAP_LEASE,
+                                CAP_LINUX_IMMUTABLE,
+                                CAP_MAC_ADMIN,
+                                CAP_MAC_OVERRIDE,
+                                CAP_MKNOD,
+                                CAP_NET_ADMIN,
+                                CAP_NET_BIND_SERVICE,
+                                CAP_NET_BROADCAST,
+                                CAP_NET_RAW,
+                                CAP_SETGID,
+                                CAP_SETFCAP,
+                                CAP_SETPCAP,
+                                CAP_SETUID,
+                                CAP_SYS_ADMIN,
+                                CAP_SYS_BOOT,
+                                CAP_SYS_CHROOT,
+                                CAP_SYS_MODULE,
+                                CAP_SYS_NICE,
+                                CAP_SYS_PACCT,
+                                CAP_SYS_PTRACE,
+                                CAP_SYS_RAWIO,
+                                CAP_SYS_RESOURCE,
+                                CAP_SYS_TIME,
+                                CAP_SYS_TTY_CONFIG,
+                                CAP_SYSLOG,
+                                CAP_WAKE_ALARM};
 
     capng_get_caps_process();
 
-    if ((ret = capng_updatev(CAPNG_DROP,
-                             CAPNG_EFFECTIVE | CAPNG_PERMITTED |
-                             CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
-                             CAP_SYS_MODULE, /* No kernel module loading */
-                             CAP_SYS_TIME, /* No changing the clock */
-                             CAP_MKNOD, /* No creating device nodes */
-                             CAP_AUDIT_CONTROL, /* No messing with auditing status */
-                             CAP_MAC_ADMIN, /* No messing with LSM config */
-                             keepReboot ? -1 : CAP_SYS_BOOT, /* No use of reboot */
-                             -1)) < 0) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       _("Failed to remove capabilities: %d"), ret);
-        return -1;
+    /* Make sure we drop everything if required by the user */
+    if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_DENY)
+        capng_clear(CAPNG_SELECT_BOTH);
+
+    /* Apply all single capabilities changes */
+    for (i = 0; i < VIR_DOMAIN_CAPS_FEATURE_LAST; i++) {
+        bool toDrop = false;
+        int state = def->caps_features[i];
+
+        if (!cap_valid(capsMapping[i]))
+            continue;
+
+        switch ((virDomainCapabilitiesPolicy) policy) {
+
+        case VIR_DOMAIN_CAPABILITIES_POLICY_DENY:
+            if (state == VIR_DOMAIN_FEATURE_STATE_ON &&
+                    (ret = capng_update(CAPNG_ADD,
+                                        CAPNG_EFFECTIVE | CAPNG_PERMITTED |
+                                        CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
+                                        capsMapping[i])) < 0) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("Failed to add capability %s: %d"),
+                               virDomainCapsFeatureTypeToString(i), ret);
+                return -1;
+            }
+            break;
+
+        case VIR_DOMAIN_CAPABILITIES_POLICY_DEFAULT:
+            switch ((virDomainCapsFeature) i) {
+            case VIR_DOMAIN_CAPS_FEATURE_SYS_BOOT: /* No use of reboot */
+                toDrop = !keepReboot && (state != VIR_DOMAIN_FEATURE_STATE_ON);
+                break;
+            case VIR_DOMAIN_CAPS_FEATURE_SYS_MODULE: /* No kernel module loading */
+            case VIR_DOMAIN_CAPS_FEATURE_SYS_TIME: /* No changing the clock */
+            case VIR_DOMAIN_CAPS_FEATURE_MKNOD: /* No creating device nodes */
+            case VIR_DOMAIN_CAPS_FEATURE_AUDIT_CONTROL: /* No messing with auditing status */
+            case VIR_DOMAIN_CAPS_FEATURE_MAC_ADMIN: /* No messing with LSM config */
+                toDrop = (state != VIR_DOMAIN_FEATURE_STATE_ON);
+                break;
+            default: /* User specified capabilities to drop */
+                toDrop = (state == VIR_DOMAIN_FEATURE_STATE_OFF);
+            }
+            /* Fallthrough */
+
+        case VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW:
+            if (policy == VIR_DOMAIN_CAPABILITIES_POLICY_ALLOW)
+                toDrop = state == VIR_DOMAIN_FEATURE_STATE_OFF;
+
+            if (toDrop && (ret = capng_update(CAPNG_DROP,
+                                              CAPNG_EFFECTIVE | CAPNG_PERMITTED |
+                                              CAPNG_INHERITABLE | CAPNG_BOUNDING_SET,
+                                              capsMapping[i])) < 0) {
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("Failed to remove capability %s: %d"),
+                               virDomainCapsFeatureTypeToString(i), ret);
+                return -1;
+            }
+            break;
+
+        default: /* unknown policy: fail instead of silently keeping capabilities */
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("Unsupported capabilities policy: %d"), policy);
+            return -1;
+        }
+    }
 
     if ((ret = capng_apply(CAPNG_SELECT_BOTH)) < 0) {
@@ -1823,7 +2031,8 @@ static int lxcContainerDropCapabilities(
     return 0;
 }
 #else
-static int lxcContainerDropCapabilities(bool keepReboot ATTRIBUTE_UNUSED)
+static int lxcContainerDropCapabilities(virDomainDefPtr def ATTRIBUTE_UNUSED,
+                                        bool keepReboot ATTRIBUTE_UNUSED)
 {
     VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
     return 0;
@@ -1929,7 +2138,7 @@ static int lxcContainerChild(void *data)
     }
 
     /* drop a set of root capabilities */
-    if (lxcContainerDropCapabilities(!!hasReboot) < 0)
+    if (lxcContainerDropCapabilities(vmDef, !!hasReboot) < 0)
         goto cleanup;
 
     if (lxcContainerSendContinue(argv->handshakefd) < 0) {
Index: libvirt-1.2.5/src/util/vircgroup.c
===================================================================
--- libvirt-1.2.5.orig/src/util/vircgroup.c
+++ libvirt-1.2.5/src/util/vircgroup.c
@@ -2633,14 +2633,45 @@ virCgroupDenyAllDevices(virCgroupPtr gro
                                 "a");
 }
 
+/**
+ * virCgroupAllowAllDevices:
+ *
+ * Allows the given permissions for every block and character device.
+ * Major and minor are passed as -1, i.e. '*' ('m' is only an example):
+ *
+ * 'b *:* m'
+ * 'c *:* m'
+ *
+ * @group: The cgroup to allow devices for
+ * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
+ *
+ * Returns: 0 on success, -1 if either device type could not be allowed
+ */
+int
+virCgroupAllowAllDevices(virCgroupPtr group, int perms)
+{
+    int ret = -1;
+
+    if (virCgroupAllowDevice(group, 'b', -1, -1, perms) < 0)
+        goto cleanup;
+
+    if (virCgroupAllowDevice(group, 'c', -1, -1, perms) < 0)
+        goto cleanup;
+
+    ret = 0;
+
+ cleanup:
+    return ret;
+}
+
 
 /**
  * virCgroupAllowDevice:
  *
  * @group: The cgroup to allow a device for
  * @type: The device type (i.e., 'c' or 'b')
- * @major: The major number of the device
- * @minor: The minor number of the device
+ * @major: The major number of the device, a negative value means '*'
+ * @minor: The minor number of the device, a negative value means '*'
  * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
  *
  * Returns: 0 on success
@@ -2651,8 +2682,18 @@ virCgroupAllowDevice(virCgroupPtr group,
 {
     int ret = -1;
     char *devstr = NULL;
+    char *majorstr = NULL;
+    char *minorstr = NULL;
 
-    if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor,
+    if ((major < 0 && VIR_STRDUP(majorstr, "*") < 0) ||
+        (major >= 0 && virAsprintf(&majorstr, "%i", major) < 0))
+        goto cleanup;
+
+    if ((minor < 0 && VIR_STRDUP(minorstr, "*") < 0) ||
+        (minor >= 0 && virAsprintf(&minorstr, "%i", minor) < 0))
+        goto cleanup;
+
+    if (virAsprintf(&devstr, "%c %s:%s %s%s%s", type, majorstr, minorstr,
                     perms & VIR_CGROUP_DEVICE_READ ? "r" : "",
                     perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "",
                     perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") < 0)
@@ -2668,6 +2709,8 @@ virCgroupAllowDevice(virCgroupPtr group,
 
  cleanup:
     VIR_FREE(devstr);
+    VIR_FREE(majorstr);
+    VIR_FREE(minorstr);
     return ret;
 }
 
@@ -4213,6 +4256,14 @@ virCgroupGetCpusetCpus(virCgroupPtr grou
     return -1;
 }
 
+int
+virCgroupAllowAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED,
+                         int perms ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("Control groups not supported on this platform"));
+    return -1; /* always fails: cgroups unsupported on this platform */
+}
 
 int
 virCgroupDenyAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED)
Index: libvirt-1.2.5/src/util/vircgroup.h
===================================================================
--- libvirt-1.2.5.orig/src/util/vircgroup.h
+++ libvirt-1.2.5/src/util/vircgroup.h
@@ -180,6 +180,8 @@ enum {
 
 int virCgroupDenyAllDevices(virCgroupPtr group);
 
+int virCgroupAllowAllDevices(virCgroupPtr group, int perms);
+
 int virCgroupAllowDevice(virCgroupPtr group,
                          char type,
                          int major,
Index: libvirt-1.2.5/tests/domainschemadata/domain-caps-features.xml
===================================================================
--- /dev/null
+++ libvirt-1.2.5/tests/domainschemadata/domain-caps-features.xml
@@ -0,0 +1,28 @@
+<domain type='lxc'>
+    <name>demo</name>
+    <uuid>8369f1ac-7e46-e869-4ca5-759d51478066</uuid>
+    <os>
+        <type>exe</type>
+        <init>/sh</init>
+    </os>
+    <features>
+        <capabilities policy="deny">
+            <mknod state="on"/>
+        </capabilities>
+    </features>
+    <resource>
+      <partition>/virtualmachines</partition>
+    </resource>
+    <memory unit='KiB'>500000</memory>
+    <devices>
+        <filesystem type='mount'>
+            <source dir='/root/container'/>
+            <target dir='/'/>
+        </filesystem>
+        <filesystem type='mount'>
+            <source dir='/home'/>
+            <target dir='/home'/>
+        </filesystem>
+        <console type='pty'/>
+    </devices>
+</domain>
Index: libvirt-1.2.5/tests/lxcxml2xmldata/lxc-capabilities.xml
===================================================================
--- /dev/null
+++ libvirt-1.2.5/tests/lxcxml2xmldata/lxc-capabilities.xml
@@ -0,0 +1,34 @@
+<domain type='lxc'>
+  <name>jessie</name>
+  <uuid>e21987a5-e98e-9c99-0e35-803e4d9ad1fe</uuid>
+  <memory unit='KiB'>1048576</memory>
+  <currentMemory unit='KiB'>1048576</currentMemory>
+  <vcpu placement='static'>1</vcpu>
+  <resource>
+    <partition>/machine</partition>
+  </resource>
+  <os>
+    <type arch='x86_64'>exe</type>
+    <init>/sbin/init</init>
+  </os>
+  <features>
+    <capabilities policy='default'>
+      <mknod state='on'/>
+    </capabilities>
+  </features>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>restart</on_crash>
+  <devices>
+    <emulator>/usr/libexec/libvirt_lxc</emulator>
+    <filesystem type='mount' accessmode='passthrough'>
+      <source dir='/mach/jessie'/>
+      <target dir='/'/>
+    </filesystem>
+    <console type='pty'>
+      <target type='lxc' port='0'/>
+    </console>
+  </devices>
+  <seclabel type='none'/>
+</domain>
Index: libvirt-1.2.5/tests/lxcxml2xmltest.c
===================================================================
--- libvirt-1.2.5.orig/tests/lxcxml2xmltest.c
+++ libvirt-1.2.5/tests/lxcxml2xmltest.c
@@ -144,6 +144,7 @@ mymain(void)
     DO_TEST_DIFFERENT("filesystem-ram");
     DO_TEST("filesystem-root");
     DO_TEST("idmap");
+    DO_TEST("capabilities");
 
     virObjectUnref(caps);
     virObjectUnref(xmlopt);