File bsc1055676-0001-daemon-oci-obey-CL_UNPRIVILEGED-for-user-namespaced-.patch of Package docker

From ff7b94c76f343931463b5916fb3fbd2610869a1a Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <asarai@suse.de>
Date: Sun, 15 Oct 2017 17:06:20 +1100
Subject: [PATCH] daemon: oci: obey CL_UNPRIVILEGED for user namespaced daemon

When runc is bind-mounting a particular path "with options", it has to
do so by first creating a bind-mount and then modifying the options of
said bind-mount via remount. However, in a user namespace, there are
restrictions on which flags you can change with a remount (due to
CL_UNPRIVILEGED being set in this instance). Docker historically has
ignored this, and as a result, internal Docker mounts (such as secrets)
haven't worked with --userns-remap. Fix this by preserving
CL_UNPRIVILEGED mount flags when Docker is spawning containers with user
namespaces enabled.

SUSE-Bug: https://bugzilla.suse.com/show_bug.cgi?id=1055676
Signed-off-by: Aleksa Sarai <asarai@suse.de>
---
 components/engine/daemon/oci_linux.go | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/components/engine/daemon/oci_linux.go b/components/engine/daemon/oci_linux.go
index 6917b4841429..936cb8f998ca 100644
--- a/components/engine/daemon/oci_linux.go
+++ b/components/engine/daemon/oci_linux.go
@@ -27,6 +27,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/user"
 	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
 )
 
 var (
@@ -469,6 +470,38 @@ func ensureSharedOrSlave(path string) error {
 	return nil
 }
 
+// getUnprivilegedMountFlags returns the mount options that are set on the
+// mount containing path and that CL_UNPRIVILEGED locks in a user namespace;
+// a bind-mount "with options" must keep them or the remount is refused.
+// statfs(2) reports ST_* bits: ST_RELATIME (0x1000) differs from MS_RELATIME
+// (1<<21), so it is matched by value. Options come back in a fixed order.
+func getUnprivilegedMountFlags(path string) ([]string, error) {
+	var statfs unix.Statfs_t
+	if err := unix.Statfs(path, &statfs); err != nil {
+		return nil, err
+	}
+
+	// The set of keys comes from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
+	unprivilegedFlags := []struct{ mask uint64; name string }{
+		{unix.MS_RDONLY, "ro"},
+		{unix.MS_NODEV, "nodev"},
+		{unix.MS_NOEXEC, "noexec"},
+		{unix.MS_NOSUID, "nosuid"},
+		{unix.MS_NOATIME, "noatime"},
+		{0x1000, "relatime"}, // ST_RELATIME as reported in statfs f_flags
+		{unix.MS_NODIRATIME, "nodiratime"},
+	}
+
+	var flags []string
+	for _, f := range unprivilegedFlags {
+		if uint64(statfs.Flags)&f.mask == f.mask {
+			flags = append(flags, f.name)
+		}
+	}
+
+	return flags, nil
+}
+
 var (
 	mountPropagationMap = map[string]int{
 		"private":  mount.PRIVATE,
@@ -586,6 +619,19 @@ func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []c
 			opts = append(opts, mountPropagationReverseMap[pFlag])
 		}
 
+		// If we are using user namespaces, then we must make sure that we
+		// don't drop any of the CL_UNPRIVILEGED "locked" flags of the source
+		// "mount" when we bind-mount. The reason for this is that at the point
+		// when runc sets up the root filesystem, it is already inside a user
+		// namespace, and thus cannot change any flags that are locked.
+		if daemon.configStore.RemappedRoot != "" {
+			unprivOpts, err := getUnprivilegedMountFlags(m.Source)
+			if err != nil {
+				return err
+			}
+			opts = append(opts, unprivOpts...)
+		}
+
 		mt.Options = opts
 		s.Mounts = append(s.Mounts, mt)
 	}
-- 
2.16.1

openSUSE Build Service is sponsored by