From c61a2810a2161986353705b44d9503e6bb079f4f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 28 Dec 2012 18:58:39 -0800
Subject: userns: Avoid recursion in put_user_ns

When freeing a deeply nested user namespace free_user_ns calls
put_user_ns on it's parent which may in turn call free_user_ns again.
When -fno-optimize-sibling-calls is passed to gcc one stack frame per
user namespace is left on the stack, potentially overflowing the
kernel stack.  CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls
so we can't count on gcc to optimize this code.

Remove struct kref and use a plain atomic_t.  Making the code more
flexible and easier to comprehend.  Make the loop in free_user_ns
explict to guarantee that the stack does not overflow with
CONFIG_FRAME_POINTER enabled.

I have tested this fix with a simple program that uses unshare to
create a deeply nested user namespace structure and then calls exit.
With 1000 nesteuser namespaces before this change running my test
program causes the kernel to die a horrible death.  With 10,000,000
nested user namespaces after this change my test program runs to
completion and causes no harm.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Pointed-out-by: Vasily Kulikov <segoon@openwall.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/user_namespace.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'kernel/user_namespace.c')
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 2b042c42fbc4..24f8ec3b64d8 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,7 +78,7 @@ int create_user_ns(struct cred *new)
 		return ret;
 	}
 
-	kref_init(&ns->kref);
+	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->owner = owner;
@@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
 	return create_user_ns(cred);
 }
 
-void free_user_ns(struct kref *kref)
+void free_user_ns(struct user_namespace *ns)
 {
-	struct user_namespace *parent, *ns =
-		container_of(kref, struct user_namespace, kref);
+	struct user_namespace *parent;
 
-	parent = ns->parent;
-	proc_free_inum(ns->proc_inum);
-	kmem_cache_free(user_ns_cachep, ns);
-	put_user_ns(parent);
+	do {
+		parent = ns->parent;
+		proc_free_inum(ns->proc_inum);
+		kmem_cache_free(user_ns_cachep, ns);
+		ns = parent;
+	} while (atomic_dec_and_test(&parent->count));
 }
 EXPORT_SYMBOL(free_user_ns);
 
-- 
cgit v1.2.3


From 0bd14b4fd72afd5df41e9fd59f356740f22fceba Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 27 Dec 2012 22:27:29 -0800
Subject: userns: Allow any uid or gid mappings that don't overlap.

When I initially wrote the code for /proc/<pid>/uid_map.  I was lazy
and avoided duplicate mappings by the simple expedient of ensuring the
first number in a new extent was greater than any number in the
previous extent.

Unfortunately that precludes a number of valid mappings, and someone
noticed and complained.  So use a simple check to ensure that ranges
in the mapping extents don't overlap.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/user_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 24f8ec3b64d8..8b650837083e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -520,6 +520,42 @@ struct seq_operations proc_projid_seq_operations = {
 	.show = projid_m_show,
 };
 
+static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent)
+{
+	u32 upper_first, lower_first, upper_last, lower_last;
+	unsigned idx;
+
+	upper_first = extent->first;
+	lower_first = extent->lower_first;
+	upper_last = upper_first + extent->count - 1;
+	lower_last = lower_first + extent->count - 1;
+
+	for (idx = 0; idx < new_map->nr_extents; idx++) {
+		u32 prev_upper_first, prev_lower_first;
+		u32 prev_upper_last, prev_lower_last;
+		struct uid_gid_extent *prev;
+
+		prev = &new_map->extent[idx];
+
+		prev_upper_first = prev->first;
+		prev_lower_first = prev->lower_first;
+		prev_upper_last = prev_upper_first + prev->count - 1;
+		prev_lower_last = prev_lower_first + prev->count - 1;
+
+		/* Does the upper range intersect a previous extent? */
+		if ((prev_upper_first <= upper_last) &&
+		    (prev_upper_last >= upper_first))
+			return true;
+
+		/* Does the lower range intersect a previous extent? */
+		if ((prev_lower_first <= lower_last) &&
+		    (prev_lower_last >= lower_first))
+			return true;
+	}
+	return false;
+}
+
+
 static DEFINE_MUTEX(id_map_mutex);
 
 static ssize_t map_write(struct file *file, const char __user *buf,
@@ -532,7 +568,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	struct user_namespace *ns = seq->private;
 	struct uid_gid_map new_map;
 	unsigned idx;
-	struct uid_gid_extent *extent, *last = NULL;
+	struct uid_gid_extent *extent = NULL;
 	unsigned long page = 0;
 	char *kbuf, *pos, *next_line;
 	ssize_t ret = -EINVAL;
@@ -635,14 +671,11 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 		if ((extent->lower_first + extent->count) <= extent->lower_first)
 			goto out;
 
-		/* For now only accept extents that are strictly in order */
-		if (last &&
-		    (((last->first + last->count) > extent->first) ||
-		     ((last->lower_first + last->count) > extent->lower_first)))
+		/* Do the ranges in extent overlap any previous extents? */
+		if (mappings_overlap(&new_map, extent))
 			goto out;
 
 		new_map.nr_extents++;
-		last = extent;
 
 		/* Fail if the file contains too many extents */
 		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
-- 
cgit v1.2.3


From e66eded8309ebf679d3d3c1f5820d1f2ca332c71 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 13 Mar 2013 11:51:49 -0700
Subject: userns: Don't allow CLONE_NEWUSER | CLONE_FS

Don't allowing sharing the root directory with processes in a
different user namespace.  There doesn't seem to be any point, and to
allow it would require the overhead of putting a user namespace
reference in fs_struct (for permission checks) and incrementing that
reference count on practically every call to fork.

So just perform the inexpensive test of forbidding sharing fs_struct
acrosss processes in different user namespaces.  We already disallow
other forms of threading when unsharing a user namespace so this
should be no real burden in practice.

This updates setns, clone, and unshare to disallow multiple user
namespaces sharing an fs_struct.

Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/user_namespace.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel/user_namespace.c')

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8b650837083e..b14f4d342043 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/projid.h>
+#include <linux/fs_struct.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 
@@ -837,6 +838,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
 	if (atomic_read(&current->mm->mm_users) > 1)
 		return -EINVAL;
 
+	if (current->fs->users != 1)
+		return -EINVAL;
+
 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
-- 
cgit v1.2.3