From c61a2810a2161986353705b44d9503e6bb079f4f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Dec 2012 18:58:39 -0800 Subject: userns: Avoid recursion in put_user_ns When freeing a deeply nested user namespace free_user_ns calls put_user_ns on it's parent which may in turn call free_user_ns again. When -fno-optimize-sibling-calls is passed to gcc one stack frame per user namespace is left on the stack, potentially overflowing the kernel stack. CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls so we can't count on gcc to optimize this code. Remove struct kref and use a plain atomic_t. Making the code more flexible and easier to comprehend. Make the loop in free_user_ns explict to guarantee that the stack does not overflow with CONFIG_FRAME_POINTER enabled. I have tested this fix with a simple program that uses unshare to create a deeply nested user namespace structure and then calls exit. With 1000 nesteuser namespaces before this change running my test program causes the kernel to die a horrible death. With 10,000,000 nested user namespaces after this change my test program runs to completion and causes no harm. Acked-by: Serge Hallyn Pointed-out-by: Vasily Kulikov Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 2b042c42fbc4..24f8ec3b64d8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -78,7 +78,7 @@ int create_user_ns(struct cred *new) return ret; } - kref_init(&ns->kref); + atomic_set(&ns->count, 1); /* Leave the new->user_ns reference with the new user namespace. */ ns->parent = parent_ns; ns->owner = owner; @@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) return create_user_ns(cred); } -void free_user_ns(struct kref *kref) +void free_user_ns(struct user_namespace *ns) { - struct user_namespace *parent, *ns = - container_of(kref, struct user_namespace, kref); + struct user_namespace *parent; - parent = ns->parent; - proc_free_inum(ns->proc_inum); - kmem_cache_free(user_ns_cachep, ns); - put_user_ns(parent); + do { + parent = ns->parent; + proc_free_inum(ns->proc_inum); + kmem_cache_free(user_ns_cachep, ns); + ns = parent; + } while (atomic_dec_and_test(&parent->count)); } EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3 From 0bd14b4fd72afd5df41e9fd59f356740f22fceba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Dec 2012 22:27:29 -0800 Subject: userns: Allow any uid or gid mappings that don't overlap. When I initially wrote the code for /proc//uid_map. I was lazy and avoided duplicate mappings by the simple expedient of ensuring the first number in a new extent was greater than any number in the previous extent. Unfortunately that precludes a number of valid mappings, and someone noticed and complained. So use a simple check to ensure that ranges in the mapping extents don't overlap. Acked-by: Serge Hallyn Signed-off-by: "Eric W. Biederman" --- kernel/user_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 24f8ec3b64d8..8b650837083e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -520,6 +520,42 @@ struct seq_operations proc_projid_seq_operations = { .show = projid_m_show, }; +static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent *extent) +{ + u32 upper_first, lower_first, upper_last, lower_last; + unsigned idx; + + upper_first = extent->first; + lower_first = extent->lower_first; + upper_last = upper_first + extent->count - 1; + lower_last = lower_first + extent->count - 1; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + u32 prev_upper_first, prev_lower_first; + u32 prev_upper_last, prev_lower_last; + struct uid_gid_extent *prev; + + prev = &new_map->extent[idx]; + + prev_upper_first = prev->first; + prev_lower_first = prev->lower_first; + prev_upper_last = prev_upper_first + prev->count - 1; + prev_lower_last = prev_lower_first + prev->count - 1; + + /* Does the upper range intersect a previous extent? */ + if ((prev_upper_first <= upper_last) && + (prev_upper_last >= upper_first)) + return true; + + /* Does the lower range intersect a previous extent? */ + if ((prev_lower_first <= lower_last) && + (prev_lower_last >= lower_first)) + return true; + } + return false; +} + + static DEFINE_MUTEX(id_map_mutex); static ssize_t map_write(struct file *file, const char __user *buf, @@ -532,7 +568,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct user_namespace *ns = seq->private; struct uid_gid_map new_map; unsigned idx; - struct uid_gid_extent *extent, *last = NULL; + struct uid_gid_extent *extent = NULL; unsigned long page = 0; char *kbuf, *pos, *next_line; ssize_t ret = -EINVAL; @@ -635,14 +671,11 @@ static ssize_t map_write(struct file *file, const char __user *buf, if ((extent->lower_first + extent->count) <= extent->lower_first) goto out; - /* For now only accept extents that are strictly in order */ - if (last && - (((last->first + last->count) > extent->first) || - ((last->lower_first + last->count) > extent->lower_first))) + /* Do the ranges in extent overlap any previous extents? */ + if (mappings_overlap(&new_map, extent)) goto out; new_map.nr_extents++; - last = extent; /* Fail if the file contains too many extents */ if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) && -- cgit v1.2.3 From e66eded8309ebf679d3d3c1f5820d1f2ca332c71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 Mar 2013 11:51:49 -0700 Subject: userns: Don't allow CLONE_NEWUSER | CLONE_FS Don't allowing sharing the root directory with processes in a different user namespace. There doesn't seem to be any point, and to allow it would require the overhead of putting a user namespace reference in fs_struct (for permission checks) and incrementing that reference count on practically every call to fork. So just perform the inexpensive test of forbidding sharing fs_struct acrosss processes in different user namespaces. We already disallow other forms of threading when unsharing a user namespace so this should be no real burden in practice. This updates setns, clone, and unshare to disallow multiple user namespaces sharing an fs_struct. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" Signed-off-by: Linus Torvalds --- kernel/user_namespace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/user_namespace.c') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8b650837083e..b14f4d342043 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,7 @@ #include #include #include +#include static struct kmem_cache *user_ns_cachep __read_mostly; @@ -837,6 +838,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns) if (atomic_read(¤t->mm->mm_users) > 1) return -EINVAL; + if (current->fs->users != 1) + return -EINVAL; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; -- cgit v1.2.3