From 923c7538236564c46ee80c253a416705321f13e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Dec 2012 11:39:12 +0800 Subject: userns: Allow unprivileged reboot In a container with its own pid namespace and user namespace, rebooting the system won't reboot the host, but will terminate all the processes in it and thus shut the container down, so it's safe. Signed-off-by: Li Zefan Signed-off-by: Eric W. Biederman --- kernel/sys.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 265b37690421..24d1ef56cd95 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -433,11 +433,12 @@ static DEFINE_MUTEX(reboot_mutex); SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { + struct pid_namespace *pid_ns = task_active_pid_ns(current); char buffer[256]; int ret = 0; /* We only trust the superuser with rebooting the system. */ - if (!capable(CAP_SYS_BOOT)) + if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT)) return -EPERM; /* For safety, we require "magic" arguments. */ @@ -453,7 +454,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, * pid_namespace, the command is handled by reboot_pid_ns() which will * call do_exit(). */ - ret = reboot_pid_ns(task_active_pid_ns(current), cmd); + ret = reboot_pid_ns(pid_ns, cmd); if (ret) return ret; -- cgit v1.2.3 From 7fe5e04292e71af34ae171b88caa2a139e0b6125 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 21 Feb 2013 16:43:06 -0800 Subject: sys_prctl(): arg2 is unsigned long which is never < 0 arg2 can never be < 0, because its type is 'unsigned long'. Also, use the provided macros. 
Signed-off-by: Chen Gang Reported-by: Cyrill Gorcunov Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 265b37690421..83261059676c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -47,6 +47,7 @@ #include #include #include +#include #include /* Move somewhere else to avoid recompiling? */ @@ -2026,7 +2027,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: - if (arg2 < 0 || arg2 > 1) { + if (arg2 != SUID_DUMP_DISABLE && + arg2 != SUID_DUMP_USER) { error = -EINVAL; break; } -- cgit v1.2.3 From f3cbd435b02fb45efc2c8a39c2ea19816669c412 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 21 Feb 2013 16:43:07 -0800 Subject: sys_prctl(): coding-style cleanup Remove a tabstop from the switch statement, in the usual fashion. A few instances of weirdwrapping were removed as a result. 
Cc: Chen Gang Cc: Cyrill Gorcunov Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 288 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 143 insertions(+), 145 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 83261059676c..840cfdad7bfc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2013,161 +2013,159 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = 0; switch (option) { - case PR_SET_PDEATHSIG: - if (!valid_signal(arg2)) { - error = -EINVAL; - break; - } - me->pdeath_signal = arg2; - break; - case PR_GET_PDEATHSIG: - error = put_user(me->pdeath_signal, (int __user *)arg2); - break; - case PR_GET_DUMPABLE: - error = get_dumpable(me->mm); + case PR_SET_PDEATHSIG: + if (!valid_signal(arg2)) { + error = -EINVAL; break; - case PR_SET_DUMPABLE: - if (arg2 != SUID_DUMP_DISABLE && - arg2 != SUID_DUMP_USER) { - error = -EINVAL; - break; - } - set_dumpable(me->mm, arg2); + } + me->pdeath_signal = arg2; + break; + case PR_GET_PDEATHSIG: + error = put_user(me->pdeath_signal, (int __user *)arg2); + break; + case PR_GET_DUMPABLE: + error = get_dumpable(me->mm); + break; + case PR_SET_DUMPABLE: + if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) { + error = -EINVAL; break; + } + set_dumpable(me->mm, arg2); + break; - case PR_SET_UNALIGN: - error = SET_UNALIGN_CTL(me, arg2); - break; - case PR_GET_UNALIGN: - error = GET_UNALIGN_CTL(me, arg2); - break; - case PR_SET_FPEMU: - error = SET_FPEMU_CTL(me, arg2); - break; - case PR_GET_FPEMU: - error = GET_FPEMU_CTL(me, arg2); - break; - case PR_SET_FPEXC: - error = SET_FPEXC_CTL(me, arg2); - break; - case PR_GET_FPEXC: - error = GET_FPEXC_CTL(me, arg2); - break; - case PR_GET_TIMING: - error = PR_TIMING_STATISTICAL; - break; - case PR_SET_TIMING: - if (arg2 != PR_TIMING_STATISTICAL) - error = -EINVAL; - break; - case PR_SET_NAME: - comm[sizeof(me->comm)-1] = 0; - if 
(strncpy_from_user(comm, (char __user *)arg2, - sizeof(me->comm) - 1) < 0) - return -EFAULT; - set_task_comm(me, comm); - proc_comm_connector(me); - break; - case PR_GET_NAME: - get_task_comm(comm, me); - if (copy_to_user((char __user *)arg2, comm, - sizeof(comm))) - return -EFAULT; - break; - case PR_GET_ENDIAN: - error = GET_ENDIAN(me, arg2); - break; - case PR_SET_ENDIAN: - error = SET_ENDIAN(me, arg2); - break; - case PR_GET_SECCOMP: - error = prctl_get_seccomp(); - break; - case PR_SET_SECCOMP: - error = prctl_set_seccomp(arg2, (char __user *)arg3); - break; - case PR_GET_TSC: - error = GET_TSC_CTL(arg2); - break; - case PR_SET_TSC: - error = SET_TSC_CTL(arg2); - break; - case PR_TASK_PERF_EVENTS_DISABLE: - error = perf_event_task_disable(); - break; - case PR_TASK_PERF_EVENTS_ENABLE: - error = perf_event_task_enable(); - break; - case PR_GET_TIMERSLACK: - error = current->timer_slack_ns; - break; - case PR_SET_TIMERSLACK: - if (arg2 <= 0) - current->timer_slack_ns = + case PR_SET_UNALIGN: + error = SET_UNALIGN_CTL(me, arg2); + break; + case PR_GET_UNALIGN: + error = GET_UNALIGN_CTL(me, arg2); + break; + case PR_SET_FPEMU: + error = SET_FPEMU_CTL(me, arg2); + break; + case PR_GET_FPEMU: + error = GET_FPEMU_CTL(me, arg2); + break; + case PR_SET_FPEXC: + error = SET_FPEXC_CTL(me, arg2); + break; + case PR_GET_FPEXC: + error = GET_FPEXC_CTL(me, arg2); + break; + case PR_GET_TIMING: + error = PR_TIMING_STATISTICAL; + break; + case PR_SET_TIMING: + if (arg2 != PR_TIMING_STATISTICAL) + error = -EINVAL; + break; + case PR_SET_NAME: + comm[sizeof(me->comm) - 1] = 0; + if (strncpy_from_user(comm, (char __user *)arg2, + sizeof(me->comm) - 1) < 0) + return -EFAULT; + set_task_comm(me, comm); + proc_comm_connector(me); + break; + case PR_GET_NAME: + get_task_comm(comm, me); + if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) + return -EFAULT; + break; + case PR_GET_ENDIAN: + error = GET_ENDIAN(me, arg2); + break; + case PR_SET_ENDIAN: + error = SET_ENDIAN(me, 
arg2); + break; + case PR_GET_SECCOMP: + error = prctl_get_seccomp(); + break; + case PR_SET_SECCOMP: + error = prctl_set_seccomp(arg2, (char __user *)arg3); + break; + case PR_GET_TSC: + error = GET_TSC_CTL(arg2); + break; + case PR_SET_TSC: + error = SET_TSC_CTL(arg2); + break; + case PR_TASK_PERF_EVENTS_DISABLE: + error = perf_event_task_disable(); + break; + case PR_TASK_PERF_EVENTS_ENABLE: + error = perf_event_task_enable(); + break; + case PR_GET_TIMERSLACK: + error = current->timer_slack_ns; + break; + case PR_SET_TIMERSLACK: + if (arg2 <= 0) + current->timer_slack_ns = current->default_timer_slack_ns; - else - current->timer_slack_ns = arg2; - break; - case PR_MCE_KILL: - if (arg4 | arg5) - return -EINVAL; - switch (arg2) { - case PR_MCE_KILL_CLEAR: - if (arg3 != 0) - return -EINVAL; - current->flags &= ~PF_MCE_PROCESS; - break; - case PR_MCE_KILL_SET: - current->flags |= PF_MCE_PROCESS; - if (arg3 == PR_MCE_KILL_EARLY) - current->flags |= PF_MCE_EARLY; - else if (arg3 == PR_MCE_KILL_LATE) - current->flags &= ~PF_MCE_EARLY; - else if (arg3 == PR_MCE_KILL_DEFAULT) - current->flags &= - ~(PF_MCE_EARLY|PF_MCE_PROCESS); - else - return -EINVAL; - break; - default: + else + current->timer_slack_ns = arg2; + break; + case PR_MCE_KILL: + if (arg4 | arg5) + return -EINVAL; + switch (arg2) { + case PR_MCE_KILL_CLEAR: + if (arg3 != 0) return -EINVAL; - } + current->flags &= ~PF_MCE_PROCESS; break; - case PR_MCE_KILL_GET: - if (arg2 | arg3 | arg4 | arg5) - return -EINVAL; - if (current->flags & PF_MCE_PROCESS) - error = (current->flags & PF_MCE_EARLY) ? 
- PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; + case PR_MCE_KILL_SET: + current->flags |= PF_MCE_PROCESS; + if (arg3 == PR_MCE_KILL_EARLY) + current->flags |= PF_MCE_EARLY; + else if (arg3 == PR_MCE_KILL_LATE) + current->flags &= ~PF_MCE_EARLY; + else if (arg3 == PR_MCE_KILL_DEFAULT) + current->flags &= + ~(PF_MCE_EARLY|PF_MCE_PROCESS); else - error = PR_MCE_KILL_DEFAULT; - break; - case PR_SET_MM: - error = prctl_set_mm(arg2, arg3, arg4, arg5); - break; - case PR_GET_TID_ADDRESS: - error = prctl_get_tid_address(me, (int __user **)arg2); - break; - case PR_SET_CHILD_SUBREAPER: - me->signal->is_child_subreaper = !!arg2; - break; - case PR_GET_CHILD_SUBREAPER: - error = put_user(me->signal->is_child_subreaper, - (int __user *) arg2); - break; - case PR_SET_NO_NEW_PRIVS: - if (arg2 != 1 || arg3 || arg4 || arg5) return -EINVAL; - - current->no_new_privs = 1; break; - case PR_GET_NO_NEW_PRIVS: - if (arg2 || arg3 || arg4 || arg5) - return -EINVAL; - return current->no_new_privs ? 1 : 0; default: - error = -EINVAL; - break; + return -EINVAL; + } + break; + case PR_MCE_KILL_GET: + if (arg2 | arg3 | arg4 | arg5) + return -EINVAL; + if (current->flags & PF_MCE_PROCESS) + error = (current->flags & PF_MCE_EARLY) ? + PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; + else + error = PR_MCE_KILL_DEFAULT; + break; + case PR_SET_MM: + error = prctl_set_mm(arg2, arg3, arg4, arg5); + break; + case PR_GET_TID_ADDRESS: + error = prctl_get_tid_address(me, (int __user **)arg2); + break; + case PR_SET_CHILD_SUBREAPER: + me->signal->is_child_subreaper = !!arg2; + break; + case PR_GET_CHILD_SUBREAPER: + error = put_user(me->signal->is_child_subreaper, + (int __user *)arg2); + break; + case PR_SET_NO_NEW_PRIVS: + if (arg2 != 1 || arg3 || arg4 || arg5) + return -EINVAL; + + current->no_new_privs = 1; + break; + case PR_GET_NO_NEW_PRIVS: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + return current->no_new_privs ? 
1 : 0; + default: + error = -EINVAL; + break; } return error; } -- cgit v1.2.3 From 496ad9aa8ef448058e36ca7a787c61f2e63f0f54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Jan 2013 17:07:38 -0500 Subject: new helper: file_inode(file) Signed-off-by: Al Viro --- kernel/sys.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index 265b37690421..e3932ea50ec8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1792,14 +1792,14 @@ SYSCALL_DEFINE1(umask, int, mask) static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { struct fd exe; - struct dentry *dentry; + struct inode *inode; int err; exe = fdget(fd); if (!exe.file) return -EBADF; - dentry = exe.file->f_path.dentry; + inode = file_inode(exe.file); /* * Because the original mm->exe_file points to executable file, make @@ -1807,11 +1807,11 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) * overall picture. */ err = -EACCES; - if (!S_ISREG(dentry->d_inode->i_mode) || + if (!S_ISREG(inode->i_mode) || exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - err = inode_permission(dentry->d_inode, MAY_EXEC); + err = inode_permission(inode, MAY_EXEC); if (err) goto exit; -- cgit v1.2.3 From 7ff6764061ecd4a4ef91db7b8b30aacc6a8573c9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Feb 2013 17:02:52 -0800 Subject: usermodehelper: cleanup/fix __orderly_poweroff() && argv_free() __orderly_poweroff() does argv_free() if call_usermodehelper_fns() returns -ENOMEM. As Lucas pointed out, this can be wrong if -ENOMEM was not triggered by the failing call_usermodehelper_setup(), in this case both __orderly_poweroff() and argv_cleanup() can do kfree(). Kill argv_cleanup() and change __orderly_poweroff() to call argv_free() unconditionally like do_coredump() does. 
This info->cleanup() is not needed (and wrong) since 6c0c0d4d "fix bug in orderly_poweroff()", which did the UMH_NO_WAIT => UMH_WAIT_EXEC change; we can rely on the fact that CLONE_VFORK can't return until do_execve() succeeds/fails. Signed-off-by: Oleg Nesterov Reported-by: Lucas De Marchi Cc: David Howells Cc: James Morris Cc: hongfeng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel/sys.c') diff --git a/kernel/sys.c b/kernel/sys.c index e10566bee399..81f56445fba9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2185,11 +2185,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static void argv_cleanup(struct subprocess_info *info) -{ - argv_free(info->argv); -} - static int __orderly_poweroff(void) { int argc; @@ -2209,9 +2204,8 @@ static int __orderly_poweroff(void) } ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, - NULL, argv_cleanup, NULL); - if (ret == -ENOMEM) - argv_free(argv); + NULL, NULL, NULL); + argv_free(argv); return ret; } -- cgit v1.2.3