From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. Signed-off-by: Jens Axboe --- kernel/sched.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7ba..ca098bf4cc65 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3978,6 +3978,16 @@ need_resched_nonpreemptible: switch_count = &prev->nvcsw; } + /* + * If we are going to sleep and we have plugged IO queued, make + * sure to submit it to avoid deadlocks. + */ + if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) { + raw_spin_unlock(&rq->lock); + blk_flush_plug(prev); + raw_spin_lock(&rq->lock); + } + pre_schedule(rq, prev); if (unlikely(!rq->nr_running)) @@ -5333,6 +5343,7 @@ void __sched io_schedule(void) delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); + blk_flush_plug(current); current->in_iowait = 1; schedule(); current->in_iowait = 0; @@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout) delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); + blk_flush_plug(current); current->in_iowait = 1; ret = schedule_timeout(timeout); current->in_iowait = 0; -- cgit v1.2.3 From 16addf954d3954a72fd56abc02ffcba3c18529a1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 18 Mar 2011 09:34:53 -0700 Subject: sched: Fix yield_to kernel-doc Add missing function parameters for yield_to(): Warning(kernel/sched.c:5470): No description found for parameter 'p' Warning(kernel/sched.c:5470): No description found for parameter 'preempt' Signed-off-by: Randy Dunlap Cc: Peter Zijlstra LKML-Reference: <20110318093453.8f7489a4.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 58d66ea7d200..052120d67706 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5467,6 +5467,8 @@ EXPORT_SYMBOL(yield); * yield_to - yield the current processor to another thread in * your thread group, or accelerate that thread toward the * processor it's on. + * @p: target task + * @preempt: whether task preemption is allowed or not * * It's the caller's job to ensure that the target task struct * can't go away on us before we can do any checks. -- cgit v1.2.3 From 20dd67407160eac577656cd2f8ee9a1fead960b8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 23 Mar 2011 13:17:23 +0200 Subject: sched: Remove unused 'rq' variable and cpu_rq() call from alloc_fair_sched_group() Signed-off-by: Sergey Senozhatsky Cc: Steven Rostedt Cc: Peter Zijlstra LKML-Reference: <20110323111722.GA4244@swordfish.minsk.epam.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 052120d67706..a361e20ec2cd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8443,7 +8443,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { struct cfs_rq *cfs_rq; struct sched_entity *se; - struct rq *rq; int i; tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); @@ -8456,8 +8455,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) tg->shares = NICE_0_LOAD; for_each_possible_cpu(i) { - rq = cpu_rq(i); - cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); if (!cfs_rq) -- cgit v1.2.3 From b0e77598f87107001a00b8a4ece9c95e4254ccc4 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 23 Mar 2011 16:43:24 -0700 Subject: userns: user namespaces: convert several capable() calls CAP_IPC_OWNER and CAP_IPC_LOCK can be checked against current_user_ns(), because the resource comes from current's own ipc namespace. setuid/setgid are to uids in own namespace, so again checks can be against current_user_ns(). Changelog: Jan 11: Use task_ns_capable() in place of sched_capable(). Jan 11: Use nsown_capable() as suggested by Bastian Blank. Jan 11: Clarify (hopefully) some logic in futex and sched.c Feb 15: use ns_capable for ipc, not nsown_capable Feb 23: let copy_ipcs handle setting ipc_ns->user_ns Feb 23: pass ns down rather than taking it from current [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Serge E. Hallyn Acked-by: "Eric W. Biederman" Acked-by: Daniel Lezcano Acked-by: David Howells Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index a172494a9a63..480adeb63f8f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4892,8 +4892,11 @@ static bool check_same_owner(struct task_struct *p) rcu_read_lock(); pcred = __task_cred(p); - match = (cred->euid == pcred->euid || - cred->euid == pcred->uid); + if (cred->user->user_ns == pcred->user->user_ns) + match = (cred->euid == pcred->euid || + cred->euid == pcred->uid); + else + match = false; rcu_read_unlock(); return match; } @@ -5221,7 +5224,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) goto out_free_cpus_allowed; } retval = -EPERM; - if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p); -- cgit v1.2.3