summaryrefslogtreecommitdiff
path: root/kernel/cgroup/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup/cgroup.c')
-rw-r--r--kernel/cgroup/cgroup.c149
1 files changed, 104 insertions, 45 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 109c32c56de7..829943aad7be 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -187,7 +187,7 @@ static u64 css_serial_nr_next = 1;
*/
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
-static u16 have_free_callback __read_mostly;
+static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */
@@ -204,7 +204,8 @@ static struct cftype cgroup_base_files[];
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
-static void css_task_iter_advance(struct css_task_iter *it);
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
struct cgroup_subsys *ss);
@@ -642,6 +643,7 @@ struct css_set init_css_set = {
.dom_cset = &init_css_set,
.tasks = LIST_HEAD_INIT(init_css_set.tasks),
.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
+ .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
@@ -737,6 +739,21 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
cgroup_update_populated(link->cgrp, populated);
}
+/*
+ * @task is leaving, advance task iterators which are pointing to it so
+ * that they can resume at the next position. Advancing an iterator might
+ * remove it from the list, use safe walk. See css_task_iter_skip() for
+ * details.
+ */
+static void css_set_skip_task_iters(struct css_set *cset,
+ struct task_struct *task)
+{
+ struct css_task_iter *it, *pos;
+
+ list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
+ css_task_iter_skip(it, task);
+}
+
/**
* css_set_move_task - move a task from one css_set to another
* @task: task being moved
@@ -762,22 +779,9 @@ static void css_set_move_task(struct task_struct *task,
css_set_update_populated(to_cset, true);
if (from_cset) {
- struct css_task_iter *it, *pos;
-
WARN_ON_ONCE(list_empty(&task->cg_list));
- /*
- * @task is leaving, advance task iterators which are
- * pointing to it so that they can resume at the next
- * position. Advancing an iterator might remove it from
- * the list, use safe walk. See css_task_iter_advance*()
- * for details.
- */
- list_for_each_entry_safe(it, pos, &from_cset->task_iters,
- iters_node)
- if (it->task_pos == &task->cg_list)
- css_task_iter_advance(it);
-
+ css_set_skip_task_iters(from_cset, task);
list_del_init(&task->cg_list);
if (!css_set_populated(from_cset))
css_set_update_populated(from_cset, false);
@@ -1104,6 +1108,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
cset->dom_cset = cset;
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_tasks);
+ INIT_LIST_HEAD(&cset->dying_tasks);
INIT_LIST_HEAD(&cset->task_iters);
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
@@ -1692,7 +1697,7 @@ static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
*root_flags = 0;
- if (!data)
+ if (!data || *data == '\0')
return 0;
while ((token = strsep(&data, ",")) != NULL) {
@@ -1942,7 +1947,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_namespace *ns)
{
struct dentry *dentry;
- bool new_sb;
+ bool new_sb = false;
dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
@@ -1952,6 +1957,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
*/
if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
struct dentry *nsdentry;
+ struct super_block *sb = dentry->d_sb;
struct cgroup *cgrp;
mutex_lock(&cgroup_mutex);
@@ -1962,12 +1968,14 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
- nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
+ nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(dentry);
+ if (IS_ERR(nsdentry))
+ deactivate_locked_super(sb);
dentry = nsdentry;
}
- if (IS_ERR(dentry) || !new_sb)
+ if (!new_sb)
cgroup_put(&root->cgrp);
return dentry;
@@ -2876,8 +2884,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
- WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
continue;
@@ -2887,6 +2893,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp)
return PTR_ERR(css);
}
+ WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
+
if (css_visible(css)) {
ret = css_populate_dir(css);
if (ret)
@@ -2922,11 +2930,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp)
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
- WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
-
if (!css)
continue;
+ WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
+
if (css->parent &&
!(cgroup_ss_mask(dsct) & (1 << ss->id))) {
kill_css(css);
@@ -3213,7 +3221,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
if (strcmp(strstrip(buf), "threaded"))
return -EINVAL;
- cgrp = cgroup_kn_lock_live(of->kn, false);
+ /* drain dying csses before we re-apply (threaded) subtree control */
+ cgrp = cgroup_kn_lock_live(of->kn, true);
if (!cgrp)
return -ENOENT;
@@ -4040,15 +4049,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
it->task_pos = NULL;
return;
}
- } while (!css_set_populated(cset));
+ } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
if (!list_empty(&cset->tasks))
it->task_pos = cset->tasks.next;
- else
+ else if (!list_empty(&cset->mg_tasks))
it->task_pos = cset->mg_tasks.next;
+ else
+ it->task_pos = cset->dying_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
+ it->dying_tasks_head = &cset->dying_tasks;
/*
* We don't keep css_sets locked across iteration steps and thus
@@ -4074,9 +4086,20 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
list_add(&it->iters_node, &cset->task_iters);
}
+static void css_task_iter_skip(struct css_task_iter *it,
+ struct task_struct *task)
+{
+ lockdep_assert_held(&css_set_lock);
+
+ if (it->task_pos == &task->cg_list) {
+ it->task_pos = it->task_pos->next;
+ it->flags |= CSS_TASK_ITER_SKIPPED;
+ }
+}
+
static void css_task_iter_advance(struct css_task_iter *it)
{
- struct list_head *next;
+ struct task_struct *task;
lockdep_assert_held(&css_set_lock);
repeat:
@@ -4086,25 +4109,40 @@ repeat:
* consumed first and then ->mg_tasks. After ->mg_tasks,
* we move onto the next cset.
*/
- next = it->task_pos->next;
-
- if (next == it->tasks_head)
- next = it->mg_tasks_head->next;
+ if (it->flags & CSS_TASK_ITER_SKIPPED)
+ it->flags &= ~CSS_TASK_ITER_SKIPPED;
+ else
+ it->task_pos = it->task_pos->next;
- if (next == it->mg_tasks_head)
+ if (it->task_pos == it->tasks_head)
+ it->task_pos = it->mg_tasks_head->next;
+ if (it->task_pos == it->mg_tasks_head)
+ it->task_pos = it->dying_tasks_head->next;
+ if (it->task_pos == it->dying_tasks_head)
css_task_iter_advance_css_set(it);
- else
- it->task_pos = next;
} else {
/* called from start, proceed to the first cset */
css_task_iter_advance_css_set(it);
}
- /* if PROCS, skip over tasks which aren't group leaders */
- if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
- !thread_group_leader(list_entry(it->task_pos, struct task_struct,
- cg_list)))
- goto repeat;
+ if (!it->task_pos)
+ return;
+
+ task = list_entry(it->task_pos, struct task_struct, cg_list);
+
+ if (it->flags & CSS_TASK_ITER_PROCS) {
+ /* if PROCS, skip over tasks which aren't group leaders */
+ if (!thread_group_leader(task))
+ goto repeat;
+
+ /* and dying leaders w/o live member threads */
+ if (!atomic_read(&task->signal->live))
+ goto repeat;
+ } else {
+ /* skip all dying ones */
+ if (task->flags & PF_EXITING)
+ goto repeat;
+ }
}
/**
@@ -4160,6 +4198,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
spin_lock_irq(&css_set_lock);
+ /* @it may be half-advanced by skips, finish advancing */
+ if (it->flags & CSS_TASK_ITER_SKIPPED)
+ css_task_iter_advance(it);
+
if (it->task_pos) {
it->cur_task = list_entry(it->task_pos, struct task_struct,
cg_list);
@@ -4543,9 +4585,11 @@ static void css_release_work_fn(struct work_struct *work)
/* cgroup release path */
trace_cgroup_release(cgrp);
+ spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
tcgrp = cgroup_parent(tcgrp))
tcgrp->nr_dying_descendants--;
+ spin_unlock_irq(&css_set_lock);
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
@@ -4742,12 +4786,14 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
cgrp->root = root;
cgrp->level = level;
+ spin_lock_irq(&css_set_lock);
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
if (tcgrp != cgrp)
tcgrp->nr_descendants++;
}
+ spin_unlock_irq(&css_set_lock);
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5030,10 +5076,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
if (parent && cgroup_is_threaded(cgrp))
parent->nr_threaded_children--;
+ spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
tcgrp->nr_descendants--;
tcgrp->nr_dying_descendants++;
}
+ spin_unlock_irq(&css_set_lock);
cgroup1_check_for_release(parent);
@@ -5109,7 +5157,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
have_fork_callback |= (bool)ss->fork << ss->id;
have_exit_callback |= (bool)ss->exit << ss->id;
- have_free_callback |= (bool)ss->free << ss->id;
+ have_release_callback |= (bool)ss->release << ss->id;
have_canfork_callback |= (bool)ss->can_fork << ss->id;
/* At system boot, before all subsystems have been
@@ -5531,6 +5579,7 @@ void cgroup_exit(struct task_struct *tsk)
if (!list_empty(&tsk->cg_list)) {
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
+ list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--;
spin_unlock_irq(&css_set_lock);
} else {
@@ -5543,16 +5592,26 @@ void cgroup_exit(struct task_struct *tsk)
} while_each_subsys_mask();
}
-void cgroup_free(struct task_struct *task)
+void cgroup_release(struct task_struct *task)
{
- struct css_set *cset = task_css_set(task);
struct cgroup_subsys *ss;
int ssid;
- do_each_subsys_mask(ss, ssid, have_free_callback) {
- ss->free(task);
+ do_each_subsys_mask(ss, ssid, have_release_callback) {
+ ss->release(task);
} while_each_subsys_mask();
+ if (use_task_css_set_links) {
+ spin_lock_irq(&css_set_lock);
+ css_set_skip_task_iters(task_css_set(task), task);
+ list_del_init(&task->cg_list);
+ spin_unlock_irq(&css_set_lock);
+ }
+}
+
+void cgroup_free(struct task_struct *task)
+{
+ struct css_set *cset = task_css_set(task);
put_css_set(cset);
}