From 857a2beb09ab83e9a8185821ae16db7dfbe8b837 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 14 Apr 2013 20:50:08 -0700 Subject: cgroup: implement task_cgroup_path_from_hierarchy() kdbus folks want a sane way to determine the cgroup path that a given task belongs to on a given hierarchy, which is a reasonable thing to expect from cgroup core. Implement task_cgroup_path_from_hierarchy(). v2: Dropped unnecessary NULL check on the return value of task_cgroup_from_root() as suggested by Li Zefan. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Acked-by: Li Zefan Cc: Kay Sievers Cc: Lennart Poettering Cc: Daniel Mack --- include/linux/cgroup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5047355b9a0f..383c630f36f9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -542,6 +542,8 @@ int cgroup_is_removed(const struct cgroup *cgrp); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); +int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, + char *buf, size_t buflen); int cgroup_task_count(const struct cgroup *cgrp); -- cgit v1.2.3 From 23958e729e7029678e746bf8f4094c8863a79c3d Mon Sep 17 00:00:00 2001 From: Greg KH Date: Fri, 3 May 2013 16:26:59 -0700 Subject: cgroup.h: remove some functions that are now gone cgroup_lock() and cgroup_unlock() are now no longer exported, so fix cgroup.h to not declare them if CONFIG_CGROUPS is not enabled.
Signed-off-by: Greg Kroah-Hartman Acked-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 383c630f36f9..4f6f5138c340 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -840,8 +840,6 @@ static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p, int callbacks) {} -static inline void cgroup_lock(void) {} -static inline void cgroup_unlock(void) {} static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { -- cgit v1.2.3 From 9138125beabbb76b4a373d4a619870f6f5d86fc5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 May 2013 13:52:38 -0700 Subject: blk-throttle: implement proper hierarchy support With the recent updates, blk-throttle is finally ready for proper hierarchy support. Dispatching now honors service_queue->parent_sq and propagates correctly. The only thing missing is setting ->parent_sq correctly so that throtl_grp hierarchy matches the cgroup hierarchy. This patch updates throtl_pd_init() such that service_queues form the same hierarchy as the cgroup hierarchy if sane_behavior is enabled. As this concludes proper hierarchy support for blkcg, the shameful .broken_hierarchy tag is removed from blkio_subsys. v2: Updated blkio-controller.txt as suggested by Vivek. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal Cc: Li Zefan --- include/linux/cgroup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5047355b9a0f..09f1a1408ae0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -272,6 +272,8 @@ enum { * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. 
* + * - blkcg: blk-throttle becomes properly hierarchical. + * * The followings are planned changes. * * - release_agent will be disallowed once replacement notification -- cgit v1.2.3 From bdc7119f1bdd0632d42f435941dc290216a436e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 May 2013 10:55:38 +0900 Subject: cgroup: make cgroup_is_removed() static cgroup_is_removed() no longer has external users and it shouldn't grow any - controllers should deal with cgroup_subsys_state on/offline state instead of cgroup removal state. Make it static. While at it, make it return bool. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 1df5f699be61..8d9f3c911fca 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -538,7 +538,6 @@ static inline const char *cgroup_name(const struct cgroup *cgrp) int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); -int cgroup_is_removed(const struct cgroup *cgrp); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -- cgit v1.2.3 From 53fa5261747a90746531e8a1c81eeb78fedc2f71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 May 2013 10:55:38 +0900 Subject: cgroup: add cgroup->serial_nr and implement cgroup_next_sibling() Currently, there's no easy way to find out the next sibling cgroup unless it's known that the current cgroup is accessed from the parent's children list in a single RCU critical section. This in turn forces all iterators to require whole iteration to be enclosed in a single RCU critical section, which sometimes is too restrictive. This patch implements cgroup_next_sibling() which can reliably determine the next sibling regardless of the state of the current cgroup as long as it's accessible. 
It currently is impossible to determine the next sibling after dropping RCU read lock because the cgroup being iterated could be removed anytime and if RCU read lock is dropped, nothing guarantees its ->sibling.next pointer is accessible. A removed cgroup would continue to point to its next sibling for RCU accesses but stop receiving updates from the sibling. IOW, the next sibling could be removed and then complete its grace period while RCU read lock is dropped, making it unsafe to dereference ->sibling.next after dropping and re-acquiring RCU read lock. This can be solved by adding a way to traverse to the next sibling without dereferencing ->sibling.next. This patch adds a monotonically increasing cgroup serial number, cgroup->serial_nr, which guarantees that all cgroup->children lists are kept in increasing serial_nr order. A new function, cgroup_next_sibling(), is implemented, which, if CGRP_REMOVED is not set on the current cgroup, follows ->sibling.next; otherwise, traverses the parent's ->children list until it sees a sibling with higher ->serial_nr. This allows the function to always return the next sibling regardless of the state of the current cgroup without adding overhead in the fast path. Further patches will update the iterators to use cgroup_next_sibling() so that they allow dropping RCU read lock and blocking while iteration is in progress which in turn will be used to simplify controllers. v2: Typo fix as per Serge. Signed-off-by: Tejun Heo Acked-by: Serge E.
Hallyn --- include/linux/cgroup.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8d9f3c911fca..ee041a01a67e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -188,6 +188,14 @@ struct cgroup { struct cgroup *parent; /* my parent */ struct dentry *dentry; /* cgroup fs entry, RCU protected */ + /* + * Monotonically increasing unique serial number which defines a + * uniform order among all cgroups. It's guaranteed that all + * ->children lists are in the ascending order of ->serial_nr. + * It's used to allow interrupting and resuming iterations. + */ + u64 serial_nr; + /* * This is a copy of dentry->d_name, and it's needed because * we can't use dentry->d_name in cgroup_path(). @@ -675,6 +683,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, return task_subsys_state(task, subsys_id)->cgroup; } +struct cgroup *cgroup_next_sibling(struct cgroup *pos); + /** * cgroup_for_each_child - iterate through children of a cgroup * @pos: the cgroup * to use as the loop cursor -- cgit v1.2.3 From 75501a6d59e989e5c286716e5b3b66ace4660e83 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 May 2013 10:55:38 +0900 Subject: cgroup: update iterators to use cgroup_next_sibling() This patch converts cgroup_for_each_child(), cgroup_next_descendant_pre/post() and thus cgroup_for_each_descendant_pre/post() to use cgroup_next_sibling() instead of manually dereferencing ->sibling.next. The only reason the iterators couldn't allow dropping RCU read lock while iteration is in progress was because they couldn't determine the next sibling safely once RCU read lock is dropped. Using cgroup_next_sibling() removes that problem and enables all iterators to allow dropping RCU read lock in the middle. Comments are updated accordingly. This makes the iterators easier to use and will simplify controllers. 
Note that @cgroup argument is renamed to @cgrp in cgroup_for_each_child() because it conflicts with "struct cgroup" used in the new macro body. Signed-off-by: Tejun Heo Acked-by: Serge E. Hallyn Reviewed-by: Michal Hocko --- include/linux/cgroup.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ee041a01a67e..d0ad3794b947 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -688,9 +688,9 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos); /** * cgroup_for_each_child - iterate through children of a cgroup * @pos: the cgroup * to use as the loop cursor - * @cgroup: cgroup whose children to walk + * @cgrp: cgroup whose children to walk * - * Walk @cgroup's children. Must be called under rcu_read_lock(). A child + * Walk @cgrp's children. Must be called under rcu_read_lock(). A child * cgroup which hasn't finished ->css_online() or already has finished * ->css_offline() may show up during traversal and it's each subsystem's * responsibility to verify that each @pos is alive. @@ -698,9 +698,15 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos); * If a subsystem synchronizes against the parent in its ->css_online() and * before starting iterating, a cgroup which finished ->css_online() is * guaranteed to be visible in the future iterations. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. 
*/ -#define cgroup_for_each_child(pos, cgroup) \ - list_for_each_entry_rcu(pos, &(cgroup)->children, sibling) +#define cgroup_for_each_child(pos, cgrp) \ + for ((pos) = list_first_or_null_rcu(&(cgrp)->children, \ + struct cgroup, sibling); \ + (pos); (pos) = cgroup_next_sibling((pos))) struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, struct cgroup *cgroup); @@ -759,6 +765,10 @@ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); * Alternatively, a subsystem may choose to use a single global lock to * synchronize ->css_online() and ->css_offline() against tree-walking * operations. + * + * It is allowed to temporarily drop RCU read lock during iteration. The + * caller is responsible for ensuring that @pos remains accessible until + * the start of the next iteration by, for example, bumping the css refcnt. */ #define cgroup_for_each_descendant_pre(pos, cgroup) \ for (pos = cgroup_next_descendant_pre(NULL, (cgroup)); (pos); \ -- cgit v1.2.3 From 5c5cc62321d9df7a9a608346fc649c4528380c8f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:16:29 +0800 Subject: cpuset: allow to keep tasks in empty cpusets To achieve this: - We call update_tasks_cpumask/nodemask() for empty cpusets when hotplug happens, instead of moving tasks out of them. - When a cpuset's masks are changed by writing cpuset.cpus/mems, we also update tasks in child cpusets which are empty. v3: - do propagation work in one place for both hotplug and unplug v2: - drop rcu_read_lock before calling update_task_nodemask() and update_task_cpumask(), instead of using workqueue. 
- add documentation in include/linux/cgroup.h Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d0ad3794b947..53e81a61be57 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -277,6 +277,10 @@ enum { * * - Remount is disallowed. * + * - cpuset: tasks will be kept in empty cpusets when hotplug happens + * and take masks of ancestors with non-empty cpus/mems, instead of + * being moved to an ancestor. + * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. * -- cgit v1.2.3 From 88fa523bff295f1d60244a54833480b02f775152 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 9 Jun 2013 17:16:46 +0800 Subject: cpuset: allow to move tasks to empty cpusets Currently some cpuset behaviors are not friendly when cpuset is co-mounted with other cgroup controllers. Now with this patchset if cpuset is mounted with sane_behavior option, it behaves differently: - Tasks will be kept in empty cpusets when hotplug happens and take masks of ancestors with non-empty cpus/mems, instead of being moved to an ancestor. - A task can be moved into an empty cpuset, and again it takes masks of ancestors, so the user can drop a task into a newly created cgroup without having to do anything for it. As tasks can reside in empty cpusets, here're some rules: - They can be moved to another cpuset, regardless of whether it's empty or not. - Though it takes masks from ancestors, it takes other configs from the empty cpuset. - If the ancestors' masks are changed, those tasks will also be updated to take new masks.
v2: add documentation in include/linux/cgroup.h Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 53e81a61be57..74e8b8e4cd7f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -281,6 +281,9 @@ enum { * and take masks of ancestors with non-empty cpus/mems, instead of * being moved to an ancestor. * + * - cpuset: a task can be moved into an empty cpuset, and again it + * takes masks of ancestors. + * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. * -- cgit v1.2.3 From 3fc3db9a3ae0ce108badf31a4a00e41b4236f5fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 21:04:48 -0700 Subject: cgroup: remove now unused css_depth() Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d0ad3794b947..5830592258dc 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -848,7 +848,6 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg, /* Get id and depth of css */ unsigned short css_id(struct cgroup_subsys_state *css); -unsigned short css_depth(struct cgroup_subsys_state *css); struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); #else /* !CONFIG_CGROUPS */ -- cgit v1.2.3 From 69d0206c793a17431eacee2694ee7a4b25df76b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 21:04:50 -0700 Subject: cgroup: bring some sanity to naming around cg_cgroup_link cgroups and css_sets are mapped M:N and this M:N mapping is represented by struct cg_cgroup_link which forms linked lists on both sides. The naming around this mapping is already confusing and struct cg_cgroup_link exacerbates the situation quite a bit. 
>From cgroup side, it starts off ->css_sets and runs through ->cgrp_link_list. From css_set side, it starts off ->cg_links and runs through ->cg_link_list. This is rather reversed as cgrp_link_list is used to iterate css_sets and cg_link_list cgroups. Also, this is the only place which is still using the confusing "cg" for css_sets. This patch cleans it up a bit. * s/cgroup->css_sets/cgroup->cset_links/ s/css_set->cg_links/css_set->cgrp_links/ s/cgroup_iter->cg_link/cgroup_iter->cset_link/ * s/cg_cgroup_link/cgrp_cset_link/ * s/cgrp_cset_link->cg/cgrp_cset_link->cset/ s/cgrp_cset_link->cgrp_link_list/cgrp_cset_link->cset_link/ s/cgrp_cset_link->cg_link_list/cgrp_cset_link->cgrp_link/ * s/init_css_set_link/init_cgrp_cset_link/ s/free_cg_links/free_cgrp_cset_links/ s/allocate_cg_links/allocate_cgrp_cset_links/ * s/cgl[12]/link[12]/ in compare_css_sets() * s/saved_link/tmp_link/ s/tmp/tmp_links/ and a couple similar adustments. * Comment and whiteline adjustments. After the changes, we have list_for_each_entry(link, &cont->cset_links, cset_link) { struct css_set *cset = link->cset; instead of list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { struct css_set *cset = link->cg; This patch is purely cosmetic. v2: Fix broken sentences in the patch description. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5830592258dc..0e32855edc92 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -215,10 +215,10 @@ struct cgroup { struct cgroupfs_root *root; /* - * List of cg_cgroup_links pointing at css_sets with - * tasks in this cgroup. Protected by css_set_lock + * List of cgrp_cset_links pointing at css_sets with tasks in this + * cgroup. Protected by css_set_lock. 
*/ - struct list_head css_sets; + struct list_head cset_links; struct list_head allcg_node; /* cgroupfs_root->allcg_list */ struct list_head cft_q_node; /* used during cftype add/rm */ @@ -365,11 +365,10 @@ struct css_set { struct list_head tasks; /* - * List of cg_cgroup_link objects on link chains from - * cgroups referenced from this css_set. Protected by - * css_set_lock + * List of cgrp_cset_links pointing at cgroups referenced from this + * css_set. Protected by css_set_lock. */ - struct list_head cg_links; + struct list_head cgrp_links; /* * Set of subsystem states, one for each subsystem. This array @@ -792,7 +791,7 @@ struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, /* A cgroup_iter should be treated as an opaque object */ struct cgroup_iter { - struct list_head *cg_link; + struct list_head *cset_link; struct list_head *task; }; -- cgit v1.2.3 From 5de0107e634ce862f16360139709d9d3a656463e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 21:04:52 -0700 Subject: cgroup: clean up css_[try]get() and css_put() * __css_get() isn't used by anyone. Fold it into css_get(). * Add proper function comments to all css reference functions. This patch is purely cosmetic. v2: Typo fix as per Li. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0e32855edc92..a494636a34da 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -94,33 +94,31 @@ enum { CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ }; -/* Caller must verify that the css is not for root cgroup */ -static inline void __css_get(struct cgroup_subsys_state *css, int count) -{ - atomic_add(count, &css->refcnt); -} - -/* - * Call css_get() to hold a reference on the css; it can be used - * for a reference obtained via: - * - an existing ref-counted reference to the css - * - task->cgroups for a locked task +/** + * css_get - obtain a reference on the specified css + * @css: target css + * + * The caller must already have a reference. */ - static inline void css_get(struct cgroup_subsys_state *css) { /* We don't need to reference count the root state */ if (!(css->flags & CSS_ROOT)) - __css_get(css, 1); + atomic_inc(&css->refcnt); } -/* - * Call css_tryget() to take a reference on a css if your existing - * (known-valid) reference isn't already ref-counted. Returns false if - * the css has been destroyed. - */ - extern bool __css_tryget(struct cgroup_subsys_state *css); + +/** + * css_tryget - try to obtain a reference on the specified css + * @css: target css + * + * Obtain a reference on @css if it's alive. The caller naturally needs to + * ensure that @css is accessible but doesn't have to be holding a + * reference on it - IOW, RCU protected access is good enough for this + * function. Returns %true if a reference count was successfully obtained; + * %false otherwise. 
+ */ static inline bool css_tryget(struct cgroup_subsys_state *css) { if (css->flags & CSS_ROOT) @@ -128,12 +126,14 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) return __css_tryget(css); } -/* - * css_put() should be called to release a reference taken by - * css_get() or css_tryget() - */ - extern void __css_put(struct cgroup_subsys_state *css); + +/** + * css_put - put a css reference + * @css: target css + * + * Put a reference obtained via css_get() and css_tryget(). + */ static inline void css_put(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_ROOT)) -- cgit v1.2.3 From 54766d4a1d3d6f84ff8fa475cd8f165c0a0000eb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 21:04:53 -0700 Subject: cgroup: rename CGRP_REMOVED to CGRP_DEAD We will add another flag indicating that the cgroup is in the process of being killed. REMOVING / REMOVED is more difficult to distinguish and cgroup_is_removing()/cgroup_is_removed() are a bit awkward. Also, later percpu_ref usage will involve "kill"ing the refcnt. s/CGRP_REMOVED/CGRP_DEAD/ s/cgroup_is_removed()/cgroup_is_dead() This patch is purely cosmetic. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a494636a34da..c86a93abe83d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -143,7 +143,7 @@ static inline void css_put(struct cgroup_subsys_state *css) /* bits in struct cgroup flags field */ enum { /* Control Group is dead */ - CGRP_REMOVED, + CGRP_DEAD, /* * Control Group has previously had a child cgroup or a task, * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) -- cgit v1.2.3 From 6f3d828f0fb7fdaffc6f32cb8a1cb7fcf8824598 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 21:04:55 -0700 Subject: cgroup: remove cgroup->count and use cgroup->count tracks the number of css_sets associated with the cgroup and is used only to verify that no css_set is associated when the cgroup is being destroyed. It's superfluous as the destruction path can simply check whether cgroup->cset_links is empty instead. Drop cgroup->count and check ->cset_links directly from cgroup_destroy_locked(). Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c86a93abe83d..81bfd0268e93 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -169,12 +169,6 @@ struct cgroup_name { struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ - /* - * count users of this cgroup.
>0 means busy, but doesn't - * necessarily indicate the number of tasks in the cgroup - */ - atomic_t count; - int id; /* ida allocated in-hierarchy ID */ /* -- cgit v1.2.3 From ea15f8ccdb430af1e8bc9b4e19a230eb4c356777 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Jun 2013 19:27:42 -0700 Subject: cgroup: split cgroup destruction into two steps Split cgroup_destroy_locked() into two steps and put the latter half into cgroup_offline_fn() which is executed from a work item. The latter half is responsible for offlining the css's, removing the cgroup from internal lists, and propagating release notification to the parent. The separation is to allow using percpu refcnt for css. Note that this allows for other cgroup operations to happen between the first and second halves of destruction, including creating a new cgroup with the same name. As the target cgroup is marked DEAD in the first half and cgroup internals don't care about the names of cgroups, this should be fine. A comment explaining this will be added by the next patch which implements the actual percpu refcnting. As RCU freeing is guaranteed to happen after the second step of destruction, we can use the same work item for both. This patch renames cgroup->free_work to ->destroy_work and uses it for both purposes. INIT_WORK() is now performed right before queueing the work item. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 81bfd0268e93..e345d8b90046 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -233,7 +233,7 @@ struct cgroup { /* For RCU-protected deletion */ struct rcu_head rcu_head; - struct work_struct free_work; + struct work_struct destroy_work; /* List of events which userspace want to receive */ struct list_head event_list; -- cgit v1.2.3 From d3daf28da16a30af95bfb303189a634a87606725 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Jun 2013 19:39:16 -0700 Subject: cgroup: use percpu refcnt for cgroup_subsys_states A css (cgroup_subsys_state) is how each cgroup is represented to a controller. As such, it can be used in hot paths across the various subsystems different controllers are associated with. One of the common operations is reference counting, which up until now has been implemented using a global atomic counter and can have significant adverse impact on scalability. For example, css refcnt can be gotten and put multiple times by blkcg for each IO request. For highops configurations which try to do as much per-cpu as possible, the global frequent refcnting can be very expensive. In general, given the various and hugely diverse paths css's end up being used from, we need to make it cheap and highly scalable. In its usage, css refcnting isn't very different from module refcnting. This patch converts css refcnting to use the recently added percpu_ref. css_get/tryget/put() directly maps to the matching percpu_ref operations and the deactivation logic is no longer necessary as percpu_ref already has refcnt killing. The only complication is that as the refcnt is per-cpu, percpu_ref_kill() in itself doesn't ensure that further tryget operations will fail, which we need to guarantee before invoking ->css_offline()'s. 
This is resolved by collecting kill confirmation using percpu_ref_kill_and_confirm() and initiating the offline phase of destruction after all css refcnt's are confirmed to be seen as killed on all CPUs. The previous patches already split destruction into two phases, so percpu_ref_kill_and_confirm() can be hooked up easily. This patch removes css_refcnt() which is used for rcu dereference sanity check in css_id(). While we can add a percpu refcnt API to ask the same question, css_id() itself is scheduled to be removed fairly soon, so let's not bother with it. Just drop the sanity check and use rcu_dereference_raw() instead. v2: - init_cgroup_css() was calling percpu_ref_init() without checking the return value. This causes two problems - the obvious lack of error handling and percpu_ref_init() being called from cgroup_init_subsys() before the allocators are up, which triggers warnings but doesn't cause actual problems as the refcnt isn't used for roots anyway. Fix both by moving percpu_ref_init() to cgroup_create(). - The base references were put too early by percpu_ref_kill_and_confirm() and cgroup_offline_fn() put the refs one extra time. This wasn't noticeable because css's go through another RCU grace period before being freed. Update cgroup_destroy_locked() to grab an extra reference before killing the refcnts. This problem was noticed by Kent. Signed-off-by: Tejun Heo Reviewed-by: Kent Overstreet Acked-by: Li Zefan Cc: Michal Hocko Cc: Mike Snitzer Cc: Vivek Goyal Cc: "Alasdair G.
Kergon" Cc: Jens Axboe Cc: Mikulas Patocka Cc: Glauber Costa --- include/linux/cgroup.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e345d8b90046..b7bd4beae294 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUPS @@ -72,13 +73,8 @@ struct cgroup_subsys_state { */ struct cgroup *cgroup; - /* - * State maintained by the cgroup system to allow subsystems - * to be "busy". Should be accessed via css_get(), - * css_tryget() and css_put(). - */ - - atomic_t refcnt; + /* reference count - access via css_[try]get() and css_put() */ + struct percpu_ref refcnt; unsigned long flags; /* ID for this css, if possible */ @@ -104,11 +100,9 @@ static inline void css_get(struct cgroup_subsys_state *css) { /* We don't need to reference count the root state */ if (!(css->flags & CSS_ROOT)) - atomic_inc(&css->refcnt); + percpu_ref_get(&css->refcnt); } -extern bool __css_tryget(struct cgroup_subsys_state *css); - /** * css_tryget - try to obtain a reference on the specified css * @css: target css @@ -123,11 +117,9 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) { if (css->flags & CSS_ROOT) return true; - return __css_tryget(css); + return percpu_ref_tryget(&css->refcnt); } -extern void __css_put(struct cgroup_subsys_state *css); - /** * css_put - put a css reference * @css: target css @@ -137,7 +129,7 @@ extern void __css_put(struct cgroup_subsys_state *css); static inline void css_put(struct cgroup_subsys_state *css) { if (!(css->flags & CSS_ROOT)) - __css_put(css); + percpu_ref_put(&css->refcnt); } /* bits in struct cgroup flags field */ @@ -231,9 +223,10 @@ struct cgroup { struct list_head pidlists; struct mutex pidlist_mutex; - /* For RCU-protected deletion */ + /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head 
rcu_head; struct work_struct destroy_work; + atomic_t css_kill_cnt; /* List of events which userspace want to receive */ struct list_head event_list; -- cgit v1.2.3 From f63674fd0d6afa1ba24309aee1f8c60195d39041 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Jun 2013 19:58:38 -0700 Subject: cgroup: update sane_behavior documentation f12dc02014 ("cgroup: mark "tasks" cgroup file as insane") and cc5943a781 ("cgroup: mark "notify_on_release" and "release_agent" cgroup files insane") forgot to update the changed behavior documentation in cgroup.h. Update it. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b7bd4beae294..17604767adfd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -264,13 +264,14 @@ enum { * * - Remount is disallowed. * - * - memcg: use_hierarchy is on by default and the cgroup file for - * the flag is not created. + * - "tasks" is removed. Everything should be at process + * granularity. Use "cgroup.procs" instead. * - * The followings are planned changes. + * - "release_agent" and "notify_on_release" are removed. + * Replacement notification mechanism will be implemented. * - * - release_agent will be disallowed once replacement notification - * mechanism is implemented. + * - memcg: use_hierarchy is on by default and the cgroup file for + * the flag is not created. */ CGRP_ROOT_SANE_BEHAVIOR = (1 << 0), -- cgit v1.2.3 From 6db8e85c5c1f89cd0183b76dab027c81009f129f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jun 2013 11:18:22 -0700 Subject: cgroup: disallow rename(2) if sane_behavior cgroup's rename(2) isn't a proper migration implementation - it can't move the cgroup to a different parent in the hierarchy. All it can do is swapping the name string for that cgroup. 
This isn't useful and can mislead users to think that cgroup supports proper cgroup-level migration. Disallow rename(2) if sane_behavior. v2: Fail with -EPERM instead of -EINVAL so that it matches the vfs return value when ->rename is not implemented as suggested by Li. Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 17604767adfd..f97522790682 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -270,6 +270,8 @@ enum { * - "release_agent" and "notify_on_release" are removed. * Replacement notification mechanism will be implemented. * + * - rename(2) is disallowed. + * * - memcg: use_hierarchy is on by default and the cgroup file for * the flag is not created. */ -- cgit v1.2.3 From e8c82d20a9f729cf4b9f73043f7fd4e0872bebfd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 18 Jun 2013 18:48:37 +0800 Subject: cgroup: convert cgroup_cft_commit() to use cgroup_for_each_descendant_pre() We used root->allcg_list to iterate the cgroup hierarchy because at that time cgroup_for_each_descendant_pre() hadn't been invented. tj: In cgroup_cfts_commit(), s/@serial_nr/@update_upto/, move the assignment right above releasing cgroup_mutex and explain what's going on there. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f97522790682..b28365890646 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -206,9 +206,6 @@ struct cgroup { */ struct list_head cset_links; - struct list_head allcg_node; /* cgroupfs_root->allcg_list */ - struct list_head cft_q_node; /* used during cftype add/rm */ - /* * Linked list running through all cgroups that can * potentially be reaped by the release agent.
Protected by @@ -313,9 +310,6 @@ struct cgroupfs_root { /* A list running through the active hierarchies */ struct list_head root_list; - /* All cgroups on this root, cgroup_mutex protected */ - struct list_head allcg_list; - /* Hierarchy-specific flags */ unsigned long flags; -- cgit v1.2.3 From 03c78cbebb323fc97295ff97dc5e009d56371d57 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 14 Jun 2013 11:17:19 +0800 Subject: cgroup: rename cont to cgrp Cont is short for container. control group was named process container at first, but then people found container already has a meaning in linux kernel. Clean up the leftover variable name @cont. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b28365890646..6c2ba52fc5d4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -433,13 +433,13 @@ struct cftype { * entry. The key/value pairs (and their ordering) should not * change between reboots. */ - int (*read_map)(struct cgroup *cont, struct cftype *cft, + int (*read_map)(struct cgroup *cgrp, struct cftype *cft, struct cgroup_map_cb *cb); /* * read_seq_string() is used for outputting a simple sequence * using seqfile. */ - int (*read_seq_string)(struct cgroup *cont, struct cftype *cft, + int (*read_seq_string)(struct cgroup *cgrp, struct cftype *cft, struct seq_file *m); ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft, -- cgit v1.2.3 From 02c402d98588bdfd3bebd267db574e13afdef722 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 24 Jun 2013 15:21:47 -0700 Subject: cgroup: convert CFTYPE_* flags to enums Purely cosmetic. 
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6c2ba52fc5d4..ab27001a2c4a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -385,9 +385,11 @@ struct cgroup_map_cb { */ /* cftype->flags */ -#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ -#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ -#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */ +enum { + CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cg */ + CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cg */ + CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ +}; #define MAX_CFTYPE_NAME 64 -- cgit v1.2.3 From a8a648c4acee2095262f7fa65b0d8a68a03c32e4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 24 Jun 2013 15:21:47 -0700 Subject: cgroup: remove cgroup->actual_subsys_mask cgroup curiously has two subsystem masks, ->subsys_mask and ->actual_subsys_mask. The latter only exists because the new target subsys_mask is passed into rebind_subsystems() via @root->subsys_mask. rebind_subsystems() needs to know what the current mask is to decide how to reach the target mask so ->actual_subsys_mask is used as the temp location to remember the current state. Adding a temporary field to a permanent data structure is rather silly and can be misleading. Update rebind_subsystems() to take @added_mask and @removed_mask instead and remove @root->actual_subsys_mask. This patch shouldn't introduce any behavior changes. v2: Comment and description updated as suggested by Li.
Signed-off-by: Tejun Heo Acked-by: Li Zefan --- include/linux/cgroup.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ab27001a2c4a..4c1eceb8c439 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -286,18 +286,12 @@ enum { struct cgroupfs_root { struct super_block *sb; - /* - * The bitmask of subsystems intended to be attached to this - * hierarchy - */ + /* The bitmask of subsystems attached to this hierarchy */ unsigned long subsys_mask; /* Unique id for this hierarchy. */ int hierarchy_id; - /* The bitmask of subsystems currently attached to this hierarchy */ - unsigned long actual_subsys_mask; - /* A list running through the attached subsystems */ struct list_head subsys_list; -- cgit v1.2.3 From 1672d040709b789671c0502e7aac9d632c2f9175 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jun 2013 18:04:54 -0700 Subject: cgroup: fix cgroupfs_root early destruction path cgroupfs_root used to have ->actual_subsys_mask in addition to ->subsys_mask. a8a648c4ac ("cgroup: remove cgroup->actual_subsys_mask") removed it, noting that the subsys_mask is essentially temporary and doesn't belong in cgroupfs_root; however, the patch made it impossible to tell whether a cgroupfs_root actually has the subsystems bound or just has the bits set, leading to the following BUG when trying to mount with subsystems which are already mounted elsewhere. kernel BUG at kernel/cgroup.c:1038! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC ... CPU: 1 PID: 7973 Comm: mount Tainted: G W 3.10.0-rc7-next-20130625-sasha-00011-g1c1dc0e #1105 task: ffff880fc0ae8000 ti: ffff880fc0b9a000 task.ti: ffff880fc0b9a000 RIP: 0010:[] [] rebind_subsystems+0x409/0x5f0 ...
Call Trace: [] cgroup_kill_sb+0xff/0x210 [] deactivate_locked_super+0x4f/0x90 [] cgroup_mount+0x673/0x6e0 [] cpuset_mount+0xd9/0x110 [] mount_fs+0xb0/0x2d0 [] vfs_kern_mount+0xbd/0x180 [] do_new_mount+0x145/0x2c0 [] do_mount+0x356/0x3c0 [] SyS_mount+0xfd/0x140 [] tracesys+0xdd/0xe2 We still want rebind_subsystems() to take added/removed masks, so let's fix it by marking whether a cgroupfs_root has finished binding or not. Also, document what's going on around ->subsys_mask initialization so that similar mistakes aren't repeated. Signed-off-by: Tejun Heo Reported-by: Sasha Levin Acked-by: Li Zefan --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4c1eceb8c439..8e4fd5e67384 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -276,6 +276,7 @@ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ + CGRP_ROOT_SUBSYS_BOUND = (1 << 3), /* subsystems finished binding */ }; /* -- cgit v1.2.3 From 14611e51a57df10240817d8ada510842faf0ec51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jun 2013 11:48:32 -0700 Subject: cgroup: fix RCU accesses to task->cgroups task->cgroups is a RCU pointer pointing to struct css_set. A task switches to a different css_set on cgroup migration but a css_set doesn't change once created and its pointers to cgroup_subsys_states aren't RCU protected. task_subsys_state[_check]() is the macro to acquire css given a task and subsys_id pair. It RCU-dereferences task->cgroups->subsys[] not task->cgroups, so the RCU pointer task->cgroups ends up being dereferenced without read_barrier_depends() after it. It's broken. Fix it by introducing task_css_set[_check]() which does RCU-dereference on task->cgroups. 
task_subsys_state[_check]() is reimplemented to directly dereference ->subsys[] of the css_set returned from task_css_set[_check](). This removes some of the sparse RCU warnings in cgroup. v2: Fixed unbalanced parenthesis and there's no need to use rcu_dereference_raw() when !CONFIG_PROVE_RCU. Both spotted by Li. Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Acked-by: Li Zefan Cc: stable@vger.kernel.org --- include/linux/cgroup.h | 58 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 10 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8e4fd5e67384..ad3555bc21f4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -635,22 +635,60 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( return cgrp->subsys[subsys_id]; } -/* - * function to get the cgroup_subsys_state which allows for extra - * rcu_dereference_check() conditions, such as locks used during the - * cgroup_subsys::attach() methods. +/** + * task_css_set_check - obtain a task's css_set with extra access conditions + * @task: the task to obtain css_set for + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * A task's css_set is RCU protected, initialized and exited while holding + * task_lock(), and can only be modified while holding both cgroup_mutex + * and task_lock() while the task is alive. This macro verifies that the + * caller is inside proper critical section and returns @task's css_set. + * + * The caller can also specify additional allowed conditions via @__c, such + * as locks used during the cgroup_subsys::attach() methods.
*/ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; -#define task_subsys_state_check(task, subsys_id, __c) \ - rcu_dereference_check((task)->cgroups->subsys[(subsys_id)], \ - lockdep_is_held(&(task)->alloc_lock) || \ - lockdep_is_held(&cgroup_mutex) || (__c)) +#define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ + lockdep_is_held(&(task)->alloc_lock) || \ + lockdep_is_held(&cgroup_mutex) || (__c)) #else -#define task_subsys_state_check(task, subsys_id, __c) \ - rcu_dereference((task)->cgroups->subsys[(subsys_id)]) +#define task_css_set_check(task, __c) \ + rcu_dereference((task)->cgroups) #endif +/** + * task_subsys_state_check - obtain css for (task, subsys) w/ extra access conds + * @task: the target task + * @subsys_id: the target subsystem ID + * @__c: extra condition expression to be passed to rcu_dereference_check() + * + * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The + * synchronization rules are the same as task_css_set_check(). + */ +#define task_subsys_state_check(task, subsys_id, __c) \ + task_css_set_check((task), (__c))->subsys[(subsys_id)] + +/** + * task_css_set - obtain a task's css_set + * @task: the task to obtain css_set for + * + * See task_css_set_check(). + */ +static inline struct css_set *task_css_set(struct task_struct *task) +{ + return task_css_set_check(task, false); +} + +/** + * task_subsys_state - obtain css for (task, subsys) + * @task: the target task + * @subsys_id: the target subsystem ID + * + * See task_subsys_state_check(). 
+ */ static inline struct cgroup_subsys_state * task_subsys_state(struct task_struct *task, int subsys_id) { -- cgit v1.2.3 From 0ce6cba35777cf96a54ce0d5856dc962566b8717 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 27 Jun 2013 19:37:26 -0700 Subject: cgroup: CGRP_ROOT_SUBSYS_BOUND should be ignored when comparing mount options 1672d04070 ("cgroup: fix cgroupfs_root early destruction path") introduced CGRP_ROOT_SUBSYS_BOUND which is used to mark completion of subsys binding on a new root; however, this broke remounts. cgroup_remount() doesn't allow changing root options via remount and CGRP_ROOT_SUBSYS_BOUND, which is set on all fully initialized roots, makes the function reject all remounts. Fix it by putting the options part in the lower 16 bits of root->flags and masking the comparisons. While at it, make cgroup_remount() emit an error message explaining why it's rejecting a remount request, so that it's less of a mystery. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ad3555bc21f4..8db53974f7b5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -276,7 +276,11 @@ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ - CGRP_ROOT_SUBSYS_BOUND = (1 << 3), /* subsystems finished binding */ + + /* mount options live below bit 16 */ + CGRP_ROOT_OPTION_MASK = (1 << 16) - 1, + + CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */ }; /* -- cgit v1.2.3 From 913ffdb54366f94eec65c656cae8c6e00e1ab1b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 11 Jul 2013 16:34:48 -0700 Subject: cgroup: replace task_cgroup_path_from_hierarchy() with task_cgroup_path() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
task_cgroup_path_from_hierarchy() was added for the planned new users and none of the currently planned users wants to know about multiple hierarchies. This patch drops the multiple hierarchy part and makes it always return the path in the first non-dummy hierarchy. As unified hierarchy will always have id 1, this is guaranteed to return the path for the unified hierarchy if mounted; otherwise, it will return the path from the hierarchy which happens to occupy the lowest hierarchy id, which will usually be the first hierarchy mounted after boot. Signed-off-by: Tejun Heo Acked-by: Li Zefan Cc: Lennart Poettering Cc: Kay Sievers Cc: Jan Kaluža --- include/linux/cgroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fd097ecfcd97..21cfaff7e002 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -540,8 +540,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, - char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroup_task_count(const struct cgroup *cgrp); -- cgit v1.2.3