From 215e262f2aeba378aa192da07c30770f9925a4bf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 31 May 2013 15:26:45 -0700 Subject: percpu: implement generic percpu refcounting This implements a refcount with similar semantics to atomic_inc()/atomic_dec_and_test() - but percpu. It also implements two stage shutdown, as we need it to tear down the percpu counts. Before dropping the initial refcount, you must call percpu_ref_kill(); this puts the refcount in "shutting down mode" and switches back to a single atomic refcount with the appropriate barriers (synchronize_rcu()). It's also legal to call percpu_ref_kill() multiple times - it only returns true once, so callers don't have to reimplement shutdown synchronization. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style tweak] Signed-off-by: Kent Overstreet Cc: Zach Brown Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Mark Fasheh Cc: Joel Becker Cc: Rusty Russell Cc: Jens Axboe Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Jeff Moyer Cc: Al Viro Cc: Benjamin LaHaise Cc: Tejun Heo Cc: Oleg Nesterov Cc: Christoph Lameter Cc: Ingo Molnar Reviewed-by: "Theodore Ts'o" Signed-off-by: Tejun Heo --- include/linux/percpu-refcount.h | 122 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 include/linux/percpu-refcount.h (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h new file mode 100644 index 000000000000..24b31ef15932 --- /dev/null +++ b/include/linux/percpu-refcount.h @@ -0,0 +1,122 @@ +/* + * Percpu refcounts: + * (C) 2012 Google, Inc. + * Author: Kent Overstreet + * + * This implements a refcount with similar semantics to atomic_t - atomic_inc(), + * atomic_dec_and_test() - but percpu.
+ * + * There's one important difference between percpu refs and normal atomic_t + * refcounts; you have to keep track of your initial refcount, and then when you + * start shutting down you call percpu_ref_kill() _before_ dropping the initial + * refcount. + * + * The refcount will have a range of 0 to ((1U << 31) - 1), i.e. one bit less + * than an atomic_t - this is because of the way shutdown works, see + * percpu_ref_kill()/PCPU_COUNT_BIAS. + * + * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the + * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill() + * puts the ref back in single atomic_t mode, collecting the per cpu refs and + * issuing the appropriate barriers, and then marks the ref as shutting down so + * that percpu_ref_put() will check for the ref hitting 0. After it returns, + * it's safe to drop the initial ref. + * + * USAGE: + * + * See fs/aio.c for some example usage; it's used there for struct kioctx, which + * is created when userspace calls io_setup(), and destroyed when userspace + * calls io_destroy() or the process exits. + * + * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it + * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove + * the kioctx from the process's list of kioctxs - after that, there can't be + * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop + * the initial ref with percpu_ref_put(). + * + * Code that does a two stage shutdown like this often needs some kind of + * explicit synchronization to ensure the initial refcount can only be dropped + * once - percpu_ref_kill() does this for you, it returns true once and false if + * someone else already called it. The aio code uses it this way, but it's not + * necessary if the code has some other mechanism to synchronize teardown. + * around.
+ */ + +#ifndef _LINUX_PERCPU_REFCOUNT_H +#define _LINUX_PERCPU_REFCOUNT_H + +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> + +struct percpu_ref; +typedef void (percpu_ref_release)(struct percpu_ref *); + +struct percpu_ref { + atomic_t count; + /* + * The low bit of the pointer indicates whether the ref is in percpu + * mode; if set, then get/put will manipulate the atomic_t (this is a + * hack because we need to keep the pointer around for + * percpu_ref_kill_rcu()) + */ + unsigned __percpu *pcpu_count; + percpu_ref_release *release; + struct rcu_head rcu; +}; + +int percpu_ref_init(struct percpu_ref *, percpu_ref_release *); +void percpu_ref_kill(struct percpu_ref *ref); + +#define PCPU_STATUS_BITS 2 +#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) +#define PCPU_REF_PTR 0 +#define PCPU_REF_DEAD 1 + +#define REF_STATUS(count) (((unsigned long) count) & PCPU_STATUS_MASK) + +/** + * percpu_ref_get - increment a percpu refcount + * + * Analagous to atomic_inc(). + */ +static inline void percpu_ref_get(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + + preempt_disable(); + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + __this_cpu_inc(*pcpu_count); + else + atomic_inc(&ref->count); + + preempt_enable(); +} + +/** + * percpu_ref_put - decrement a percpu refcount + * + * Decrement the refcount, and if 0, call the release function (which was passed + * to percpu_ref_init()) + */ +static inline void percpu_ref_put(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + + preempt_disable(); + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + __this_cpu_dec(*pcpu_count); + else if (unlikely(atomic_dec_and_test(&ref->count))) + ref->release(ref); + + preempt_enable(); +} + +#endif -- cgit v1.2.3 From 6a24474da83ea7c8b7d32f05f858b1259994067a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 20:43:06 -0700 Subject: percpu-refcount:
consistently use plain (non-sched) RCU percpu_ref_get/put() are using preempt_disable/enable() while percpu_ref_kill() is using plain call_rcu() instead of call_rcu_sched(). This is buggy as grace periods of the two may not match. Fix it by using plain RCU in percpu_ref_get/put(). (I suggested using sched RCU in the first place but there's no actual benefit in doing so unless we're gonna introduce different variants of get/put to be called while preemption is already disabled, which we definitely shouldn't.) Signed-off-by: Tejun Heo Reported-by: Rusty Russell Acked-by: Kent Overstreet --- include/linux/percpu-refcount.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 24b31ef15932..abe141172d96 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -85,7 +85,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) { unsigned __percpu *pcpu_count; - preempt_disable(); + rcu_read_lock(); pcpu_count = ACCESS_ONCE(ref->pcpu_count); @@ -94,7 +94,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) else atomic_inc(&ref->count); - preempt_enable(); + rcu_read_unlock(); } /** @@ -107,7 +107,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) { unsigned __percpu *pcpu_count; - preempt_disable(); + rcu_read_lock(); pcpu_count = ACCESS_ONCE(ref->pcpu_count); @@ -116,7 +116,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) else if (unlikely(atomic_dec_and_test(&ref->count))) ref->release(ref); - preempt_enable(); + rcu_read_unlock(); } #endif -- cgit v1.2.3 From ac899061a93250c28562f05ad94d5c74603415bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 20:43:06 -0700 Subject: percpu-refcount: cosmetic updates * s/percpu_ref_release/percpu_ref_func_t/ as it's customary to have _t postfix for types and the type is gonna be used for a different type of
callback too. * Add @ARG to function comments. * Drop unnecessary and unaligned indentation from percpu_ref_init() function comment. Signed-off-by: Tejun Heo Acked-by: Kent Overstreet --- include/linux/percpu-refcount.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index abe141172d96..b61bd6f23985 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -51,7 +51,7 @@ #include <linux/rcupdate.h> struct percpu_ref; -typedef void (percpu_ref_release)(struct percpu_ref *); +typedef void (percpu_ref_func_t)(struct percpu_ref *); struct percpu_ref { atomic_t count; @@ -62,11 +62,11 @@ struct percpu_ref { * percpu_ref_kill_rcu()) */ unsigned __percpu *pcpu_count; - percpu_ref_release *release; + percpu_ref_func_t *release; struct rcu_head rcu; }; -int percpu_ref_init(struct percpu_ref *, percpu_ref_release *); +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); void percpu_ref_kill(struct percpu_ref *ref); #define PCPU_STATUS_BITS 2 @@ -78,6 +78,7 @@ void percpu_ref_kill(struct percpu_ref *ref); /** * percpu_ref_get - increment a percpu refcount + * @ref: percpu_ref to get * * Analagous to atomic_inc(). */ @@ -99,6 +100,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) /** * percpu_ref_put - decrement a percpu refcount + * @ref: percpu_ref to put * * Decrement the refcount, and if 0, call the release function (which was passed * to percpu_ref_init()) -- cgit v1.2.3 From acac7883ee7bcc32476963bce7baf73d44574dd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 20:52:01 -0700 Subject: percpu-refcount: add __must_check to percpu_ref_init() and don't use ACCESS_ONCE() in percpu_ref_kill_rcu() Two small changes. * Unlike most init functions, percpu_ref_init() allocates memory and may fail. Let's mark it with __must_check in case the caller forgets.
* percpu_ref_kill_rcu() is unnecessarily using ACCESS_ONCE() to dereference @ref->pcpu_count, which can be misleading. The pointer is guaranteed to be valid and visible and can't change underneath the function. Drop ACCESS_ONCE(). Signed-off-by: Tejun Heo --- include/linux/percpu-refcount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b61bd6f23985..8146aa9cd89e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -66,7 +66,8 @@ struct percpu_ref { struct rcu_head rcu; }; -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); +int __must_check percpu_ref_init(struct percpu_ref *ref, + percpu_ref_func_t *release); void percpu_ref_kill(struct percpu_ref *ref); #define PCPU_STATUS_BITS 2 -- cgit v1.2.3 From bc497bd33b2d6a6f07bc8574b4764edbd7fdffa8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 12 Jun 2013 20:52:35 -0700 Subject: percpu-refcount: implement percpu_ref_cancel_init() Normally, percpu_ref_init() initializes and percpu_ref_kill() initiates destruction which completes asynchronously. The asynchronous destruction can be problematic in init failure path where the caller wants to destroy half-constructed object - distinguishing half-constructed objects from the usual release method can be painful for complex objects. This patch implements percpu_ref_cancel_init() which synchronously destroys the percpu_ref without invoking release. To avoid unintentional misuses, the function requires the ref to have finished percpu_ref_init() but never used and triggers WARN otherwise. v2: Explain the weird name and usage restriction in the function comment. 
Signed-off-by: Tejun Heo Acked-by: Kent Overstreet --- include/linux/percpu-refcount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 8146aa9cd89e..6d843d60690d 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -68,6 +68,7 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); +void percpu_ref_cancel_init(struct percpu_ref *ref); void percpu_ref_kill(struct percpu_ref *ref); #define PCPU_STATUS_BITS 2 -- cgit v1.2.3 From dbece3a0f1ef0b19aff1cc6ed0942fec9ab98de1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 13 Jun 2013 19:23:53 -0700 Subject: percpu-refcount: implement percpu_ref_tryget() along with percpu_ref_kill_and_confirm() Implement percpu_ref_tryget() which stops giving out references once the percpu_ref is visible as killed. Because the refcnt is per-cpu, different CPUs will start to see a refcnt as killed at different points in time and tryget() may continue to succeed on a subset of CPUs for a while after percpu_ref_kill() returns. For use cases where it's necessary to know when all CPUs start to see the refcnt as dead, percpu_ref_kill_and_confirm() is added. The new function takes an extra argument @confirm_kill which is invoked when the refcnt is guaranteed to be viewed as killed on all CPUs. While this isn't the prettiest interface, it doesn't force synchronous wait and is much safer than requiring the caller to do its own call_rcu(). v2: Patch description rephrased to emphasize that tryget() may continue to succeed on some CPUs after kill() returns as suggested by Kent. v3: Function comment in percpu_ref_kill_and_confirm() updated warning people to not depend on the implied RCU grace period from the confirm callback as it's an implementation detail.
Signed-off-by: Tejun Heo Slightly-Grumpily-Acked-by: Kent Overstreet --- include/linux/percpu-refcount.h | 50 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6d843d60690d..dd2a08600453 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -63,13 +63,30 @@ struct percpu_ref { */ unsigned __percpu *pcpu_count; percpu_ref_func_t *release; + percpu_ref_func_t *confirm_kill; struct rcu_head rcu; }; int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); void percpu_ref_cancel_init(struct percpu_ref *ref); -void percpu_ref_kill(struct percpu_ref *ref); +void percpu_ref_kill_and_confirm(struct percpu_ref *ref, + percpu_ref_func_t *confirm_kill); + +/** + * percpu_ref_kill - drop the initial ref + * @ref: percpu_ref to kill + * + * Must be used to drop the initial ref on a percpu refcount; must be called + * precisely once before shutdown. + * + * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the + * percpu counters and dropping the initial ref. + */ +static inline void percpu_ref_kill(struct percpu_ref *ref) +{ + return percpu_ref_kill_and_confirm(ref, NULL); +} #define PCPU_STATUS_BITS 2 #define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) @@ -100,6 +117,37 @@ static inline void percpu_ref_get(struct percpu_ref *ref) rcu_read_unlock(); } +/** + * percpu_ref_tryget - try to increment a percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless it has already been killed. Returns + * %true on success; %false on failure. + * + * Completion of percpu_ref_kill() in itself doesn't guarantee that tryget + * will fail. For such guarantee, percpu_ref_kill_and_confirm() should be + * used. 
After the confirm_kill callback is invoked, it's guaranteed that + * no new reference will be given out by percpu_ref_tryget(). + */ +static inline bool percpu_ref_tryget(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + int ret = false; + + rcu_read_lock(); + + pcpu_count = ACCESS_ONCE(ref->pcpu_count); + + if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + __this_cpu_inc(*pcpu_count); + ret = true; + } + + rcu_read_unlock(); + + return ret; +} + /** * percpu_ref_put - decrement a percpu refcount * @ref: percpu_ref to put -- cgit v1.2.3 From a4244454df1296e90cc961c1b636b1176ef0d9a0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 16 Jun 2013 16:12:26 -0700 Subject: percpu-refcount: use RCU-sched instead of normal RCU percpu-refcount was incorrectly using preempt_disable/enable() for RCU critical sections against call_rcu(). 6a24474da8 ("percpu-refcount: consistently use plain (non-sched) RCU") fixed it by converting the preemption operations with rcu_read_[un]lock() citing that there isn't any advantage in using sched-RCU over using the usual one; however, rcu_read_[un]lock() for the preemptible RCU implementation - CONFIG_TREE_PREEMPT_RCU, chosen when CONFIG_PREEMPT - are slightly more expensive than preempt_disable/enable(). In a contrived microbench which repeats the following, - percpu_ref_get() - copy 32 bytes of data into percpu buffer - percpu_ref_put() - copy 32 bytes of data into percpu buffer rcu_read_[un]lock() used in percpu_ref_get/put() makes it go slower by about 15% when compared to using sched-RCU. As the RCU critical sections are extremely short, using sched-RCU shouldn't have any latency implications. Convert to RCU-sched. Signed-off-by: Tejun Heo Acked-by: Kent Overstreet Acked-by: "Paul E.
McKenney" Cc: Michal Hocko Cc: Rusty Russell --- include/linux/percpu-refcount.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/percpu-refcount.h') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index dd2a08600453..95961f0bf62d 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -105,7 +105,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) { unsigned __percpu *pcpu_count; - rcu_read_lock(); + rcu_read_lock_sched(); pcpu_count = ACCESS_ONCE(ref->pcpu_count); @@ -114,7 +114,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) else atomic_inc(&ref->count); - rcu_read_unlock(); + rcu_read_unlock_sched(); } /** @@ -134,7 +134,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) unsigned __percpu *pcpu_count; int ret = false; - rcu_read_lock(); + rcu_read_lock_sched(); pcpu_count = ACCESS_ONCE(ref->pcpu_count); @@ -143,7 +143,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) ret = true; } - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } @@ -159,7 +159,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) { unsigned __percpu *pcpu_count; - rcu_read_lock(); + rcu_read_lock_sched(); pcpu_count = ACCESS_ONCE(ref->pcpu_count); @@ -168,7 +168,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) else if (unlikely(atomic_dec_and_test(&ref->count))) ref->release(ref); - rcu_read_unlock(); + rcu_read_unlock_sched(); } #endif -- cgit v1.2.3