From d0d04b78f403b0bcfe03315e16b50d196610720d Mon Sep 17 00:00:00 2001
From: Zhouping Liu
Date: Thu, 16 May 2013 11:36:23 +0800
Subject: mm, slab: moved kmem_cache_alloc_node comment to correct place

After several fixes to kmem_cache_alloc_node(), its comment ended up
separated from the function it documents. This patch moves it back on
top of the kmem_cache_alloc_node() definition.

Signed-off-by: Zhouping Liu
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index a98f8db93670..273a5ac2ade3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3340,18 +3340,6 @@ done:
 	return obj;
 }
 
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
 static __always_inline void *
 slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 		   unsigned long caller)
@@ -3645,6 +3633,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #endif
 
 #ifdef CONFIG_NUMA
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
--
cgit v1.2.3


From 069e2b351de67e7a837b15b3d26c65c19b790cc3 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 14 Jun 2013 19:55:13 +0000
Subject: slob: Rework #ifdeffery in slab.h

Make the SLOB-specific parts of slab.h harmonize with the way the other
allocators handle this, and create the typical kmalloc constants for
SLOB as well. SLOB does not support a kmalloc array, but the constants
help us avoid #ifdefs.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0c621752caa6..9690c14eb7fb 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -169,11 +169,7 @@ struct kmem_cache {
 	struct list_head list;	/* List of all slab caches on the system */
 };
 
-#define KMALLOC_MAX_SIZE (1UL << 30)
-
-#include <linux/slob_def.h>
-
-#else /* CONFIG_SLOB */
+#endif /* CONFIG_SLOB */
 
 /*
  * Kmalloc array related definitions
@@ -195,7 +191,9 @@ struct kmem_cache {
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
-#else
+#endif
+
+#ifdef CONFIG_SLUB
 /*
  * SLUB allocates up to order 2 pages directly and otherwise
  * passes the request to the page allocator.
@@ -207,6 +205,19 @@ struct kmem_cache {
 #endif
 #endif
 
+#ifdef CONFIG_SLOB
+/*
+ * SLOB passes all page size and larger requests to the page allocator.
+ * No kmalloc array is necessary since objects of different sizes can
+ * be allocated from the same page.
+ */
+#define KMALLOC_SHIFT_MAX	30
+#define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
+#ifndef KMALLOC_SHIFT_LOW
+#define KMALLOC_SHIFT_LOW	3
+#endif
+#endif
+
 /* Maximum allocatable size */
 #define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
 /* Maximum size for which we actually use a slab cache */
@@ -221,6 +232,7 @@ struct kmem_cache {
 #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
 #endif
 
+#ifndef CONFIG_SLOB
 extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
 #ifdef CONFIG_ZONE_DMA
 extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
@@ -275,13 +287,18 @@ static __always_inline int kmalloc_index(size_t size)
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
+#endif /* !CONFIG_SLOB */
 
 #ifdef CONFIG_SLAB
 #include <linux/slab_def.h>
-#elif defined(CONFIG_SLUB)
+#endif
+
+#ifdef CONFIG_SLUB
 #include <linux/slub_def.h>
-#else
-#error "Unknown slab allocator"
+#endif
+
+#ifdef CONFIG_SLOB
+#include <linux/slob_def.h>
 #endif
 
 /*
@@ -291,6 +308,7 @@ static __always_inline int kmalloc_index(size_t size)
  */
 static __always_inline int kmalloc_size(int n)
 {
+#ifndef CONFIG_SLOB
 	if (n > 2)
 		return 1 << n;
 
@@ -299,10 +317,9 @@ static __always_inline int kmalloc_size(int n)
 
 	if (n == 2 && KMALLOC_MIN_SIZE <= 64)
 		return 192;
-
+#endif
 	return 0;
 }
-#endif /* !CONFIG_SLOB */
 
 /*
  * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
--
cgit v1.2.3


From 0fa8103be4c20f893486c533e4c6dfbc5ccddeb4 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:22 +0800
Subject: mm/slab: Fix drain freelist excessively

drain_freelist() is called to drain the slabs_free lists for cache reap,
cache shrink, the memory hotplug callback, etc. Its tofree parameter is
the number of slabs to free, not the number of slab objects to free.
This patch fixes the callers that were passing a number of objects and
makes sure they pass a number of slabs.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 273a5ac2ade3..c9b4da9a1fe5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1180,6 +1180,12 @@ static int init_cache_node_node(int node)
 	return 0;
 }
 
+static inline int slabs_tofree(struct kmem_cache *cachep,
+						struct kmem_cache_node *n)
+{
+	return (n->free_objects + cachep->num - 1) / cachep->num;
+}
+
 static void __cpuinit cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
@@ -1241,7 +1247,7 @@ free_array_cache:
 		n = cachep->node[node];
 		if (!n)
 			continue;
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 	}
 }
 
@@ -1408,7 +1414,7 @@ static int __meminit drain_cache_node_node(int node)
 		if (!n)
 			continue;
 
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
 		if (!list_empty(&n->slabs_full) ||
 		    !list_empty(&n->slabs_partial)) {
@@ -2534,7 +2540,7 @@ static int __cache_shrink(struct kmem_cache *cachep)
 		if (!n)
 			continue;
 
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
 		ret += !list_empty(&n->slabs_full) ||
 			!list_empty(&n->slabs_partial);
--
cgit v1.2.3
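As a quick sanity check of the rounding that slabs_tofree() performs above:
it is plain ceiling division. With, say, 16 objects per slab and 40 free
objects, the old callers asked drain_freelist() to free 40 "slabs", while
the helper computes ceil(40 / 16) = 3. The numbers are made up for
illustration and the snippet is a standalone user-space sketch, not kernel
code.

#include <stdio.h>

/* Same arithmetic as slabs_tofree(): round free_objects / objs_per_slab up. */
static int slabs_tofree(int free_objects, int objs_per_slab)
{
	return (free_objects + objs_per_slab - 1) / objs_per_slab;
}

int main(void)
{
	printf("%d\n", slabs_tofree(40, 16));	/* prints 3 */
	return 0;
}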
From e25839f67948ca54fa55a45686d72c266f65f099 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:23 +0800
Subject: mm/slab: Sharing s_next and s_stop between slab and slub

This patch shares s_next and s_stop between slab and slub.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c        | 10 ----------
 mm/slab.h        |  3 +++
 mm/slab_common.c |  4 ++--
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index c9b4da9a1fe5..4a907a072669 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4438,16 +4438,6 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
-	return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
-	mutex_unlock(&slab_mutex);
-}
-
 static const struct seq_operations slabstats_op = {
 	.start = leaks_start,
 	.next = s_next,
diff --git a/mm/slab.h b/mm/slab.h
index f96b49e4704e..95c88604aab7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -271,3 +271,6 @@ struct kmem_cache_node {
 #endif
 
 };
+
+void *s_next(struct seq_file *m, void *p, loff_t *pos);
+void s_stop(struct seq_file *m, void *p);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d2517b05d5bc..68518eb67229 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -529,12 +529,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	return seq_list_start(&slab_caches, *pos);
 }
 
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+void *s_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	return seq_list_next(p, &slab_caches, pos);
 }
 
-static void s_stop(struct seq_file *m, void *p)
+void s_stop(struct seq_file *m, void *p)
 {
 	mutex_unlock(&slab_mutex);
 }
--
cgit v1.2.3


From e9b4db2b8dba6b6c666e54b20ce46f3e597a6d96 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:24 +0800
Subject: mm/slab: Fix /proc/slabinfo unwriteable for slab

SLAB has some tunables, such as limit, batchcount, and sharedfactor,
that can be tuned through slabinfo_write(). Commit b7454ad3 ("mm/sl[au]b:
Move slabinfo processing to slab_common.c") incorrectly made
/proc/slabinfo unwriteable for SLAB; this patch fixes it by reverting to
the original mode.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab_common.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 68518eb67229..13ae037c71d4 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -495,6 +495,13 @@ void __init create_kmalloc_caches(unsigned long flags)
 
 #ifdef CONFIG_SLABINFO
+
+#ifdef CONFIG_SLAB
+#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
+#else
+#define SLABINFO_RIGHTS S_IRUSR
+#endif
+
 void print_slabinfo_header(struct seq_file *m)
 {
 	/*
@@ -631,7 +638,8 @@ static const struct file_operations proc_slabinfo_operations = {
 
 static int __init slab_proc_init(void)
 {
-	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
+						&proc_slabinfo_operations);
 	return 0;
 }
 module_init(slab_proc_init);
--
cgit v1.2.3
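With write permission restored, the tunables named in the changelog are set
by writing a single line to /proc/slabinfo; as far as I can tell from
slabinfo_write() in mm/slab_common.c, the accepted form is
"cache-name limit batchcount shared". The sketch below is user-space code,
not part of the patch, and the cache name and numbers are placeholders.

#include <stdio.h>

int main(void)
{
	/* Requires root and a CONFIG_SLAB kernel; values are illustrative. */
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f) {
		perror("fopen /proc/slabinfo");
		return 1;
	}
	fprintf(f, "dentry 120 60 8\n");	/* limit=120 batchcount=60 shared=8 */
	if (fclose(f) == EOF)
		perror("write /proc/slabinfo");
	return 0;
}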
From a446336454cf9ce3234a6013d1c3b482358d9459 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:25 +0800
Subject: mm/slub: Drop unnecessary nr_partials

This patch removes the unused nr_partials variable.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 4df2c0c337fb..f788be3a0b12 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5266,7 +5266,6 @@ __initcall(slab_sysfs_init);
 #ifdef CONFIG_SLABINFO
 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 {
-	unsigned long nr_partials = 0;
 	unsigned long nr_slabs = 0;
 	unsigned long nr_objs = 0;
 	unsigned long nr_free = 0;
@@ -5278,7 +5277,6 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 		if (!n)
 			continue;
 
-		nr_partials += n->nr_partial;
 		nr_slabs += atomic_long_read(&n->nr_slabs);
 		nr_objs += atomic_long_read(&n->total_objects);
 		nr_free += count_partial(n, count_free);
--
cgit v1.2.3


From c17fd13ec0677e61f3692ecb9d4b21f79848fa04 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:26 +0800
Subject: mm/slub: Use node_nr_slabs and node_nr_objs in get_slabinfo

Use the existing interfaces node_nr_slabs() and node_nr_objs() to get
nr_slabs and nr_objs.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index f788be3a0b12..5ee6c7cd9fc4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5277,8 +5277,8 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 		if (!n)
 			continue;
 
-		nr_slabs += atomic_long_read(&n->nr_slabs);
-		nr_objs += atomic_long_read(&n->total_objects);
+		nr_slabs += node_nr_slabs(n);
+		nr_objs += node_nr_objs(n);
 		nr_free += count_partial(n, count_free);
 	}
 
--
cgit v1.2.3


From 318df36e57c0ca9f2146660d41ff28e8650af423 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 19 Jun 2013 15:33:55 +0900
Subject: slub: do not put a slab to cpu partial list when cpu_partial is 0

In the free path, we do not check the number of cpu partials, so a slab
can be linked into the cpu partial list even if cpu_partial is 0. To
prevent this, check s->cpu_partial in put_cpu_partial().

Acked-by: Christoph Lameter
Reviewed-by: Wanpeng Li
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 5ee6c7cd9fc4..54cc4d544f3c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1954,6 +1954,9 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 	int pages;
 	int pobjects;
 
+	if (!s->cpu_partial)
+		return;
+
 	do {
 		pages = 0;
 		pobjects = 0;
--
cgit v1.2.3


From a6d78159f8a717263bea71bef738256dafe6260d Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Thu, 20 Dec 2012 14:11:39 -0500
Subject: slob: use DIV_ROUND_UP where possible

Acked-by: Christoph Lameter
Signed-off-by: Sasha Levin
Signed-off-by: Pekka Enberg
---
 mm/slob.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slob.c b/mm/slob.c
index a99fdf7a0907..f729c46639fa 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -122,7 +122,7 @@ static inline void clear_slob_page_free(struct page *sp)
 }
 
 #define SLOB_UNIT sizeof(slob_t)
-#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
+#define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
--
cgit v1.2.3
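For reference, DIV_ROUND_UP() from linux/kernel.h expands to
((n) + (d) - 1) / (d), i.e. exactly the open-coded ceiling division that
SLOB_UNITS() used before, so the change is purely cosmetic. The tiny
user-space check below (not kernel code; the sample values are arbitrary)
shows both forms agreeing.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int size = 13, unit = 4;	/* arbitrary sample values */

	/* old open-coded form vs. the macro: both print 4 */
	printf("%d %d\n", (size + unit - 1) / unit, DIV_ROUND_UP(size, unit));
	return 0;
}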
From 0f8f8094d28eb53368ac09186ea6b3a324cc7d44 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Tue, 2 Jul 2013 12:12:10 -0700
Subject: slab: fix init_lock_keys

Some architectures (e.g. powerpc built with CONFIG_PPC_256K_PAGES=y
CONFIG_FORCE_MAX_ZONEORDER=11) get PAGE_SHIFT + MAX_ORDER > 26.

In 3.10 kernels, CONFIG_LOCKDEP=y with PAGE_SHIFT + MAX_ORDER > 26 makes
init_lock_keys() dereference beyond kmalloc_caches[26]. This leads to an
unbootable system (kernel panic while initializing SLAB) if one of
kmalloc_caches[26...PAGE_SHIFT+MAX_ORDER-1] is not NULL.

Fix this by making sure that init_lock_keys() does not dereference beyond
the kmalloc_caches[26] array.

Signed-off-by: Christoph Lameter
Reported-by: Tetsuo Handa
Cc: Pekka Enberg
Cc: [3.10.x]
Signed-off-by: Andrew Morton
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slab.c b/mm/slab.c
index 4a907a072669..9bf225162fcb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -565,7 +565,7 @@ static void init_node_lock_keys(int q)
 	if (slab_state < UP)
 		return;
 
-	for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) {
+	for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
 		struct kmem_cache_node *n;
 		struct kmem_cache *cache = kmalloc_caches[i];
--
cgit v1.2.3


From e7efa615ccf78394338144ff0187be331240748a Mon Sep 17 00:00:00 2001
From: Michael Opdenacker
Date: Tue, 25 Jun 2013 18:16:55 +0200
Subject: slab: add kmalloc() to kernel API documentation

At the moment, kmalloc() isn't even listed in the kernel API
documentation (DocBook/kernel-api.html after running "make htmldocs").

Another issue is that the documentation for kmalloc_node() refers to
kcalloc()'s documentation to describe its 'flags' parameter, while
kcalloc() referred to kmalloc()'s documentation, which doesn't exist!

This patch is a proposed fix for this. It also removes the documentation
for kmalloc() in include/linux/slob_def.h, which isn't included when
generating the documentation anyway. This way, kmalloc() is described in
only one place.

Acked-by: Christoph Lameter
Acked-by: Randy Dunlap
Signed-off-by: Michael Opdenacker
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h     | 18 ++++++++++++++----
 include/linux/slob_def.h |  8 --------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9690c14eb7fb..6c5cc0ea8713 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -373,9 +373,8 @@ int cache_show(struct kmem_cache *s, struct seq_file *m);
 void print_slabinfo_header(struct seq_file *m);
 
 /**
- * kmalloc_array - allocate memory for an array.
- * @n: number of elements.
- * @size: element size.
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
 *
 * The @flags argument may be one of:
@@ -422,6 +421,17 @@ void print_slabinfo_header(struct seq_file *m);
 * There are other flags available as well, but these are not intended
 * for general use, and so are not documented here. For a full list of
 * potential flags, always refer to linux/gfp.h.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ */
+static __always_inline void *kmalloc(size_t size, gfp_t flags);
+
+/**
+ * kmalloc_array - allocate memory for an array.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
 */
 static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
@@ -445,7 +455,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 /**
 * kmalloc_node - allocate memory from a specific node
 * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
+ * @flags: the type of memory to allocate (see kmalloc).
 * @node: node to allocate from.
 *
 * kmalloc() for non-local nodes, used to allocate from a specific node
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index f28e14a12e3f..095a5a4a8516 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -18,14 +18,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 	return __kmalloc_node(size, flags, node);
 }
 
-/**
- * kmalloc - allocate memory
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- */
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	return __kmalloc_node(size, flags, NUMA_NO_NODE);
--
cgit v1.2.3


From 345c905d13a4ec9f774b6b4bc038fe4aef26cced Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 19 Jun 2013 14:05:52 +0900
Subject: slub: Make cpu partial slab support configurable

CPU partial support can introduce a level of indeterminism that is not
wanted in certain contexts (like a realtime kernel). Make it
configurable.

This patch is based on Christoph Lameter's "slub: Make cpu partial slab
support configurable V2".

Acked-by: Christoph Lameter
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 init/Kconfig | 11 +++++++++++
 mm/slub.c    | 27 +++++++++++++++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 7d30240e5bfe..3b34a88cf34e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1511,6 +1511,17 @@ config SLOB
 
 endchoice
 
+config SLUB_CPU_PARTIAL
+	default y
+	depends on SLUB
+	bool "SLUB per cpu partial cache"
+	help
+	  Per cpu partial caches accellerate objects allocation and freeing
+	  that is local to a processor at the price of more indeterminism
+	  in the latency of the free. On overflow these caches will be cleared
+	  which requires the taking of locks that may cause latency spikes.
+	  Typically one would choose no for a realtime system.
+
 config MMAP_ALLOW_UNINITIALIZED
 	bool "Allow mmapped anonymous memory to be uninitialized"
 	depends on EXPERT && !MMU
diff --git a/mm/slub.c b/mm/slub.c
index 54cc4d544f3c..ef60536c5d69 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -122,6 +122,15 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 #endif
 }
 
+static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+	return !kmem_cache_debug(s);
+#else
+	return false;
+#endif
+}
+
 /*
  * Issues still to be resolved:
 *
@@ -1572,7 +1581,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			put_cpu_partial(s, page, 0);
 			stat(s, CPU_PARTIAL_NODE);
 		}
-		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+		if (!kmem_cache_has_cpu_partial(s)
+			|| available > s->cpu_partial / 2)
 			break;
 
 	}
@@ -1883,6 +1893,7 @@ redo:
 static void unfreeze_partials(struct kmem_cache *s,
 		struct kmem_cache_cpu *c)
 {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct kmem_cache_node *n = NULL, *n2 = NULL;
 	struct page *page, *discard_page = NULL;
 
@@ -1937,6 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
+#endif
 }
 
 /*
@@ -1950,6 +1962,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 */
 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct page *oldpage;
 	int pages;
 	int pobjects;
@@ -1989,6 +2002,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->next = oldpage;
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
 								!= oldpage);
+#endif
 }
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -2497,7 +2511,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		new.inuse--;
 		if ((!new.inuse || !prior) && !was_frozen) {
 
-			if (!kmem_cache_debug(s) && !prior)
+			if (kmem_cache_has_cpu_partial(s) && !prior)
 
 				/*
 				 * Slab was on no list before and will be partially empty
@@ -2552,8 +2566,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	 * Objects left in the slab. If it was not on the partial list before
 	 * then add it.
 	 */
-	if (kmem_cache_debug(s) && unlikely(!prior)) {
-		remove_full(s, page);
+	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
+		if (kmem_cache_debug(s))
+			remove_full(s, page);
 		add_partial(n, page, DEACTIVATE_TO_TAIL);
 		stat(s, FREE_ADD_PARTIAL);
 	}
@@ -3061,7 +3076,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (kmem_cache_debug(s))
+	if (!kmem_cache_has_cpu_partial(s))
 		s->cpu_partial = 0;
 	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
@@ -4456,7 +4471,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
-	if (objects && kmem_cache_debug(s))
+	if (objects && !kmem_cache_has_cpu_partial(s))
 		return -EINVAL;
 
 	s->cpu_partial = objects;
--
cgit v1.2.3
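The last hunk above also tightens the validation in cpu_partial_store(),
which backs the per-cache sysfs attribute
/sys/kernel/slab/<cache>/cpu_partial: once a cache has no per-cpu partial
support (CONFIG_SLUB_CPU_PARTIAL=n, or a debugged cache), any attempt to
set a non-zero value is rejected with -EINVAL. A small user-space sketch
of poking that knob follows; it is not part of the patch, and the cache
name and value are placeholders.

#include <stdio.h>

int main(void)
{
	/* kmalloc-64 is just an example cache; pick any /sys/kernel/slab entry */
	FILE *f = fopen("/sys/kernel/slab/kmalloc-64/cpu_partial", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "13\n");
	if (fclose(f) == EOF)	/* expect EINVAL if cpu partials are unavailable */
		perror("cpu_partial");
	return 0;
}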
From c1e854e924f354657ea2ad08fd7b38aac81c59b1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 17 Jan 2013 12:13:46 -0500
Subject: slob: Check for NULL pointer before calling ctor()

While doing some code inspection, I noticed that the slob constructor
method can be called with a NULL pointer. If memory is tight and slob
fails to allocate with slob_alloc() or slob_new_pages(), it still calls
the ctor() method with a NULL pointer.

Looking at the first ctor() method I found, I noticed that it cannot
handle a NULL pointer (I'm sure others probably can't either):

static void sighand_ctor(void *data)
{
	struct sighand_struct *sighand = data;

	spin_lock_init(&sighand->siglock);
	init_waitqueue_head(&sighand->signalfd_wqh);
}

The solution is to only call the ctor() method if allocation succeeded.

Acked-by: Christoph Lameter
Signed-off-by: Steven Rostedt
Signed-off-by: Pekka Enberg
---
 mm/slob.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slob.c b/mm/slob.c
index f729c46639fa..3d73b3b8fb1d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -554,7 +554,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 						  flags, node);
 	}
 
-	if (c->ctor)
+	if (b && c->ctor)
 		c->ctor(b);
 
 	kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
--
cgit v1.2.3


From 276a2439ce7917b8c3043af7ad6bf17bbcc24030 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Mon, 8 Jul 2013 08:08:28 +0800
Subject: mm/slab: Give s_next and s_stop slab-specific names

Give s_next and s_stop slab-specific names instead of exporting
"s_next" and "s_stop".

Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c        | 4 ++--
 mm/slab.h        | 4 ++--
 mm/slab_common.c | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 9bf225162fcb..57ab42297d96 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4440,8 +4440,8 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
 static const struct seq_operations slabstats_op = {
 	.start = leaks_start,
-	.next = s_next,
-	.stop = s_stop,
+	.next = slab_next,
+	.stop = slab_stop,
 	.show = leaks_show,
 };
diff --git a/mm/slab.h b/mm/slab.h
index 95c88604aab7..620ceeddbe1a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -272,5 +272,5 @@ struct kmem_cache_node {
 
 };
 
-void *s_next(struct seq_file *m, void *p, loff_t *pos);
-void s_stop(struct seq_file *m, void *p);
+void *slab_next(struct seq_file *m, void *p, loff_t *pos);
+void slab_stop(struct seq_file *m, void *p);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 13ae037c71d4..eacdffaf71c9 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -536,12 +536,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	return seq_list_start(&slab_caches, *pos);
 }
 
-void *s_next(struct seq_file *m, void *p, loff_t *pos)
+void *slab_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	return seq_list_next(p, &slab_caches, pos);
 }
 
-void s_stop(struct seq_file *m, void *p)
+void slab_stop(struct seq_file *m, void *p)
 {
 	mutex_unlock(&slab_mutex);
 }
@@ -618,8 +618,8 @@ static int s_show(struct seq_file *m, void *p)
 */
 static const struct seq_operations slabinfo_op = {
 	.start = s_start,
-	.next = s_next,
-	.stop = s_stop,
+	.next = slab_next,
+	.stop = slab_stop,
 	.show = s_show,
 };
--
cgit v1.2.3


From c25f195e828f847735c7626b5693ddc3b853d245 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 17 Jan 2013 13:10:58 -0500
Subject: slub: Check for page NULL before doing the node_match check

In the -rt kernel (mrg), we hit the following dump:

BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [] kmem_cache_alloc_node+0x51/0x180
PGD a2d39067 PUD b1641067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: sunrpc cpufreq_ondemand ipv6 tg3 joydev sg serio_raw pcspkr k8temp amd64_edac_mod edac_core i2c_piix4 e100 mii shpchp ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom sata_svw ata_generic pata_acpi pata_serverworks radeon ttm drm_kms_helper drm hwmon i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU 3
Pid: 20878, comm: hackbench Not tainted 3.6.11-rt25.14.el6rt.x86_64 #1 empty empty/Tyan Transport GT24-B3992
RIP: 0010:[] [] kmem_cache_alloc_node+0x51/0x180
RSP: 0018:ffff8800a9b17d70 EFLAGS: 00010213
RAX: 0000000000000000 RBX: 0000000001200011 RCX: ffff8800a06d8000
RDX: 0000000004d92a03 RSI: 00000000000000d0 RDI: ffff88013b805500
RBP: ffff8800a9b17dc0 R08: ffff88023fd14d10 R09: ffffffff81041cbd
R10: 00007f4e3f06e9d0 R11: 0000000000000246 R12: ffff88013b805500
R13: ffff8801ff46af40 R14: 0000000000000001 R15: 0000000000000000
FS: 00007f4e3f06e700(0000) GS:ffff88023fd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 00000000a2d3a000 CR4: 00000000000007e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process hackbench (pid: 20878, threadinfo ffff8800a9b16000, task ffff8800a06d8000)
Stack:
 ffff8800a9b17da0 ffffffff81202e08 ffff8800a9b17de0 000000d001200011
 0000000001200011 0000000001200011 0000000000000000 0000000000000000
 00007f4e3f06e9d0 0000000000000000 ffff8800a9b17e60 ffffffff81041cbd
Call Trace:
 [] ? current_has_perm+0x68/0x80
 [] copy_process+0xdd/0x15b0
 [] ? rt_up_read+0x25/0x30
 [] do_fork+0x5a/0x360
 [] ? migrate_enable+0xeb/0x220
 [] sys_clone+0x28/0x30
 [] stub_clone+0x13/0x20
 [] ? system_call_fastpath+0x16/0x1b
Code: 89 fc 89 75 cc 41 89 d6 4d 8b 04 24 65 4c 03 04 25 48 ae 00 00 49 8b 50 08 4d 8b 28 49 8b 40 10 4d 85 ed 74 12 41 83 fe ff 74 27 <48> 8b 00 48 c1 e8 3a 41 39 c6 74 1b 8b 75 cc 4c 89 c9 44 89 f2
RIP [] kmem_cache_alloc_node+0x51/0x180
 RSP
CR2: 0000000000000000
---[ end trace 0000000000000002 ]---

Now, this uses SLUB pretty much unmodified, but as it is the -rt kernel
with CONFIG_PREEMPT_RT set, spinlocks are mutexes, although they do
disable migration. But the SLUB code is relatively lockless, and the
spin_locks there are raw_spin_locks (not converted to mutexes), thus I
believe this bug can happen in mainline without -rt features. The -rt
patch is just good at triggering mainline bugs ;-)

Anyway, looking at where this crashed, it seems that the page variable
can be NULL when passed to the node_match() function (which does not
check if it is NULL). When this happens we get the above panic.

As page is only used in slab_alloc() to check if the node matches, if
it's NULL I'm assuming that we can say it doesn't and call the
__slab_alloc() code. Is this a correct assumption?

Acked-by: Christoph Lameter
Signed-off-by: Steven Rostedt
Signed-off-by: Pekka Enberg
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index ef60536c5d69..33f71330e713 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2374,7 +2374,7 @@ redo:
 
 	object = c->freelist;
 	page = c->page;
-	if (unlikely(!object || !node_match(page, node)))
+	if (unlikely(!object || !page || !node_match(page, node)))
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 	else {
--
cgit v1.2.3
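For context on why a NULL page is fatal here: node_match() reads the page's
node id without any NULL check. The snippet below is a simplified rendering
of SLUB's node_match() from this era, reconstructed for illustration rather
than quoted from the patch; the exact body in mm/slub.c may differ slightly.

/*
 * Simplified sketch of node_match(). page_to_nid() reads page->flags, so a
 * NULL page faults right here, which is what the one-line fix above avoids
 * by sending the NULL-page case straight to __slab_alloc().
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	if (node != NUMA_NO_NODE && page_to_nid(page) != node)
		return 0;
#endif
	return 1;
}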