From d0d04b78f403b0bcfe03315e16b50d196610720d Mon Sep 17 00:00:00 2001
From: Zhouping Liu
Date: Thu, 16 May 2013 11:36:23 +0800
Subject: mm, slab: moved kmem_cache_alloc_node comment to correct place

After several fixes to kmem_cache_alloc_node(), its comment ended up
separated from the function it documents. This patch moves it back on
top of the kmem_cache_alloc_node() definition.

Signed-off-by: Zhouping Liu
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index a98f8db93670..273a5ac2ade3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3340,18 +3340,6 @@ done:
 	return obj;
 }
 
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
 static __always_inline void *
 slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 		   unsigned long caller)
@@ -3645,6 +3633,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #endif
 
 #ifdef CONFIG_NUMA
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve the performance for cpu bound structures.
+ *
+ * Fallback to other node is possible if __GFP_THISNODE is not set.
+ */
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
--
cgit v1.2.3


From 069e2b351de67e7a837b15b3d26c65c19b790cc3 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Fri, 14 Jun 2013 19:55:13 +0000
Subject: slob: Rework #ifdeffery in slab.h

Make the SLOB-specific parts of slab.h harmonize with the way the other
allocators handle this, and create the typical kmalloc constants for
SLOB as well. SLOB does not support a kmalloc array, but the constants
help us avoid #ifdefs.

Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0c621752caa6..9690c14eb7fb 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -169,11 +169,7 @@ struct kmem_cache {
 	struct list_head list;	/* List of all slab caches on the system */
 };
 
-#define KMALLOC_MAX_SIZE (1UL << 30)
-
-#include <linux/slob_def.h>
-
-#else /* CONFIG_SLOB */
+#endif /* CONFIG_SLOB */
 
 /*
  * Kmalloc array related definitions
@@ -195,7 +191,9 @@ struct kmem_cache {
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	5
 #endif
-#else
+#endif
+
+#ifdef CONFIG_SLUB
 /*
  * SLUB allocates up to order 2 pages directly and otherwise
  * passes the request to the page allocator.
@@ -207,6 +205,19 @@ struct kmem_cache {
 #endif
 #endif
 
+#ifdef CONFIG_SLOB
+/*
+ * SLOB passes all page size and larger requests to the page allocator.
+ * No kmalloc array is necessary since objects of different sizes can
+ * be allocated from the same page.
+ */
+#define KMALLOC_SHIFT_MAX	30
+#define KMALLOC_SHIFT_HIGH	PAGE_SHIFT
+#ifndef KMALLOC_SHIFT_LOW
+#define KMALLOC_SHIFT_LOW	3
+#endif
+#endif
+
 /* Maximum allocatable size */
 #define KMALLOC_MAX_SIZE	(1UL << KMALLOC_SHIFT_MAX)
 /* Maximum size for which we actually use a slab cache */
@@ -221,6 +232,7 @@ struct kmem_cache {
 #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
 #endif
 
+#ifndef CONFIG_SLOB
 extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
 #ifdef CONFIG_ZONE_DMA
 extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
@@ -275,13 +287,18 @@ static __always_inline int kmalloc_index(size_t size)
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
+#endif /* !CONFIG_SLOB */
 
 #ifdef CONFIG_SLAB
 #include <linux/slab_def.h>
-#elif defined(CONFIG_SLUB)
+#endif
+
+#ifdef CONFIG_SLUB
 #include <linux/slub_def.h>
-#else
-#error "Unknown slab allocator"
+#endif
+
+#ifdef CONFIG_SLOB
+#include <linux/slob_def.h>
 #endif
 
 /*
@@ -291,6 +308,7 @@ static __always_inline int kmalloc_index(size_t size)
  */
 static __always_inline int kmalloc_size(int n)
 {
+#ifndef CONFIG_SLOB
 	if (n > 2)
 		return 1 << n;
 
@@ -299,10 +317,9 @@ static __always_inline int kmalloc_size(int n)
 
 	if (n == 2 && KMALLOC_MIN_SIZE <= 64)
 		return 192;
-
+#endif
 	return 0;
 }
-#endif /* !CONFIG_SLOB */
 
 /*
  * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
--
cgit v1.2.3


From 0fa8103be4c20f893486c533e4c6dfbc5ccddeb4 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:22 +0800
Subject: mm/slab: Fix drain freelist excessively

drain_freelist() is called to drain the slabs_free lists for cache reap,
cache shrink, the memory hotplug callback, etc. Its tofree parameter is
the number of slabs to free, not the number of slab objects to free.
This patch fixes the callers that were passing a number of objects and
makes sure they pass a number of slabs.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 273a5ac2ade3..c9b4da9a1fe5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1180,6 +1180,12 @@ static int init_cache_node_node(int node)
 	return 0;
 }
 
+static inline int slabs_tofree(struct kmem_cache *cachep,
+						struct kmem_cache_node *n)
+{
+	return (n->free_objects + cachep->num - 1) / cachep->num;
+}
+
 static void __cpuinit cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
@@ -1241,7 +1247,7 @@ free_array_cache:
 		n = cachep->node[node];
 		if (!n)
 			continue;
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 	}
 }
 
@@ -1408,7 +1414,7 @@ static int __meminit drain_cache_node_node(int node)
 		if (!n)
 			continue;
 
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
 		if (!list_empty(&n->slabs_full) ||
 		    !list_empty(&n->slabs_partial)) {
@@ -2534,7 +2540,7 @@ static int __cache_shrink(struct kmem_cache *cachep)
 		if (!n)
 			continue;
 
-		drain_freelist(cachep, n, n->free_objects);
+		drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
 		ret += !list_empty(&n->slabs_full) ||
 			!list_empty(&n->slabs_partial);
--
cgit v1.2.3
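As a quick sanity check of the rounding that slabs_tofree() performs above:
it is plain ceiling division. With, say, 16 objects per slab and 40 free
objects, the old callers asked drain_freelist() to free 40 "slabs", while
the helper computes ceil(40 / 16) = 3. The numbers are made up for
illustration and the snippet is a standalone user-space sketch, not kernel
code.

#include <stdio.h>

/* Same arithmetic as slabs_tofree(): round free_objects / objs_per_slab up. */
static int slabs_tofree(int free_objects, int objs_per_slab)
{
	return (free_objects + objs_per_slab - 1) / objs_per_slab;
}

int main(void)
{
	printf("%d\n", slabs_tofree(40, 16));	/* prints 3 */
	return 0;
}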
From e25839f67948ca54fa55a45686d72c266f65f099 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:23 +0800
Subject: mm/slab: Sharing s_next and s_stop between slab and slub

This patch shares s_next and s_stop between slab and slub.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c        | 10 ----------
 mm/slab.h        |  3 +++
 mm/slab_common.c |  4 ++--
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index c9b4da9a1fe5..4a907a072669 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4438,16 +4438,6 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
-	return seq_list_next(p, &slab_caches, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-{
-	mutex_unlock(&slab_mutex);
-}
-
 static const struct seq_operations slabstats_op = {
 	.start = leaks_start,
 	.next = s_next,
diff --git a/mm/slab.h b/mm/slab.h
index f96b49e4704e..95c88604aab7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -271,3 +271,6 @@ struct kmem_cache_node {
 #endif
 
 };
+
+void *s_next(struct seq_file *m, void *p, loff_t *pos);
+void s_stop(struct seq_file *m, void *p);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d2517b05d5bc..68518eb67229 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -529,12 +529,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	return seq_list_start(&slab_caches, *pos);
 }
 
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+void *s_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	return seq_list_next(p, &slab_caches, pos);
 }
 
-static void s_stop(struct seq_file *m, void *p)
+void s_stop(struct seq_file *m, void *p)
 {
 	mutex_unlock(&slab_mutex);
 }
--
cgit v1.2.3


From e9b4db2b8dba6b6c666e54b20ce46f3e597a6d96 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:24 +0800
Subject: mm/slab: Fix /proc/slabinfo unwriteable for slab

SLAB has some tunables, such as limit, batchcount, and sharedfactor,
that can be tuned through slabinfo_write(). Commit b7454ad3 ("mm/sl[au]b:
Move slabinfo processing to slab_common.c") incorrectly made
/proc/slabinfo unwriteable for SLAB; this patch fixes it by reverting to
the original mode.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab_common.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 68518eb67229..13ae037c71d4 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -495,6 +495,13 @@ void __init create_kmalloc_caches(unsigned long flags)
 
 #ifdef CONFIG_SLABINFO
+
+#ifdef CONFIG_SLAB
+#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
+#else
+#define SLABINFO_RIGHTS S_IRUSR
+#endif
+
 void print_slabinfo_header(struct seq_file *m)
 {
 	/*
@@ -631,7 +638,8 @@ static const struct file_operations proc_slabinfo_operations = {
 
 static int __init slab_proc_init(void)
 {
-	proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
+	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
+						&proc_slabinfo_operations);
 	return 0;
 }
 module_init(slab_proc_init);
--
cgit v1.2.3
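With write permission restored, the tunables named in the changelog are set
by writing a single line to /proc/slabinfo; as far as I can tell from
slabinfo_write() in mm/slab_common.c, the accepted form is
"cache-name limit batchcount shared". The sketch below is user-space code,
not part of the patch, and the cache name and numbers are placeholders.

#include <stdio.h>

int main(void)
{
	/* Requires root and a CONFIG_SLAB kernel; values are illustrative. */
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f) {
		perror("fopen /proc/slabinfo");
		return 1;
	}
	fprintf(f, "dentry 120 60 8\n");	/* limit=120 batchcount=60 shared=8 */
	if (fclose(f) == EOF)
		perror("write /proc/slabinfo");
	return 0;
}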
From a446336454cf9ce3234a6013d1c3b482358d9459 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:25 +0800
Subject: mm/slub: Drop unnecessary nr_partials

This patch removes the unused nr_partials variable.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 4df2c0c337fb..f788be3a0b12 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5266,7 +5266,6 @@ __initcall(slab_sysfs_init);
 #ifdef CONFIG_SLABINFO
 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 {
-	unsigned long nr_partials = 0;
 	unsigned long nr_slabs = 0;
 	unsigned long nr_objs = 0;
 	unsigned long nr_free = 0;
@@ -5278,7 +5277,6 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 		if (!n)
 			continue;
 
-		nr_partials += n->nr_partial;
 		nr_slabs += atomic_long_read(&n->nr_slabs);
 		nr_objs += atomic_long_read(&n->total_objects);
 		nr_free += count_partial(n, count_free);
--
cgit v1.2.3


From c17fd13ec0677e61f3692ecb9d4b21f79848fa04 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Thu, 4 Jul 2013 08:33:26 +0800
Subject: mm/slub: Use node_nr_slabs and node_nr_objs in get_slabinfo

Use the existing interfaces node_nr_slabs() and node_nr_objs() to get
nr_slabs and nr_objs.

Acked-by: Christoph Lameter
Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index f788be3a0b12..5ee6c7cd9fc4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5277,8 +5277,8 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
 		if (!n)
 			continue;
 
-		nr_slabs += atomic_long_read(&n->nr_slabs);
-		nr_objs += atomic_long_read(&n->total_objects);
+		nr_slabs += node_nr_slabs(n);
+		nr_objs += node_nr_objs(n);
 		nr_free += count_partial(n, count_free);
 	}
 
--
cgit v1.2.3


From 318df36e57c0ca9f2146660d41ff28e8650af423 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 19 Jun 2013 15:33:55 +0900
Subject: slub: do not put a slab to cpu partial list when cpu_partial is 0

In the free path, we do not check the number of cpu partials, so a slab
can be linked into the cpu partial list even if cpu_partial is 0. To
prevent this, check s->cpu_partial in put_cpu_partial().

Acked-by: Christoph Lameter
Reviewed-by: Wanpeng Li
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 5ee6c7cd9fc4..54cc4d544f3c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1954,6 +1954,9 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 	int pages;
 	int pobjects;
 
+	if (!s->cpu_partial)
+		return;
+
 	do {
 		pages = 0;
 		pobjects = 0;
--
cgit v1.2.3


From a6d78159f8a717263bea71bef738256dafe6260d Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Thu, 20 Dec 2012 14:11:39 -0500
Subject: slob: use DIV_ROUND_UP where possible

Acked-by: Christoph Lameter
Signed-off-by: Sasha Levin
Signed-off-by: Pekka Enberg
---
 mm/slob.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slob.c b/mm/slob.c
index a99fdf7a0907..f729c46639fa 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -122,7 +122,7 @@ static inline void clear_slob_page_free(struct page *sp)
 }
 
 #define SLOB_UNIT sizeof(slob_t)
-#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
+#define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)
 
 /*
  * struct slob_rcu is inserted at the tail of allocated slob blocks, which
--
cgit v1.2.3
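For reference, DIV_ROUND_UP() from linux/kernel.h expands to
((n) + (d) - 1) / (d), i.e. exactly the open-coded ceiling division that
SLOB_UNITS() used before, so the change is purely cosmetic. The tiny
user-space check below (not kernel code; the sample values are arbitrary)
shows both forms agreeing.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int size = 13, unit = 4;	/* arbitrary sample values */

	/* old open-coded form vs. the macro: both print 4 */
	printf("%d %d\n", (size + unit - 1) / unit, DIV_ROUND_UP(size, unit));
	return 0;
}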
From 0f8f8094d28eb53368ac09186ea6b3a324cc7d44 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Tue, 2 Jul 2013 12:12:10 -0700
Subject: slab: fix init_lock_keys

Some architectures (e.g. powerpc built with CONFIG_PPC_256K_PAGES=y
CONFIG_FORCE_MAX_ZONEORDER=11) get PAGE_SHIFT + MAX_ORDER > 26.

In 3.10 kernels, CONFIG_LOCKDEP=y with PAGE_SHIFT + MAX_ORDER > 26 makes
init_lock_keys() dereference beyond kmalloc_caches[26]. This leads to an
unbootable system (kernel panic while initializing SLAB) if one of
kmalloc_caches[26...PAGE_SHIFT+MAX_ORDER-1] is not NULL.

Fix this by making sure that init_lock_keys() does not dereference beyond
the kmalloc_caches[26] array.

Signed-off-by: Christoph Lameter
Reported-by: Tetsuo Handa
Cc: Pekka Enberg
Cc: [3.10.x]
Signed-off-by: Andrew Morton
Signed-off-by: Pekka Enberg
---
 mm/slab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slab.c b/mm/slab.c
index 4a907a072669..9bf225162fcb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -565,7 +565,7 @@ static void init_node_lock_keys(int q)
 	if (slab_state < UP)
 		return;
 
-	for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) {
+	for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
 		struct kmem_cache_node *n;
 		struct kmem_cache *cache = kmalloc_caches[i];
--
cgit v1.2.3


From e7efa615ccf78394338144ff0187be331240748a Mon Sep 17 00:00:00 2001
From: Michael Opdenacker
Date: Tue, 25 Jun 2013 18:16:55 +0200
Subject: slab: add kmalloc() to kernel API documentation

At the moment, kmalloc() isn't even listed in the kernel API
documentation (DocBook/kernel-api.html after running "make htmldocs").

Another issue is that the documentation for kmalloc_node() refers to
kcalloc()'s documentation to describe its 'flags' parameter, while
kcalloc() referred to kmalloc()'s documentation, which doesn't exist!

This patch is a proposed fix for this. It also removes the documentation
for kmalloc() in include/linux/slob_def.h, which isn't included when
generating the documentation anyway. This way, kmalloc() is described in
only one place.

Acked-by: Christoph Lameter
Acked-by: Randy Dunlap
Signed-off-by: Michael Opdenacker
Signed-off-by: Pekka Enberg
---
 include/linux/slab.h     | 18 ++++++++++++++----
 include/linux/slob_def.h |  8 --------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9690c14eb7fb..6c5cc0ea8713 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -373,9 +373,8 @@ int cache_show(struct kmem_cache *s, struct seq_file *m);
 void print_slabinfo_header(struct seq_file *m);
 
 /**
- * kmalloc_array - allocate memory for an array.
- * @n: number of elements.
- * @size: element size.
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate.
 *
 * The @flags argument may be one of:
@@ -422,6 +421,17 @@ void print_slabinfo_header(struct seq_file *m);
 * There are other flags available as well, but these are not intended
 * for general use, and so are not documented here. For a full list of
 * potential flags, always refer to linux/gfp.h.
+ *
+ * kmalloc is the normal method of allocating memory
+ * in the kernel.
+ */
+static __always_inline void *kmalloc(size_t size, gfp_t flags);
+
+/**
+ * kmalloc_array - allocate memory for an array.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
 */
 static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
@@ -445,7 +455,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
 /**
 * kmalloc_node - allocate memory from a specific node
 * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
+ * @flags: the type of memory to allocate (see kmalloc).
 * @node: node to allocate from.
 *
 * kmalloc() for non-local nodes, used to allocate from a specific node
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index f28e14a12e3f..095a5a4a8516 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -18,14 +18,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 	return __kmalloc_node(size, flags, node);
 }
 
-/**
- * kmalloc - allocate memory
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kcalloc).
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- */
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	return __kmalloc_node(size, flags, NUMA_NO_NODE);
--
cgit v1.2.3


From 345c905d13a4ec9f774b6b4bc038fe4aef26cced Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 19 Jun 2013 14:05:52 +0900
Subject: slub: Make cpu partial slab support configurable

CPU partial support can introduce a level of indeterminism that is not
wanted in certain contexts (like a realtime kernel). Make it
configurable.

This patch is based on Christoph Lameter's "slub: Make cpu partial slab
support configurable V2".

Acked-by: Christoph Lameter
Signed-off-by: Joonsoo Kim
Signed-off-by: Pekka Enberg
---
 init/Kconfig | 11 +++++++++++
 mm/slub.c    | 27 +++++++++++++++++++++------
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 7d30240e5bfe..3b34a88cf34e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1511,6 +1511,17 @@ config SLOB
 
 endchoice
 
+config SLUB_CPU_PARTIAL
+	default y
+	depends on SLUB
+	bool "SLUB per cpu partial cache"
+	help
+	  Per cpu partial caches accellerate objects allocation and freeing
+	  that is local to a processor at the price of more indeterminism
+	  in the latency of the free. On overflow these caches will be cleared
+	  which requires the taking of locks that may cause latency spikes.
+	  Typically one would choose no for a realtime system.
+
 config MMAP_ALLOW_UNINITIALIZED
 	bool "Allow mmapped anonymous memory to be uninitialized"
 	depends on EXPERT && !MMU
diff --git a/mm/slub.c b/mm/slub.c
index 54cc4d544f3c..ef60536c5d69 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -122,6 +122,15 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 #endif
 }
 
+static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
+{
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+	return !kmem_cache_debug(s);
+#else
+	return false;
+#endif
+}
+
 /*
  * Issues still to be resolved:
 *
@@ -1572,7 +1581,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			put_cpu_partial(s, page, 0);
 			stat(s, CPU_PARTIAL_NODE);
 		}
-		if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
+		if (!kmem_cache_has_cpu_partial(s)
+			|| available > s->cpu_partial / 2)
 			break;
 
 	}
@@ -1883,6 +1893,7 @@ redo:
 static void unfreeze_partials(struct kmem_cache *s,
 		struct kmem_cache_cpu *c)
 {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct kmem_cache_node *n = NULL, *n2 = NULL;
 	struct page *page, *discard_page = NULL;
 
@@ -1937,6 +1948,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
+#endif
 }
 
 /*
@@ -1950,6 +1962,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 */
 static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 {
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 	struct page *oldpage;
 	int pages;
 	int pobjects;
@@ -1989,6 +2002,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->next = oldpage;
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
 								!= oldpage);
+#endif
 }
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -2497,7 +2511,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		new.inuse--;
 		if ((!new.inuse || !prior) && !was_frozen) {
 
-			if (!kmem_cache_debug(s) && !prior)
+			if (kmem_cache_has_cpu_partial(s) && !prior)
 
 				/*
 				 * Slab was on no list before and will be partially empty
@@ -2552,8 +2566,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	 * Objects left in the slab. If it was not on the partial list before
 	 * then add it.
 	 */
-	if (kmem_cache_debug(s) && unlikely(!prior)) {
-		remove_full(s, page);
+	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
+		if (kmem_cache_debug(s))
+			remove_full(s, page);
 		add_partial(n, page, DEACTIVATE_TO_TAIL);
 		stat(s, FREE_ADD_PARTIAL);
 	}
@@ -3061,7 +3076,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
 	 * per node list when we run out of per cpu objects. We only fetch 50%
 	 * to keep some capacity around for frees.
 	 */
-	if (kmem_cache_debug(s))
+	if (!kmem_cache_has_cpu_partial(s))
 		s->cpu_partial = 0;
 	else if (s->size >= PAGE_SIZE)
 		s->cpu_partial = 2;
@@ -4456,7 +4471,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
 	err = strict_strtoul(buf, 10, &objects);
 	if (err)
 		return err;
-	if (objects && kmem_cache_debug(s))
+	if (objects && !kmem_cache_has_cpu_partial(s))
 		return -EINVAL;
 
 	s->cpu_partial = objects;
--
cgit v1.2.3
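The last hunk above also tightens the validation in cpu_partial_store(),
which backs the per-cache sysfs attribute
/sys/kernel/slab/<cache>/cpu_partial: once a cache has no per-cpu partial
support (CONFIG_SLUB_CPU_PARTIAL=n, or a debugged cache), any attempt to
set a non-zero value is rejected with -EINVAL. A small user-space sketch
of poking that knob follows; it is not part of the patch, and the cache
name and value are placeholders.

#include <stdio.h>

int main(void)
{
	/* kmalloc-64 is just an example cache; pick any /sys/kernel/slab entry */
	FILE *f = fopen("/sys/kernel/slab/kmalloc-64/cpu_partial", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "13\n");
	if (fclose(f) == EOF)	/* expect EINVAL if cpu partials are unavailable */
		perror("cpu_partial");
	return 0;
}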
From c1e854e924f354657ea2ad08fd7b38aac81c59b1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 17 Jan 2013 12:13:46 -0500
Subject: slob: Check for NULL pointer before calling ctor()

While doing some code inspection, I noticed that the slob constructor
method can be called with a NULL pointer. If memory is tight and slob
fails to allocate with slob_alloc() or slob_new_pages(), it still calls
the ctor() method with a NULL pointer.

Looking at the first ctor() method I found, I noticed that it cannot
handle a NULL pointer (I'm sure others probably can't either):

static void sighand_ctor(void *data)
{
	struct sighand_struct *sighand = data;

	spin_lock_init(&sighand->siglock);
	init_waitqueue_head(&sighand->signalfd_wqh);
}

The solution is to only call the ctor() method if allocation succeeded.

Acked-by: Christoph Lameter
Signed-off-by: Steven Rostedt
Signed-off-by: Pekka Enberg
---
 mm/slob.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slob.c b/mm/slob.c
index f729c46639fa..3d73b3b8fb1d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -554,7 +554,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 						  flags, node);
 	}
 
-	if (c->ctor)
+	if (b && c->ctor)
 		c->ctor(b);
 
 	kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
--
cgit v1.2.3


From 276a2439ce7917b8c3043af7ad6bf17bbcc24030 Mon Sep 17 00:00:00 2001
From: Wanpeng Li
Date: Mon, 8 Jul 2013 08:08:28 +0800
Subject: mm/slab: Give s_next and s_stop slab-specific names

Give s_next and s_stop slab-specific names instead of exporting
"s_next" and "s_stop".

Signed-off-by: Wanpeng Li
Signed-off-by: Pekka Enberg
---
 mm/slab.c        | 4 ++--
 mm/slab.h        | 4 ++--
 mm/slab_common.c | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 9bf225162fcb..57ab42297d96 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4440,8 +4440,8 @@ static int leaks_show(struct seq_file *m, void *p)
 	return 0;
 }
 
 static const struct seq_operations slabstats_op = {
 	.start = leaks_start,
-	.next = s_next,
-	.stop = s_stop,
+	.next = slab_next,
+	.stop = slab_stop,
 	.show = leaks_show,
 };
diff --git a/mm/slab.h b/mm/slab.h
index 95c88604aab7..620ceeddbe1a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -272,5 +272,5 @@ struct kmem_cache_node {
 
 };
 
-void *s_next(struct seq_file *m, void *p, loff_t *pos);
-void s_stop(struct seq_file *m, void *p);
+void *slab_next(struct seq_file *m, void *p, loff_t *pos);
+void slab_stop(struct seq_file *m, void *p);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 13ae037c71d4..eacdffaf71c9 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -536,12 +536,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 	return seq_list_start(&slab_caches, *pos);
 }
 
-void *s_next(struct seq_file *m, void *p, loff_t *pos)
+void *slab_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	return seq_list_next(p, &slab_caches, pos);
 }
 
-void s_stop(struct seq_file *m, void *p)
+void slab_stop(struct seq_file *m, void *p)
 {
 	mutex_unlock(&slab_mutex);
 }
@@ -618,8 +618,8 @@ static int s_show(struct seq_file *m, void *p)
 */
 static const struct seq_operations slabinfo_op = {
 	.start = s_start,
-	.next = s_next,
-	.stop = s_stop,
+	.next = slab_next,
+	.stop = slab_stop,
 	.show = s_show,
 };
--
cgit v1.2.3


From c25f195e828f847735c7626b5693ddc3b853d245 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 17 Jan 2013 13:10:58 -0500
Subject: slub: Check for page NULL before doing the node_match check

In the -rt kernel (mrg), we hit the following dump:

BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [] kmem_cache_alloc_node+0x51/0x180
PGD a2d39067 PUD b1641067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: sunrpc cpufreq_ondemand ipv6 tg3 joydev sg serio_raw pcspkr k8temp amd64_edac_mod edac_core i2c_piix4 e100 mii shpchp ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom sata_svw ata_generic pata_acpi pata_serverworks radeon ttm drm_kms_helper drm hwmon i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod
CPU 3
Pid: 20878, comm: hackbench Not tainted 3.6.11-rt25.14.el6rt.x86_64 #1 empty empty/Tyan Transport GT24-B3992
RIP: 0010:[] [] kmem_cache_alloc_node+0x51/0x180
RSP: 0018:ffff8800a9b17d70 EFLAGS: 00010213
RAX: 0000000000000000 RBX: 0000000001200011 RCX: ffff8800a06d8000
RDX: 0000000004d92a03 RSI: 00000000000000d0 RDI: ffff88013b805500
RBP: ffff8800a9b17dc0 R08: ffff88023fd14d10 R09: ffffffff81041cbd
R10: 00007f4e3f06e9d0 R11: 0000000000000246 R12: ffff88013b805500
R13: ffff8801ff46af40 R14: 0000000000000001 R15: 0000000000000000
FS: 00007f4e3f06e700(0000) GS:ffff88023fd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 00000000a2d3a000 CR4: 00000000000007e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process hackbench (pid: 20878, threadinfo ffff8800a9b16000, task ffff8800a06d8000)
Stack:
 ffff8800a9b17da0 ffffffff81202e08 ffff8800a9b17de0 000000d001200011
 0000000001200011 0000000001200011 0000000000000000 0000000000000000
 00007f4e3f06e9d0 0000000000000000 ffff8800a9b17e60 ffffffff81041cbd
Call Trace:
 [] ? current_has_perm+0x68/0x80
 [] copy_process+0xdd/0x15b0
 [] ? rt_up_read+0x25/0x30
 [] do_fork+0x5a/0x360
 [] ? migrate_enable+0xeb/0x220
 [] sys_clone+0x28/0x30
 [] stub_clone+0x13/0x20
 [] ? system_call_fastpath+0x16/0x1b
Code: 89 fc 89 75 cc 41 89 d6 4d 8b 04 24 65 4c 03 04 25 48 ae 00 00 49 8b 50 08 4d 8b 28 49 8b 40 10 4d 85 ed 74 12 41 83 fe ff 74 27 <48> 8b 00 48 c1 e8 3a 41 39 c6 74 1b 8b 75 cc 4c 89 c9 44 89 f2
RIP [] kmem_cache_alloc_node+0x51/0x180
 RSP
CR2: 0000000000000000
---[ end trace 0000000000000002 ]---

Now, this uses SLUB pretty much unmodified, but as it is the -rt kernel
with CONFIG_PREEMPT_RT set, spinlocks are mutexes, although they do
disable migration. But the SLUB code is relatively lockless, and the
spin_locks there are raw_spin_locks (not converted to mutexes), thus I
believe this bug can happen in mainline without -rt features. The -rt
patch is just good at triggering mainline bugs ;-)

Anyway, looking at where this crashed, it seems that the page variable
can be NULL when passed to the node_match() function (which does not
check if it is NULL). When this happens we get the above panic.

As page is only used in slab_alloc() to check if the node matches, if
it's NULL I'm assuming that we can say it doesn't and call the
__slab_alloc() code. Is this a correct assumption?

Acked-by: Christoph Lameter
Signed-off-by: Steven Rostedt
Signed-off-by: Pekka Enberg
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index ef60536c5d69..33f71330e713 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2374,7 +2374,7 @@ redo:
 
 	object = c->freelist;
 	page = c->page;
-	if (unlikely(!object || !node_match(page, node)))
+	if (unlikely(!object || !page || !node_match(page, node)))
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 	else {
--
cgit v1.2.3
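For context on why a NULL page is fatal here: node_match() reads the page's
node id without any NULL check. The snippet below is a simplified rendering
of SLUB's node_match() from this era, reconstructed for illustration rather
than quoted from the patch; the exact body in mm/slub.c may differ slightly.

/*
 * Simplified sketch of node_match(). page_to_nid() reads page->flags, so a
 * NULL page faults right here, which is what the one-line fix above avoids
 * by sending the NULL-page case straight to __slab_alloc().
 */
static inline int node_match(struct page *page, int node)
{
#ifdef CONFIG_NUMA
	if (node != NUMA_NO_NODE && page_to_nid(page) != node)
		return 0;
#endif
	return 1;
}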