summary | refs | log | tree | commit | diff
path: root/mm/slab.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- mm/slab.c | 183
1 files changed, 102 insertions, 81 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 233e39d14caf..85c2e03098a7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -309,6 +309,13 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define SIZE_AC 1
#define SIZE_L3 (1 + MAX_NUMNODES)
+static int drain_freelist(struct kmem_cache *cache,
+ struct kmem_list3 *l3, int tofree);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+ int node);
+static void enable_cpucache(struct kmem_cache *cachep);
+static void cache_reap(void *unused);
+
/*
* This function must be completely optimized away if a constant is passed to
* it. Mostly the same as what is in linux/slab.h except it returns an index.
@@ -456,7 +463,7 @@ struct kmem_cache {
#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
#define STATS_INC_GROWN(x) ((x)->grown++)
-#define STATS_INC_REAPED(x) ((x)->reaped++)
+#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
#define STATS_SET_HIGH(x) \
do { \
if ((x)->num_active > (x)->high_mark) \
@@ -480,7 +487,7 @@ struct kmem_cache {
#define STATS_DEC_ACTIVE(x) do { } while (0)
#define STATS_INC_ALLOCED(x) do { } while (0)
#define STATS_INC_GROWN(x) do { } while (0)
-#define STATS_INC_REAPED(x) do { } while (0)
+#define STATS_ADD_REAPED(x,y) do { } while (0)
#define STATS_SET_HIGH(x) do { } while (0)
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
@@ -700,12 +707,6 @@ int slab_is_available(void)
static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
-static void enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
-static int __node_shrink(struct kmem_cache *cachep, int node);
-
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
@@ -1020,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
}
}
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
+ int nesting)
{
struct slab *slabp = virt_to_slab(objp);
int nodeid = slabp->nodeid;
@@ -1038,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
- spin_lock(&alien->lock);
+ spin_lock_nested(&alien->lock, nesting);
if (unlikely(alien->avail == alien->limit)) {
STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep, alien, nodeid);
@@ -1067,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
+ int nesting)
{
return 0;
}
@@ -1241,10 +1244,7 @@ free_array_cache:
l3 = cachep->nodelists[node];
if (!l3)
continue;
- spin_lock_irq(&l3->list_lock);
- /* free slabs belonging to this node */
- __node_shrink(cachep, node);
- spin_unlock_irq(&l3->list_lock);
+ drain_freelist(cachep, l3, l3->free_objects);
}
mutex_unlock(&cache_chain_mutex);
break;
@@ -1274,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
local_irq_disable();
memcpy(ptr, list, sizeof(struct kmem_list3));
+ /*
+ * Do not assume that spinlocks can be initialized via memcpy:
+ */
+ spin_lock_init(&ptr->list_lock);
+
MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->nodelists[nodeid] = ptr;
local_irq_enable();
@@ -1400,7 +1405,7 @@ void __init kmem_cache_init(void)
}
/* 4) Replace the bootstrap head arrays */
{
- void *ptr;
+ struct array_cache *ptr;
ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
@@ -1408,6 +1413,11 @@ void __init kmem_cache_init(void)
BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
memcpy(ptr, cpu_cache_get(&cache_cache),
sizeof(struct arraycache_init));
+ /*
+ * Do not assume that spinlocks can be initialized via memcpy:
+ */
+ spin_lock_init(&ptr->lock);
+
cache_cache.array[smp_processor_id()] = ptr;
local_irq_enable();
@@ -1418,6 +1428,11 @@ void __init kmem_cache_init(void)
!= &initarray_generic.cache);
memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
sizeof(struct arraycache_init));
+ /*
+ * Do not assume that spinlocks can be initialized via memcpy:
+ */
+ spin_lock_init(&ptr->lock);
+
malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
ptr;
local_irq_enable();
@@ -1507,7 +1522,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
atomic_add(nr_pages, &slab_reclaim_pages);
- add_page_state(nr_slab, nr_pages);
+ add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
for (i = 0; i < nr_pages; i++)
__SetPageSlab(page + i);
return page_address(page);
@@ -1522,12 +1537,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
struct page *page = virt_to_page(addr);
const unsigned long nr_freed = i;
+ sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
while (i--) {
BUG_ON(!PageSlab(page));
__ClearPageSlab(page);
page++;
}
- sub_page_state(nr_slab, nr_freed);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
free_pages((unsigned long)addr, cachep->gfporder);
@@ -1745,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
}
#endif
+static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
+
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
@@ -1768,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
kmem_freepages(cachep, addr);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slabp);
+ if (OFF_SLAB(cachep)) {
+ unsigned long flags;
+
+ /*
+ * lockdep: we may nest inside an already held
+ * ac->lock, so pass in a nesting flag:
+ */
+ local_irq_save(flags);
+ __cache_free(cachep->slabp_cache, slabp, 1);
+ local_irq_restore(flags);
+ }
}
}
@@ -2248,32 +2274,45 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
}
}
-static int __node_shrink(struct kmem_cache *cachep, int node)
+/*
+ * Remove slabs from the list of free slabs.
+ * Specify the number of slabs to drain in tofree.
+ *
+ * Returns the actual number of slabs released.
+ */
+static int drain_freelist(struct kmem_cache *cache,
+ struct kmem_list3 *l3, int tofree)
{
+ struct list_head *p;
+ int nr_freed;
struct slab *slabp;
- struct kmem_list3 *l3 = cachep->nodelists[node];
- int ret;
- for (;;) {
- struct list_head *p;
+ nr_freed = 0;
+ while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+ spin_lock_irq(&l3->list_lock);
p = l3->slabs_free.prev;
- if (p == &l3->slabs_free)
- break;
+ if (p == &l3->slabs_free) {
+ spin_unlock_irq(&l3->list_lock);
+ goto out;
+ }
- slabp = list_entry(l3->slabs_free.prev, struct slab, list);
+ slabp = list_entry(p, struct slab, list);
#if DEBUG
BUG_ON(slabp->inuse);
#endif
list_del(&slabp->list);
-
- l3->free_objects -= cachep->num;
+ /*
+ * Safe to drop the lock. The slab is no longer linked
+ * to the cache.
+ */
+ l3->free_objects -= cache->num;
spin_unlock_irq(&l3->list_lock);
- slab_destroy(cachep, slabp);
- spin_lock_irq(&l3->list_lock);
+ slab_destroy(cache, slabp);
+ nr_freed++;
}
- ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
- return ret;
+out:
+ return nr_freed;
}
static int __cache_shrink(struct kmem_cache *cachep)
@@ -2286,11 +2325,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
check_irq_on();
for_each_online_node(i) {
l3 = cachep->nodelists[i];
- if (l3) {
- spin_lock_irq(&l3->list_lock);
- ret += __node_shrink(cachep, i);
- spin_unlock_irq(&l3->list_lock);
- }
+ if (!l3)
+ continue;
+
+ drain_freelist(cachep, l3, l3->free_objects);
+
+ ret += !list_empty(&l3->slabs_full) ||
+ !list_empty(&l3->slabs_partial);
}
return (ret ? 1 : 0);
}
@@ -3059,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (slabp->inuse == 0) {
if (l3->free_objects > l3->free_limit) {
l3->free_objects -= cachep->num;
+ /*
+ * It is safe to drop the lock. The slab is
+ * no longer linked to the cache. cachep
+ * cannot disappear - we are using it and
+ * all destruction of caches must be
+ * serialized properly by the user.
+ */
+ spin_unlock(&l3->list_lock);
slab_destroy(cachep, slabp);
+ spin_lock(&l3->list_lock);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
@@ -3085,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
#endif
check_irq_off();
l3 = cachep->nodelists[node];
- spin_lock(&l3->list_lock);
+ spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
if (l3->shared) {
struct array_cache *shared_array = l3->shared;
int max = shared_array->limit - shared_array->avail;
@@ -3128,14 +3178,14 @@ free_done:
* Release an obj back to its cache. If the obj has a constructed state, it must
* be in this state _before_ it is released. Called with disabled ints.
*/
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
- if (cache_free_alien(cachep, objp))
+ if (cache_free_alien(cachep, objp, nesting))
return;
if (likely(ac->avail < ac->limit)) {
@@ -3374,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
BUG_ON(virt_to_cache(objp) != cachep);
local_irq_save(flags);
- __cache_free(cachep, objp);
+ __cache_free(cachep, objp, 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3399,7 +3449,7 @@ void kfree(const void *objp)
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
- __cache_free(c, (void *)objp);
+ __cache_free(c, (void *)objp, 0);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kfree);
@@ -3694,10 +3744,6 @@ static void cache_reap(void *unused)
}
list_for_each_entry(searchp, &cache_chain, next) {
- struct list_head *p;
- int tofree;
- struct slab *slabp;
-
check_irq_on();
/*
@@ -3722,47 +3768,22 @@ static void cache_reap(void *unused)
drain_array(searchp, l3, l3->shared, 0, node);
- if (l3->free_touched) {
+ if (l3->free_touched)
l3->free_touched = 0;
- goto next;
- }
-
- tofree = (l3->free_limit + 5 * searchp->num - 1) /
- (5 * searchp->num);
- do {
- /*
- * Do not lock if there are no free blocks.
- */
- if (list_empty(&l3->slabs_free))
- break;
-
- spin_lock_irq(&l3->list_lock);
- p = l3->slabs_free.next;
- if (p == &(l3->slabs_free)) {
- spin_unlock_irq(&l3->list_lock);
- break;
- }
-
- slabp = list_entry(p, struct slab, list);
- BUG_ON(slabp->inuse);
- list_del(&slabp->list);
- STATS_INC_REAPED(searchp);
+ else {
+ int freed;
- /*
- * Safe to drop the lock. The slab is no longer linked
- * to the cache. searchp cannot disappear, we hold
- * cache_chain_lock
- */
- l3->free_objects -= searchp->num;
- spin_unlock_irq(&l3->list_lock);
- slab_destroy(searchp, slabp);
- } while (--tofree > 0);
+ freed = drain_freelist(searchp, l3, (l3->free_limit +
+ 5 * searchp->num - 1) / (5 * searchp->num));
+ STATS_ADD_REAPED(searchp, freed);
+ }
next:
cond_resched();
}
check_irq_on();
mutex_unlock(&cache_chain_mutex);
next_reap_node();
+ refresh_cpu_vm_stats(smp_processor_id());
/* Set up the next iteration */
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
}