From b40607fc02f8248828d52d88f91b7d68df1933b0 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 22 Mar 2006 00:07:39 -0800
Subject: [PATCH] __get_page_state() cpumask cleanup and fix

__get_page_state() has an open-coded for_each_cpu_mask() loop in it.

Tidy that up, then notice that the code was buggy:

        while (cpu < NR_CPUS) {
                unsigned long *in, *out, off;

                if (!cpu_isset(cpu, *cpumask))
                        continue;

an obvious infinite loop.  I guess we just never call it with a holey cpu
mask.

Even after my cpumask size-reduction work, this patch increases code size :(

Cc: Paul Jackson
Cc: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 234bd4895d14..61775866ea18 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1214,24 +1214,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
-        int cpu = 0;
+        unsigned cpu;
 
         memset(ret, 0, nr * sizeof(unsigned long));
         cpus_and(*cpumask, *cpumask, cpu_online_map);
 
-        cpu = first_cpu(*cpumask);
-        while (cpu < NR_CPUS) {
-                unsigned long *in, *out, off;
-
-                if (!cpu_isset(cpu, *cpumask))
-                        continue;
+        for_each_cpu_mask(cpu, *cpumask) {
+                unsigned long *in;
+                unsigned long *out;
+                unsigned off;
+                unsigned next_cpu;
 
                 in = (unsigned long *)&per_cpu(page_states, cpu);
 
-                cpu = next_cpu(cpu, *cpumask);
-
-                if (likely(cpu < NR_CPUS))
-                        prefetch(&per_cpu(page_states, cpu));
+                next_cpu = next_cpu(cpu, *cpumask);
+                if (likely(next_cpu < NR_CPUS))
+                        prefetch(&per_cpu(page_states, next_cpu));
 
                 out = (unsigned long *)ret;
                 for (off = 0; off < nr; off++)
--
cgit v1.2.3
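A userspace sketch of the failure mode the changelog above describes, runnable outside the kernel. NR_CPUS, cpu_isset() and both walkers below are simplified stand-ins, not the kernel implementations: with a hole in the mask, the open-coded while loop never advances past the clear bit, while the for_each-style loop advances unconditionally before testing the bit.

/* Standalone sketch of the loop bug fixed above; a plain unsigned long
 * stands in for cpumask_t and NR_CPUS is shrunk to fit in one word.
 * The "buggy" variant is shown but not called, because on a mask with a
 * hole in it it never terminates.
 */
#include <stdio.h>

#define NR_CPUS 16

static int cpu_isset(int cpu, unsigned long mask)
{
        return (mask >> cpu) & 1;
}

/* Mirrors the old open-coded loop: 'continue' skips the advance step. */
static void buggy_walk(unsigned long mask)
{
        int cpu = 0;

        while (cpu < NR_CPUS) {
                if (!cpu_isset(cpu, mask))
                        continue;       /* cpu never changes: infinite loop */
                printf("cpu %d\n", cpu);
                cpu++;
        }
}

/* Mirrors the for_each_cpu_mask() form: the advance is unconditional. */
static void fixed_walk(unsigned long mask)
{
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!cpu_isset(cpu, mask))
                        continue;
                printf("cpu %d\n", cpu);
        }
}

int main(void)
{
        unsigned long holey_mask = 0x2d;        /* bits 0, 2, 3, 5 set */

        fixed_walk(holey_mask);
        /* buggy_walk(holey_mask) would spin forever at cpu == 1. */
        (void)buggy_walk;
        return 0;
}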
From 5e9dace8d386def04219134d7160e8a778824764 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 22 Mar 2006 00:08:01 -0800
Subject: [PATCH] mm: page_alloc less atomics

More atomic operation removal from page allocator

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 61775866ea18..102919851353 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -190,7 +190,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
         for (i = 0; i < nr_pages; i++) {
                 struct page *p = page + i;
 
-                SetPageCompound(p);
+                __SetPageCompound(p);
                 set_page_private(p, (unsigned long)page);
         }
 }
@@ -209,7 +209,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
                 if (unlikely(!PageCompound(p) |
                                 (page_private(p) != (unsigned long)page)))
                         bad_page(page);
-                ClearPageCompound(p);
+                __ClearPageCompound(p);
         }
 }
--
cgit v1.2.3

From 8dfcc9ba27e2ed257e5de9539f7f03e57c2c0e33 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 22 Mar 2006 00:08:05 -0800
Subject: [PATCH] mm: split highorder pages

Have an explicit mm call to split higher order pages into individual pages.
Should help to avoid bugs and be more explicit about the code's intention.

Signed-off-by: Nick Piggin
Cc: Russell King
Cc: David Howells
Cc: Ralf Baechle
Cc: Benjamin Herrenschmidt
Cc: Paul Mundt
Cc: "David S. Miller"
Cc: Chris Zankel
Signed-off-by: Yoichi Yuasa
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 102919851353..fc65e87368b3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -752,6 +752,28 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
                 clear_highpage(page + i);
 }
 
+#ifdef CONFIG_MMU
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0..n]
--
cgit v1.2.3

From 545b1ea9bfa5a8ca9af33d63144bd4f2faaea8dd Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 22 Mar 2006 00:08:07 -0800
Subject: [PATCH] mm: cleanup bootmem

The bootmem code added to page_alloc.c duplicated some page freeing code
that it really doesn't need to because it is not so performance critical.

While we're here, make prefetching work properly by actually prefetching
the page we're about to use before prefetching ahead to the next one (ie.
get the most important transaction started first).  Also prefetch just a
single page ahead rather than leaving a gap of 16.

Jack Steiner reported no problems with SGI's ia64 simulator.

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc65e87368b3..7aa0181287e1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,7 +55,6 @@ unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
-static void fastcall free_hot_cold_page(struct page *page, int cold);
 static void __free_pages_ok(struct page *page, unsigned int order);
 
 /*
@@ -448,28 +447,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
         if (order == 0) {
                 __ClearPageReserved(page);
                 set_page_count(page, 0);
-
-                free_hot_cold_page(page, 0);
+                set_page_refs(page, 0);
+                __free_page(page);
         } else {
-                LIST_HEAD(list);
                 int loop;
 
+                prefetchw(page);
                 for (loop = 0; loop < BITS_PER_LONG; loop++) {
                         struct page *p = &page[loop];
 
-                        if (loop + 16 < BITS_PER_LONG)
-                                prefetchw(p + 16);
+                        if (loop + 1 < BITS_PER_LONG)
+                                prefetchw(p + 1);
                         __ClearPageReserved(p);
                         set_page_count(p, 0);
                 }
 
-                arch_free_page(page, order);
-
-                mod_page_state(pgfree, 1 << order);
-
-                list_add(&page->lru, &list);
-                kernel_map_pages(page, 1 << order, 0);
-                free_pages_bulk(page_zone(page), 1, &list, order);
+                set_page_refs(page, order);
+                __free_pages(page, order);
         }
 }
--
cgit v1.2.3
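The prefetch ordering described in the changelog above generalises beyond the bootmem path. Below is a standalone sketch using the GCC/Clang __builtin_prefetch() builtin; struct item and touch() are invented placeholders for the real per-page work, and only the ordering of the prefetch calls (the element we are about to use first, then exactly one element ahead) reflects the change.

/* Standalone sketch of the prefetch pattern from __free_pages_bootmem:
 * start the transaction for the current element before pulling the next
 * one in, and stay only one element ahead instead of sixteen.
 */
#include <stddef.h>

struct item {
        unsigned long flags;
        unsigned long count;
};

static void touch(struct item *p)
{
        p->flags = 0;
        p->count = 0;
}

static void walk(struct item *items, size_t n)
{
        size_t i;

        __builtin_prefetch(&items[0], 1);       /* most important line first */
        for (i = 0; i < n; i++) {
                if (i + 1 < n)
                        __builtin_prefetch(&items[i + 1], 1);   /* one ahead */
                touch(&items[i]);
        }
}

int main(void)
{
        struct item buf[64];

        walk(buf, sizeof(buf) / sizeof(buf[0]));
        return 0;
}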
From 84097518d1ecd2330f9488e4c2d09953a3340e74 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 22 Mar 2006 00:08:34 -0800
Subject: [PATCH] mm: nommu use compound pages

Now that compound page handling is properly fixed in the VM, move nommu
over to using compound pages rather than rolling their own refcounting.

nommu vm page refcounting is broken anyway, but there is no need to have
divergent code in the core VM now, nor when it gets fixed.

Signed-off-by: Nick Piggin
Cc: David Howells
(Needs testing, please).
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7aa0181287e1..e197818a7cf6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -422,11 +422,6 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         mutex_debug_check_no_locks_freed(page_address(page), PAGE_SIZE<<order);
--
cgit v1.2.3

Date: Wed, 22 Mar 2006 00:08:40 -0800
Subject: [PATCH] remove set_page_count() outside mm/

set_page_count usage outside mm/ is limited to setting the refcount to 1.
Remove set_page_count from outside mm/, and replace those users with
init_page_count() and set_page_refcounted().

This allows more debug checking, and tighter control on how code is allowed
to play around with page->_count.

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e197818a7cf6..7f65b5a63bb3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -442,7 +442,7 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
         if (order == 0) {
                 __ClearPageReserved(page);
                 set_page_count(page, 0);
-                set_page_refs(page, 0);
+                set_page_refcounted(page);
                 __free_page(page);
         } else {
                 int loop;
@@ -457,7 +457,7 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
                         set_page_count(p, 0);
                 }
 
-                set_page_refs(page, order);
+                set_page_refcounted(page);
                 __free_pages(page, order);
         }
 }
@@ -525,7 +525,7 @@ static int prep_new_page(struct page *page, int order)
                 1 << PG_referenced | 1 << PG_arch_1 |
                 1 << PG_checked | 1 << PG_mappedtodisk);
         set_page_private(page, 0);
-        set_page_refs(page, order);
+        set_page_refcounted(page);
         kernel_map_pages(page, 1 << order, 1);
         return 0;
 }
@@ -755,10 +755,8 @@
         BUG_ON(PageCompound(page));
         BUG_ON(!page_count(page));
-        for (i = 1; i < (1 << order); i++) {
-                BUG_ON(page_count(page + i));
-                set_page_count(page + i, 1);
-        }
+        for (i = 1; i < (1 << order); i++)
+                set_page_refcounted(page + i);
 }
 
 /*
@@ -1771,7 +1769,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                         continue;
                 page = pfn_to_page(pfn);
                 set_page_links(page, zone, nid, pfn);
-                set_page_count(page, 1);
+                init_page_count(page);
                 reset_page_mapcount(page);
                 SetPageReserved(page);
                 INIT_LIST_HEAD(&page->lru);
--
cgit v1.2.3
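A standalone analogue of the two helpers the changelog above introduces. It is not the kernel's implementation (the real init_page_count() and set_page_refcounted() operate on struct page inside mm/); it only illustrates the stated point that funnelling every "set the refcount to 1" caller through two narrow helpers makes room for debug checks on the expected starting state.

/* Toy object with a _count field standing in for page->_count. */
#include <assert.h>

struct object {
        int _count;
};

/* For code initialising a brand-new object (the memmap_init_zone case). */
static void init_object_count(struct object *obj)
{
        obj->_count = 1;
}

/* For an allocator handing an object to its first user: the count must
 * still be zero here, a check the open-coded set_page_count(page, 1)
 * callers could never make.
 */
static void set_object_refcounted(struct object *obj)
{
        assert(obj->_count == 0);
        obj->_count = 1;
}

int main(void)
{
        struct object a = { 0 }, b = { 0 };

        init_object_count(&a);
        set_object_refcounted(&b);
        return 0;
}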
From 17cf44064ae744f081309108fa67f0e942b10167 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Wed, 22 Mar 2006 00:08:41 -0800
Subject: [PATCH] mm: cleanup prep_ stuff

Move the prep_ stuff into prep_new_page.

Signed-off-by: Nick Piggin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7f65b5a63bb3..bdff85899638 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -212,6 +212,15 @@ static void destroy_compound_page(struct page *page, unsigned long order)
         }
 }
 
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+        int i;
+
+        BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+        for (i = 0; i < (1 << order); i++)
+                clear_highpage(page + i);
+}
+
 /*
  * function for dealing with page's order in buddy system.
  * zone->lock is already acquired when we use these.
@@ -496,7 +505,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
         if (unlikely(page_mapcount(page) |
                 (page->mapping != NULL)  |
@@ -527,6 +536,13 @@ static int prep_new_page(struct page *page, int order)
         set_page_private(page, 0);
         set_page_refcounted(page);
         kernel_map_pages(page, 1 << order, 1);
+
+        if (gfp_flags & __GFP_ZERO)
+                prep_zero_page(page, order, gfp_flags);
+
+        if (order && (gfp_flags & __GFP_COMP))
+                prep_compound_page(page, order);
+
         return 0;
 }
 
@@ -732,15 +748,6 @@ void fastcall free_cold_page(struct page *page)
         free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
-{
-        int i;
-
-        BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
-        for(i = 0; i < (1 << order); i++)
-                clear_highpage(page + i);
-}
-
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
--
cgit v1.2.3

Date: Wed, 22 Mar 2006 00:08:42 -0800
Subject: [PATCH] mm: prep_zero_page() in irq is a bug

prep_zero_page() uses KM_USER0 and hence may not be used from IRQ context,
at least for highmem pages.

Cc: Nick Piggin
Cc: Christoph Lameter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bdff85899638..ed91684cb1f5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -217,6 +217,11 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
         int i;
 
         BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+        /*
+         * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+         * and __GFP_HIGHMEM from hard or soft interrupt context.
+         */
+        BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
         for (i = 0; i < (1 << order); i++)
                 clear_highpage(page + i);
 }
--
cgit v1.2.3
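A standalone sketch combining the two patches above: flag-driven preparation is done in one place after allocation (the prep_new_page() consolidation), and the zeroing path carries an interrupt-context sanity check in the spirit of the new BUG_ON(). Every identifier here (ALLOC_ZERO, block_prep(), in_irq and so on) is invented for the example; only the structure mirrors the patches.

/* Toy allocator post-processing driven by the caller's flags. */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define ALLOC_ZERO      0x1u    /* analogue of __GFP_ZERO */
#define ALLOC_COMPOUND  0x2u    /* analogue of __GFP_COMP */

static int in_irq;              /* analogue of in_interrupt() */

static void zero_block(void *block, size_t size)
{
        /* analogue of the new check: no kmap-style zeroing from IRQ context */
        assert(!in_irq);
        memset(block, 0, size);
}

static void block_prep(void *block, size_t size, unsigned int alloc_flags)
{
        /* All flag-driven preparation happens here, after the common
         * set-up, instead of being scattered across the allocation paths.
         */
        if (alloc_flags & ALLOC_ZERO)
                zero_block(block, size);
        if (alloc_flags & ALLOC_COMPOUND) {
                /* a real allocator would link the sub-blocks together here */
        }
}

int main(void)
{
        void *p = malloc(64);

        if (p) {
                block_prep(p, 64, ALLOC_ZERO);
                free(p);
        }
        return 0;
}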
*/ void drain_node_pages(int nodeid) { int i, z; unsigned long flags; - local_irq_save(flags); for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = NODE_DATA(nodeid)->node_zones + z; struct per_cpu_pageset *pset; @@ -619,11 +620,14 @@ void drain_node_pages(int nodeid) struct per_cpu_pages *pcp; pcp = &pset->pcp[i]; - free_pages_bulk(zone, pcp->count, &pcp->list, 0); - pcp->count = 0; + if (pcp->count) { + local_irq_save(flags); + free_pages_bulk(zone, pcp->count, &pcp->list, 0); + pcp->count = 0; + local_irq_restore(flags); + } } } - local_irq_restore(flags); } #endif -- cgit v1.2.3