From 5f63b720b62925ef3c6a85473dcd547b0fd90616 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 11 Jan 2012 15:16:11 +0100 Subject: mm: page_alloc: remove trailing whitespace Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman --- mm/page_alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 918330f71dba..6fb46c1589b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -513,10 +513,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. + * other. That is, if we allocate a small block, and both were + * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. + * triggers coalescing into a block of larger size. * * -- wli */ @@ -1061,17 +1061,17 @@ retry_reserve: return page; } -/* +/* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ -static int rmqueue_bulk(struct zone *zone, unsigned int order, +static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { int i; - + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); @@ -4301,7 +4301,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; pgdat_page_cgroup_init(pgdat); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; -- cgit v1.2.3 From 041d3a8cdc18dc375a128d90bbb753949a81b1fb Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 29 Dec 2011 13:09:50 +0100 Subject: mm: page_alloc: introduce alloc_contig_range() This commit adds the alloc_contig_range() function which tries to allocate given range of pages. It tries to migrate all already allocated pages that fall in the range thus freeing them. Once all pages in the range are freed they are removed from the buddy system thus allocated for the caller to use. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fb46c1589b9..2c38a30d064e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -5550,6 +5551,193 @@ out: spin_unlock_irqrestore(&zone->lock, flags); } +#ifdef CONFIG_CMA + +static unsigned long pfn_max_align_down(unsigned long pfn) +{ + return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) - 1); +} + +static unsigned long pfn_max_align_up(unsigned long pfn) +{ + return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages)); +} + +static struct page * +__alloc_contig_migrate_alloc(struct page *page, unsigned long private, + int **resultp) +{ + return alloc_page(GFP_HIGHUSER_MOVABLE); +} + +/* [start, end) must belong to a single zone. */ +static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) +{ + /* This function is based on compact_zone() from compaction.c. */ + + unsigned long pfn = start; + unsigned int tries = 0; + int ret = 0; + + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(start)), + .sync = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + migrate_prep_local(); + + while (pfn < end || !list_empty(&cc.migratepages)) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (list_empty(&cc.migratepages)) { + cc.nr_migratepages = 0; + pfn = isolate_migratepages_range(cc.zone, &cc, + pfn, end); + if (!pfn) { + ret = -EINTR; + break; + } + tries = 0; + } else if (++tries == 5) { + ret = ret < 0 ? ret : -EBUSY; + break; + } + + ret = migrate_pages(&cc.migratepages, + __alloc_contig_migrate_alloc, + 0, false, true); + } + + putback_lru_pages(&cc.migratepages); + return ret > 0 ? 0 : ret; +} + +/** + * alloc_contig_range() -- tries to allocate given range of pages + * @start: start PFN to allocate + * @end: one-past-the-last PFN to allocate + * + * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES + * aligned, however it's the caller's responsibility to guarantee that + * we are the only thread that changes migrate type of pageblocks the + * pages fall in. + * + * The PFN range must belong to a single zone. + * + * Returns zero on success or negative error code. On success all + * pages which PFN is in [start, end) are allocated for the caller and + * need to be freed with free_contig_range(). + */ +int alloc_contig_range(unsigned long start, unsigned long end) +{ + struct zone *zone = page_zone(pfn_to_page(start)); + unsigned long outer_start, outer_end; + int ret = 0, order; + + /* + * What we do here is we mark all pageblocks in range as + * MIGRATE_ISOLATE. Because pageblock and max order pages may + * have different sizes, and due to the way page allocator + * work, we align the range to biggest of the two pages so + * that page allocator won't try to merge buddies from + * different pageblocks and change MIGRATE_ISOLATE to some + * other migration type. + * + * Once the pageblocks are marked as MIGRATE_ISOLATE, we + * migrate the pages from an unaligned range (ie. pages that + * we are interested in). This will put all the pages in + * range back to page allocator as MIGRATE_ISOLATE. + * + * When this is done, we take the pages in range from page + * allocator removing them from the buddy system. This way + * page allocator will never consider using them. + * + * This lets us mark the pageblocks back as + * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the + * aligned range but not in the unaligned, original range are + * put back to page allocator so that buddy can use them. + */ + + ret = start_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end)); + if (ret) + goto done; + + ret = __alloc_contig_migrate_range(start, end); + if (ret) + goto done; + + /* + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES + * aligned blocks that are marked as MIGRATE_ISOLATE. What's + * more, all pages in [start, end) are free in page allocator. + * What we are going to do is to allocate all pages from + * [start, end) (that is remove them from page allocator). + * + * The only problem is that pages at the beginning and at the + * end of interesting range may be not aligned with pages that + * page allocator holds, ie. they can be part of higher order + * pages. Because of this, we reserve the bigger range and + * once this is done free the pages we are not interested in. + * + * We don't have to hold zone->lock here because the pages are + * isolated thus they won't get removed from buddy. + */ + + lru_add_drain_all(); + drain_all_pages(); + + order = 0; + outer_start = start; + while (!PageBuddy(pfn_to_page(outer_start))) { + if (++order >= MAX_ORDER) { + ret = -EBUSY; + goto done; + } + outer_start &= ~0UL << order; + } + + /* Make sure the range is really isolated. */ + if (test_pages_isolated(outer_start, end)) { + pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", + outer_start, end); + ret = -EBUSY; + goto done; + } + + outer_end = isolate_freepages_range(outer_start, end); + if (!outer_end) { + ret = -EBUSY; + goto done; + } + + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + +done: + undo_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end)); + return ret; +} + +void free_contig_range(unsigned long pfn, unsigned nr_pages) +{ + for (; nr_pages--; ++pfn) + __free_page(pfn_to_page(pfn)); +} +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. -- cgit v1.2.3 From 6d4a49160de2c684fb59fa627bce80e200224331 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Wed, 11 Jan 2012 15:31:33 +0100 Subject: mm: page_alloc: change fallbacks array handling This commit adds a row for MIGRATE_ISOLATE type to the fallbacks array which was missing from it. It also, changes the array traversal logic a little making MIGRATE_RESERVE an end marker. The letter change, removes the implicit MIGRATE_UNMOVABLE from the end of each row which was read by __rmqueue_fallback() function. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c38a30d064e..d6b580c660f5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -875,11 +875,12 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { +static int fallbacks[MIGRATE_TYPES][3] = { [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; /* @@ -974,12 +975,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { - for (i = 0; i < MIGRATE_TYPES - 1; i++) { + for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) - continue; + break; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) -- cgit v1.2.3 From 47118af076f64844b4f423bc2f545b2da9dab50d Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Thu, 29 Dec 2011 13:09:50 +0100 Subject: mm: mmzone: MIGRATE_CMA migration type added The MIGRATE_CMA migration type has two main characteristics: (i) only movable pages can be allocated from MIGRATE_CMA pageblocks and (ii) page allocator will never change migration type of MIGRATE_CMA pageblocks. This guarantees (to some degree) that page in a MIGRATE_CMA page block can always be migrated somewhere else (unless there's no memory left in the system). It is designed to be used for allocating big chunks (eg. 10MiB) of physically contiguous memory. Once driver requests contiguous memory, pages from MIGRATE_CMA pageblocks may be migrated away to create a contiguous block. To minimise number of migrations, MIGRATE_CMA migration type is the last type tried when page allocator falls back to other migration types when requested. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Acked-by: Mel Gorman Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 76 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 16 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d6b580c660f5..0869eb1e9461 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -750,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) __free_pages(page, order); } +#ifdef CONFIG_CMA +/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ +void __init init_cma_reserved_pageblock(struct page *page) +{ + unsigned i = pageblock_nr_pages; + struct page *p = page; + + do { + __ClearPageReserved(p); + set_page_count(p, 0); + } while (++p, --i); + + set_page_refcounted(page); + set_pageblock_migratetype(page, MIGRATE_CMA); + __free_pages(page, pageblock_order); + totalram_pages += pageblock_nr_pages; +} +#endif /* * The order of subdivision here is critical for the IO subsystem. @@ -875,10 +893,15 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][3] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +static int fallbacks[MIGRATE_TYPES][4] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +#ifdef CONFIG_CMA + [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ +#else + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +#endif [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; @@ -995,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * aggressive about taking ownership of free pages + * + * On the other hand, never change migration + * type of MIGRATE_CMA pageblocks nor move CMA + * pages on different free lists. We don't + * want unmovable pages to be allocated from + * MIGRATE_CMA areas. */ - if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled) { - unsigned long pages; + if (!is_migrate_cma(migratetype) && + (unlikely(current_order >= pageblock_order / 2) || + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled)) { + int pages; pages = move_freepages_block(zone, page, start_migratetype); @@ -1017,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order) + if (current_order >= pageblock_order && + !is_migrate_cma(migratetype)) change_pageblock_range(page, current_order, start_migratetype); - expand(zone, page, order, current_order, area, migratetype); + expand(zone, page, order, current_order, area, + is_migrate_cma(migratetype) + ? migratetype : start_migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); @@ -1072,7 +1105,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { - int i; + int mt = migratetype, i; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { @@ -1093,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add(&page->lru, list); else list_add_tail(&page->lru, list); - set_page_private(page, migratetype); + if (IS_ENABLED(CONFIG_CMA)) { + mt = get_pageblock_migratetype(page); + if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + mt = migratetype; + } + set_page_private(page, mt); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1373,8 +1411,12 @@ int split_free_page(struct page *page) if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) - set_pageblock_migratetype(page, MIGRATE_MOVABLE); + for (; page < endpage; page += pageblock_nr_pages) { + int mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) + set_pageblock_migratetype(page, + MIGRATE_MOVABLE); + } } return 1 << order; @@ -5414,14 +5456,16 @@ static int __count_immobile_pages(struct zone *zone, struct page *page, int count) { unsigned long pfn, iter, found; + int mt; + /* * For avoiding noise data, lru_add_drain_all() should be called * If ZONE_MOVABLE, the zone never contains immobile pages */ if (zone_idx(zone) == ZONE_MOVABLE) return true; - - if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) + mt = get_pageblock_migratetype(page); + if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) return true; pfn = page_to_pfn(page); -- cgit v1.2.3 From 0815f3d81d76dfbf2abcfd93a85ff0a6008fe4c0 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 3 Apr 2012 15:06:15 +0200 Subject: mm: page_isolation: MIGRATE_CMA isolation functions added This commit changes various functions that change pages and pageblocks migrate type between MIGRATE_ISOLATE and MIGRATE_MOVABLE in such a way as to allow to work with MIGRATE_CMA migrate type. Signed-off-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0869eb1e9461..116c087f76bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5582,7 +5582,7 @@ out: return ret; } -void unset_migratetype_isolate(struct page *page) +void unset_migratetype_isolate(struct page *page, unsigned migratetype) { struct zone *zone; unsigned long flags; @@ -5590,8 +5590,8 @@ void unset_migratetype_isolate(struct page *page) spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); + set_pageblock_migratetype(page, migratetype); + move_freepages_block(zone, page, migratetype); out: spin_unlock_irqrestore(&zone->lock, flags); } @@ -5669,6 +5669,10 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate * @end: one-past-the-last PFN to allocate + * @migratetype: migratetype of the underlaying pageblocks (either + * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks + * in range must have the same migratetype and it must + * be either of the two. * * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES * aligned, however it's the caller's responsibility to guarantee that @@ -5681,7 +5685,8 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) * pages which PFN is in [start, end) are allocated for the caller and * need to be freed with free_contig_range(). */ -int alloc_contig_range(unsigned long start, unsigned long end) +int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype) { struct zone *zone = page_zone(pfn_to_page(start)); unsigned long outer_start, outer_end; @@ -5712,7 +5717,7 @@ int alloc_contig_range(unsigned long start, unsigned long end) */ ret = start_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end)); + pfn_max_align_up(end), migratetype); if (ret) goto done; @@ -5772,7 +5777,7 @@ int alloc_contig_range(unsigned long start, unsigned long end) done: undo_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end)); + pfn_max_align_up(end), migratetype); return ret; } -- cgit v1.2.3 From cfd3da1e49bb95c355c01c0f502d657deb3d34a4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 25 Apr 2011 21:36:42 +0000 Subject: mm: Serialize access to min_free_kbytes There is a race between the min_free_kbytes sysctl, memory hotplug and transparent hugepage support enablement. Memory hotplug uses a zonelists_mutex to avoid a race when building zonelists. Reuse it to serialise watermark updates. [a.p.zijlstra@chello.nl: Older patch fixed the race with spinlock] Signed-off-by: Mel Gorman Signed-off-by: Marek Szyprowski Reviewed-by: KAMEZAWA Hiroyuki Tested-by: Barry Song --- mm/page_alloc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 116c087f76bb..8be37bcda0b2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5020,14 +5020,7 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/** - * setup_per_zone_wmarks - called when min_free_kbytes changes - * or when memory is hot-{added|removed} - * - * Ensures that the watermark[min,low,high] values for each zone are set - * correctly with respect to min_free_kbytes. - */ -void setup_per_zone_wmarks(void) +static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -5082,6 +5075,20 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } +/** + * setup_per_zone_wmarks - called when min_free_kbytes changes + * or when memory is hot-{added|removed} + * + * Ensures that the watermark[min,low,high] values for each zone are set + * correctly with respect to min_free_kbytes. + */ +void setup_per_zone_wmarks(void) +{ + mutex_lock(&zonelists_mutex); + __setup_per_zone_wmarks(); + mutex_unlock(&zonelists_mutex); +} + /* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance -- cgit v1.2.3 From bba9071087108d3de70bea274e35064cc480487b Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 25 Jan 2012 12:09:52 +0100 Subject: mm: extract reclaim code from __alloc_pages_direct_reclaim() This patch extracts common reclaim code from __alloc_pages_direct_reclaim() function to separate function: __perform_reclaim() which can be later used by alloc_contig_range(). Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Cc: Michal Nazarewicz Acked-by: Mel Gorman Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8be37bcda0b2..4615531dcf66 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2130,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ -/* The really slow allocator path where we enter direct reclaim */ -static inline struct page * -__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) +/* Perform direct synchronous page reclaim */ +static int +__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, + nodemask_t *nodemask) { - struct page *page = NULL; struct reclaim_state reclaim_state; - bool drained = false; + int progress; cond_resched(); @@ -2150,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; - *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2158,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); + return progress; +} + +/* The really slow allocator path where we enter direct reclaim */ +static inline struct page * +__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, + int migratetype, unsigned long *did_some_progress) +{ + struct page *page = NULL; + bool drained = false; + + *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + nodemask); if (unlikely(!(*did_some_progress))) return NULL; -- cgit v1.2.3 From 49f223a9cd96c7293d7258ff88c2bdf83065f69c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 25 Jan 2012 12:49:24 +0100 Subject: mm: trigger page reclaim in alloc_contig_range() to stabilise watermarks alloc_contig_range() performs memory allocation so it also should keep track on keeping the correct level of memory watermarks. This commit adds a call to *_slowpath style reclaim to grab enough pages to make sure that the final collection of contiguous pages from freelists will not starve the system. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park CC: Michal Nazarewicz Tested-by: Rob Clark Tested-by: Ohad Ben-Cohen Tested-by: Benjamin Gaignard Tested-by: Robert Nelson Tested-by: Barry Song --- mm/page_alloc.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4615531dcf66..22348ae1005d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5079,6 +5079,11 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + + zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); + zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); + zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); + setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5684,6 +5689,54 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) return ret > 0 ? 0 : ret; } +/* + * Update zone's cma pages counter used for watermark level calculation. + */ +static inline void __update_cma_watermarks(struct zone *zone, int count) +{ + unsigned long flags; + spin_lock_irqsave(&zone->lock, flags); + zone->min_cma_pages += count; + spin_unlock_irqrestore(&zone->lock, flags); + setup_per_zone_wmarks(); +} + +/* + * Trigger memory pressure bump to reclaim some pages in order to be able to + * allocate 'count' pages in single page units. Does similar work as + *__alloc_pages_slowpath() function. + */ +static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) +{ + enum zone_type high_zoneidx = gfp_zone(gfp_mask); + struct zonelist *zonelist = node_zonelist(0, gfp_mask); + int did_some_progress = 0; + int order = 1; + + /* + * Increase level of watermarks to force kswapd do his job + * to stabilise at new watermark level. + */ + __update_cma_watermarks(zone, count); + + /* Obey watermarks as if the page was being allocated */ + while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { + wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); + + did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + NULL); + if (!did_some_progress) { + /* Exhausted what can be done so it's blamo time */ + out_of_memory(zonelist, gfp_mask, order, NULL, false); + } + } + + /* Restore original watermark levels. */ + __update_cma_watermarks(zone, -count); + + return count; +} + /** * alloc_contig_range() -- tries to allocate given range of pages * @start: start PFN to allocate @@ -5782,6 +5835,13 @@ int alloc_contig_range(unsigned long start, unsigned long end, goto done; } + /* + * Reclaim enough pages to make sure that contiguous allocation + * will not starve the system. + */ + __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); + + /* Grab isolated pages from freelists. */ outer_end = isolate_freepages_range(outer_start, end); if (!outer_end) { ret = -EBUSY; -- cgit v1.2.3 From 58f42fd54144346898e6dc6d6ae3acd4c591b42f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Fri, 11 May 2012 09:37:13 +0200 Subject: cma: fix migration mode __alloc_contig_migrate_range calls migrate_pages with wrong argument for migrate_mode. Fix it. Cc: Marek Szyprowski Signed-off-by: Minchan Kim Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 22348ae1005d..ed85c02c1f6e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5682,7 +5682,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) ret = migrate_pages(&cc.migratepages, __alloc_contig_migrate_alloc, - 0, false, true); + 0, false, MIGRATE_SYNC); } putback_lru_pages(&cc.migratepages); -- cgit v1.2.3