From 15fb09722df32b7685be1cbcac198bb556ddaffe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:07 +0200 Subject: memblock: Use MEMBLOCK_ALLOC_ACCESSIBLE instead of ANYWHERE in memblock_alloc_try_nid() After node affine allocation fails, memblock_alloc_try_nid() calls memblock_alloc_base() with @max_addr set to MEMBLOCK_ALLOC_ANYWHERE. This is inconsistent with memblock_alloc() and what the function's sole user - sparc/mm/init_64 - expects, although it doesn't make any difference as sparc64 doesn't have highmem and ACCESSIBLE equals ANYWHERE. This patch makes memblock_alloc_try_nid() use ACCESSIBLE instead of ANYWHERE. This isn't complete as node affine allocation doesn't consider memblock.current_limit. It will be handled with future changes. This patch doesn't introduce any behavior difference. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-4-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index a0562d1a6ad4..87e512dc8018 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -612,7 +612,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i if (res) return res; - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -- cgit v1.2.3 From 348968eb151e2569ad0ebe19b2f9c3c25b5c816a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:08 +0200 Subject: memblock: Use round_up/down() instead of memblock_align_up/down() Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-5-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 87e512dc8018..9882a88d4a10 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -41,17 +41,6 @@ static inline const char *memblock_type_name(struct memblock_type *type) /* * Address comparison utilities */ - -static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) -{ - return addr & ~(size - 1); -} - -static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) -{ - return (addr + (size - 1)) & ~(size - 1); -} - static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { @@ -87,7 +76,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ if (end < size) return MEMBLOCK_ERROR; - base = memblock_align_down((end - size), align); + base = round_down(end - size, align); /* Prevent allocations returning 0 as it's also used to * indicate an allocation failure @@ -102,7 +91,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ res_base = memblock.reserved.regions[j].base; if (res_base < size) break; - base = memblock_align_down(res_base - size, align); + base = round_down(res_base - size, align); } return MEMBLOCK_ERROR; @@ -486,7 +475,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph /* We align the size to limit fragmentation. 
Without this, a lot of * small allocs quickly eat up the whole reserve array on sparc */ - size = memblock_align_up(size, align); + size = round_up(size, align); found = memblock_find_base(size, align, 0, max_addr); if (found != MEMBLOCK_ERROR && @@ -562,7 +551,7 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, start = mp->base; end = start + mp->size; - start = memblock_align_up(start, align); + start = round_up(start, align); while (start < end) { phys_addr_t this_end; int this_nid; @@ -590,7 +579,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n /* We align the size to limit fragmentation. Without this, a lot of * small allocs quickly eat up the whole reserve array on sparc */ - size = memblock_align_up(size, align); + size = round_up(size, align); /* We do a bottom-up search for a region with the right * nid since that's easier considering how memblock_nid_range() -- cgit v1.2.3 From 1f5026a7e21e409c2b9dd54f6dfb9446511fb7c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:09 +0200 Subject: memblock: Kill MEMBLOCK_ERROR 25818f0f28 (memblock: Make MEMBLOCK_ERROR be 0) thankfully made MEMBLOCK_ERROR 0 and there already is code which expects an error return of 0. There's no point in keeping MEMBLOCK_ERROR around. End its misery. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-6-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 9882a88d4a10..196993661346 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -74,7 +74,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ /* In case, huge size is requested */ if (end < size) - return MEMBLOCK_ERROR; + return 0; base = round_down(end - size, align); @@ -94,7 +94,7 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ base = round_down(res_base - size, align); } - return MEMBLOCK_ERROR; + return 0; } static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, @@ -126,10 +126,10 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, if (bottom >= top) continue; found = memblock_find_region(bottom, top, size, align); - if (found != MEMBLOCK_ERROR) + if (found) return found; } - return MEMBLOCK_ERROR; + return 0; } /* @@ -214,10 +214,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) */ if (use_slab) { new_array = kmalloc(new_size, GFP_KERNEL); - addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); + addr = new_array ? 
__pa(new_array) : 0; } else addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); - if (addr == MEMBLOCK_ERROR) { + if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; @@ -478,8 +478,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph size = round_up(size, align); found = memblock_find_base(size, align, 0, max_addr); - if (found != MEMBLOCK_ERROR && - !memblock_add_region(&memblock.reserved, found, size)) + if (found && !memblock_add_region(&memblock.reserved, found, size)) return found; return 0; @@ -559,14 +558,14 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, this_end = memblock_nid_range(start, end, &this_nid); if (this_nid == nid) { phys_addr_t ret = memblock_find_region(start, this_end, size, align); - if (ret != MEMBLOCK_ERROR && + if (ret && !memblock_add_region(&memblock.reserved, ret, size)) return ret; } start = this_end; } - return MEMBLOCK_ERROR; + return 0; } phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) @@ -588,7 +587,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n for (i = 0; i < mem->cnt; i++) { phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); - if (ret != MEMBLOCK_ERROR) + if (ret) return ret; } -- cgit v1.2.3 From fc769a8e70a3348d5de49e5f69f6aff810157360 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 09:58:10 +0200 Subject: memblock: Replace memblock_find_base() with memblock_find_in_range() memblock_find_base() is a static function with two callers in memblock.c and memblock_find_in_range() is a wrapper around it which just changes the types and order of parameters. Make memblock_find_in_range() take phys_addr_t instead of u64 for consistency and replace memblock_find_base() with it. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310457490-3356-7-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 196993661346..0f9626f01b5e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -97,8 +97,11 @@ static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_ return 0; } -static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, - phys_addr_t align, phys_addr_t start, phys_addr_t end) +/* + * Find a free area with specified alignment in a specific range. + */ +phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align) { long i; @@ -132,14 +135,6 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, return 0; } -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) -{ - return memblock_find_base(size, align, start, end); -} - /* * Free memblock.reserved.regions */ @@ -216,7 +211,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) new_array = kmalloc(new_size, GFP_KERNEL); addr = new_array ? 
__pa(new_array) : 0; } else - addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); + addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; @@ -477,7 +472,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph */ size = round_up(size, align); - found = memblock_find_base(size, align, 0, max_addr); + found = memblock_find_in_range(0, max_addr, size, align); if (found && !memblock_add_region(&memblock.reserved, found, size)) return found; -- cgit v1.2.3 From b2fea988f4f3b38ff4edfc1556a843c91932804c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:31 +0200 Subject: memblock: Improve generic memblock_nid_range() using for_each_mem_pfn_range() Given an address range, memblock_nid_range() determines the node the start of the range belongs to and up to where the range stays in the same node. It's implemented by calling get_pfn_range_for_nid(), which determines min and max pfns for a given node, for each node and testing whether start address falls in there. This is not only inefficient but also incorrect when nodes interleave as min-max ranges for nodes overlap. This patch reimplements memblock_nid_range() using for_each_mem_pfn_range(). It's simpler, walks the mem ranges once and can find the exact range the start address falls in. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-5-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 0f9626f01b5e..97f3486ce4d6 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -511,28 +511,14 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { #ifdef CONFIG_ARCH_POPULATES_NODE_MAP - /* - * This code originates from sparc which really wants use to walk by addresses - * and returns the nid. This is not very convenient for early_pfn_map[] users - * as the map isn't sorted yet, and it really wants to be walked by nid. - * - * For now, I implement the inefficient method below which walks the early - * map multiple times. Eventually we may want to use an ARCH config option - * to implement a completely different method for both case. - */ unsigned long start_pfn, end_pfn; int i; - for (i = 0; i < MAX_NUMNODES; i++) { - get_pfn_range_for_nid(i, &start_pfn, &end_pfn); - if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) - continue; - *nid = i; - return min(end, PFN_PHYS(end_pfn)); - } + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, nid) + if (start >= PFN_PHYS(start_pfn) && start < PFN_PHYS(end_pfn)) + return min(end, PFN_PHYS(end_pfn)); #endif *nid = 0; - return end; } -- cgit v1.2.3 From f9b18db3b1cedc75e5d002a4d7097891c3399736 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:32 +0200 Subject: memblock: Don't allow archs to override memblock_nid_range() memblock_nid_range() is used to implement memblock_[try_]alloc_nid(). The generic version determines the range by walking early_node_map with for_each_mem_pfn_range(). The generic version is defined __weak to allow arch override. 
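As background for the override being discussed: a __weak definition is only a link-time default; any ordinary (strong) definition of the same symbol anywhere in the link replaces it, with no #ifdef in the generic code. A minimal self-contained sketch, using a hypothetical symbol name (the kernel's __weak macro expands to the GCC attribute spelled out below):

	#include <stdio.h>

	/* link-time default: the linker silently prefers any non-weak
	 * definition of board_init() found in another object file */
	int __attribute__((weak)) board_init(void)
	{
		return 0;	/* generic fallback */
	}

	int main(void)
	{
		/* prints 0 unless a strong board_init() is linked in */
		printf("board_init() = %d\n", board_init());
		return 0;
	}
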
Currently, only sparc overrides it; however, with the previous update to the generic implementation, there isn't much to be gained with arch override. Sparc would behave exactly the same with the generic implementation. This patch disallows arch override for memblock_nid_range() and makes both generic and sparc versions static. sparc is only compile tested. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-6-git-send-email-tj@kernel.org Cc: "David S. Miller" Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 97f3486ce4d6..22cd999b0d4e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -508,7 +508,7 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) * have been done to populate it. */ -phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) +static phys_addr_t __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) { #ifdef CONFIG_ARCH_POPULATES_NODE_MAP unsigned long start_pfn, end_pfn; -- cgit v1.2.3 From 34e1845548418e5cecee0568ba721e1f089c092c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:33 +0200 Subject: memblock: Make memblock_alloc_[try_]nid() top-down NUMA aware memblock alloc functions - memblock_alloc_[try_]nid() - weren't properly top-down because memblock_nid_range() scanned forward. This patch reverses memblock_nid_range(), renames it to memblock_nid_range_rev() and updates related functions to implement proper top-down allocation. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-7-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 22cd999b0d4e..447cf64304ba 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -499,27 +499,26 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) /* - * Additional node-local allocators. Search for node memory is bottom up - * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. XXX I plan to fix that at some stage + * Additional node-local top-down allocators. * * WARNING: Only available after early_node_map[] has been populated, * on some architectures, that is after all the calls to add_active_range() * have been done to populate it. 
*/ -static phys_addr_t __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) +static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start, + phys_addr_t end, int *nid) { #ifdef CONFIG_ARCH_POPULATES_NODE_MAP unsigned long start_pfn, end_pfn; int i; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, nid) - if (start >= PFN_PHYS(start_pfn) && start < PFN_PHYS(end_pfn)) - return min(end, PFN_PHYS(end_pfn)); + if (end > PFN_PHYS(start_pfn) && end <= PFN_PHYS(end_pfn)) + return max(start, PFN_PHYS(start_pfn)); #endif *nid = 0; - return end; + return start; } static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, @@ -531,21 +530,19 @@ static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, start = mp->base; end = start + mp->size; - start = round_up(start, align); while (start < end) { - phys_addr_t this_end; + phys_addr_t this_start; int this_nid; - this_end = memblock_nid_range(start, end, &this_nid); + this_start = memblock_nid_range_rev(start, end, &this_nid); if (this_nid == nid) { - phys_addr_t ret = memblock_find_region(start, this_end, size, align); + phys_addr_t ret = memblock_find_region(this_start, end, size, align); if (ret && !memblock_add_region(&memblock.reserved, ret, size)) return ret; } - start = this_end; + end = this_start; } - return 0; } @@ -561,11 +558,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n */ size = round_up(size, align); - /* We do a bottom-up search for a region with the right - * nid since that's easier considering how memblock_nid_range() - * works - */ - for (i = 0; i < mem->cnt; i++) { + for (i = mem->cnt - 1; i >= 0; i--) { phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], size, align, nid); if (ret) -- cgit v1.2.3 From e64980405cc6aa74ef178d8d9aa4018c867ceed1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 10:46:34 +0200 Subject: memblock: Separate out memblock_find_in_range_node() Node affine memblock allocation logic is currently implemented across memblock_alloc_nid() and memblock_alloc_nid_region(). This reorganizes it such that it resembles that of non-NUMA allocation API. Area finding is collected and moved into new exported function memblock_find_in_range_node() which is symmetrical to non-NUMA counterpart - it handles @start/@end and understands ANYWHERE and ACCESSIBLE. memblock_alloc_nid() now simply calls memblock_find_in_range_node() and reserves the returned area. This makes memblock_alloc[_try]_nid() observe ACCESSIBLE limit on node affine allocations too (again, this doesn't make any difference for the current sole user - sparc64). Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310460395-30913-8-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. 
Peter Anvin --- mm/memblock.c | 57 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 25 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 447cf64304ba..a8edb422795b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -521,49 +521,56 @@ static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start, return start; } -static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, +phys_addr_t __init memblock_find_in_range_node(phys_addr_t start, + phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid) { - phys_addr_t start, end; + struct memblock_type *mem = &memblock.memory; + int i; - start = mp->base; - end = start + mp->size; + BUG_ON(0 == size); - while (start < end) { - phys_addr_t this_start; - int this_nid; + /* Pump up max_addr */ + if (end == MEMBLOCK_ALLOC_ACCESSIBLE) + end = memblock.current_limit; - this_start = memblock_nid_range_rev(start, end, &this_nid); - if (this_nid == nid) { - phys_addr_t ret = memblock_find_region(this_start, end, size, align); - if (ret && - !memblock_add_region(&memblock.reserved, ret, size)) - return ret; + for (i = mem->cnt - 1; i >= 0; i--) { + struct memblock_region *r = &mem->regions[i]; + phys_addr_t base = max(start, r->base); + phys_addr_t top = min(end, r->base + r->size); + + while (base < top) { + phys_addr_t tbase, ret; + int tnid; + + tbase = memblock_nid_range_rev(base, top, &tnid); + if (nid == MAX_NUMNODES || tnid == nid) { + ret = memblock_find_region(tbase, top, size, align); + if (ret) + return ret; + } + top = tbase; } - end = this_start; } + return 0; } phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { - struct memblock_type *mem = &memblock.memory; - int i; - - BUG_ON(0 == size); + phys_addr_t found; - /* We align the size to limit fragmentation. Without this, a lot of + /* + * We align the size to limit fragmentation. Without this, a lot of * small allocs quickly eat up the whole reserve array on sparc */ size = round_up(size, align); - for (i = mem->cnt - 1; i >= 0; i--) { - phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], - size, align, nid); - if (ret) - return ret; - } + found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE, + size, align, nid); + if (found && !memblock_add_region(&memblock.reserved, found, size)) + return found; return 0; } -- cgit v1.2.3 From ed7b56a799cade11f458cd83e1150af54a66b7e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:15:54 +0200 Subject: memblock: Remove memblock_memory_can_coalesce() Arch could implement memblock_memory_can_coalesce() to veto merging of adjacent or overlapping memblock regions; however, no arch did and any vetoing would trigger WARN_ON(). Memblock regions are supposed to deal with proper memory anyway. Remove the unused hook. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-2-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. 
Peter Anvin --- mm/memblock.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index a8edb422795b..bd3a3a9591d4 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -251,12 +251,6 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; } -extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, - phys_addr_t addr2, phys_addr_t size2) -{ - return 1; -} - static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { @@ -282,17 +276,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, * of a block. */ if (base < rgn->base && end >= rgn->base) { - /* If we can't coalesce, create a new block */ - if (!memblock_memory_can_coalesce(base, size, - rgn->base, - rgn->size)) { - /* Overlap & can't coalesce are mutually - * exclusive, if you do that, be prepared - * for trouble - */ - WARN_ON(end != rgn->base); - goto new_block; - } /* We extend the bottom of the block down to our * base */ @@ -316,17 +299,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, * top of a block */ if (base <= rend && end >= rend) { - /* If we can't coalesce, create a new block */ - if (!memblock_memory_can_coalesce(rgn->base, - rgn->size, - base, size)) { - /* Overlap & can't coalesce are mutually - * exclusive, if you do that, be prepared - * for trouble - */ - WARN_ON(rend != base); - goto new_block; - } /* We adjust our base down to enclose the * original block and destroy it. It will be * part of our new allocation. Since we've @@ -349,7 +321,6 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, return 0; } - new_block: /* If we are out of space, we fail. It's too late to resize the array * but then this shouldn't have happened in the first place. */ -- cgit v1.2.3 From 784656f9c680d334e7b4cdb6951c5c913e5a26bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:15:55 +0200 Subject: memblock: Reimplement memblock_add_region() memblock_add_region() carefully checked for merge and overlap conditions while adding a new region, which is complicated and makes it difficult to allow arbitrary overlaps or add more merge conditions (e.g. node ID). This re-implements memblock_add_region() such that insertion is done in two steps - all non-overlapping portions of new area are inserted as separate regions first and then memblock_merge_regions() scans and merges all neighbouring compatible regions. This makes addition logic simpler and more versatile and enables adding node information to memblock. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-3-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. 
Peter Anvin --- mm/memblock.c | 195 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 110 insertions(+), 85 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index bd3a3a9591d4..992aa1807473 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -251,117 +251,142 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; } -static long __init_memblock memblock_add_region(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) +/** + * memblock_merge_regions - merge neighboring compatible regions + * @type: memblock type to scan + * + * Scan @type and merge neighboring compatible regions. + */ +static void __init_memblock memblock_merge_regions(struct memblock_type *type) { - phys_addr_t end = base + size; - int i, slot = -1; + int i = 0; - /* First try and coalesce this MEMBLOCK with others */ - for (i = 0; i < type->cnt; i++) { - struct memblock_region *rgn = &type->regions[i]; - phys_addr_t rend = rgn->base + rgn->size; + /* cnt never goes below 1 */ + while (i < type->cnt - 1) { + struct memblock_region *this = &type->regions[i]; + struct memblock_region *next = &type->regions[i + 1]; - /* Exit if there's no possible hits */ - if (rgn->base > end || rgn->size == 0) - break; - - /* Check if we are fully enclosed within an existing - * block - */ - if (rgn->base <= base && rend >= end) - return 0; + if (this->base + this->size != next->base) { + BUG_ON(this->base + this->size > next->base); + i++; + continue; + } - /* Check if we overlap or are adjacent with the bottom - * of a block. - */ - if (base < rgn->base && end >= rgn->base) { - /* We extend the bottom of the block down to our - * base - */ - rgn->base = base; - rgn->size = rend - base; + this->size += next->size; + memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next)); + type->cnt--; + } +} - /* Return if we have nothing else to allocate - * (fully coalesced) - */ - if (rend >= end) - return 0; +/** + * memblock_insert_region - insert new memblock region + * @type: memblock type to insert into + * @idx: index for the insertion point + * @base: base address of the new region + * @size: size of the new region + * + * Insert new memblock region [@base,@base+@size) into @type at @idx. + * @type must already have extra room to accomodate the new region. + */ +static void __init_memblock memblock_insert_region(struct memblock_type *type, + int idx, phys_addr_t base, + phys_addr_t size) +{ + struct memblock_region *rgn = &type->regions[idx]; - /* We continue processing from the end of the - * coalesced block. - */ - base = rend; - size = end - base; - } + BUG_ON(type->cnt >= type->max); + memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); + rgn->base = base; + rgn->size = size; + type->cnt++; +} - /* Now check if we overlap or are adjacent with the - * top of a block - */ - if (base <= rend && end >= rend) { - /* We adjust our base down to enclose the - * original block and destroy it. It will be - * part of our new allocation. Since we've - * freed an entry, we know we won't fail - * to allocate one later, so we won't risk - * losing the original block allocation. - */ - size += (base - rgn->base); - base = rgn->base; - memblock_remove_region(type, i--); - } - } +/** + * memblock_add_region - add new memblock region + * @type: memblock type to add new region into + * @base: base address of the new region + * @size: size of the new region + * + * Add new memblock region [@base,@base+@size) into @type. 
The new region + * is allowed to overlap with existing ones - overlaps don't affect already + * existing regions. @type is guaranteed to be minimal (all neighbouring + * compatible regions are merged) after the addition. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static long __init_memblock memblock_add_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) +{ + bool insert = false; + phys_addr_t obase = base, end = base + size; + int i, nr_new; - /* If the array is empty, special case, replace the fake - * filler region and return - */ - if ((type->cnt == 1) && (type->regions[0].size == 0)) { + /* special case for empty array */ + if (type->regions[0].size == 0) { + WARN_ON(type->cnt != 1); type->regions[0].base = base; type->regions[0].size = size; return 0; } - - /* If we are out of space, we fail. It's too late to resize the array - * but then this shouldn't have happened in the first place. +repeat: + /* + * The following is executed twice. Once with %false @insert and + * then with %true. The first counts the number of regions needed + * to accomodate the new area. The second actually inserts them. */ - if (WARN_ON(type->cnt >= type->max)) - return -1; + base = obase; + nr_new = 0; + + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; - /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ - for (i = type->cnt - 1; i >= 0; i--) { - if (base < type->regions[i].base) { - type->regions[i+1].base = type->regions[i].base; - type->regions[i+1].size = type->regions[i].size; - } else { - type->regions[i+1].base = base; - type->regions[i+1].size = size; - slot = i + 1; + if (rbase >= end) break; + if (rend <= base) + continue; + /* + * @rgn overlaps. If it separates the lower part of new + * area, insert that portion. + */ + if (rbase > base) { + nr_new++; + if (insert) + memblock_insert_region(type, i++, base, + rbase - base); } + /* area below @rend is dealt with, forget about it */ + base = min(rend, end); } - if (base < type->regions[0].base) { - type->regions[0].base = base; - type->regions[0].size = size; - slot = 0; + + /* insert the remaining portion */ + if (base < end) { + nr_new++; + if (insert) + memblock_insert_region(type, i, base, end - base); } - type->cnt++; - /* The array is full ? Try to resize it. If that fails, we undo - * our allocation and return an error + /* + * If this was the first round, resize array and repeat for actual + * insertions; otherwise, merge and return. 
*/ - if (type->cnt == type->max && memblock_double_array(type)) { - BUG_ON(slot < 0); - memblock_remove_region(type, slot); - return -1; + if (!insert) { + while (type->cnt + nr_new > type->max) + if (memblock_double_array(type) < 0) + return -ENOMEM; + insert = true; + goto repeat; + } else { + memblock_merge_regions(type); + return 0; } - - return 0; } long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); - } static long __init_memblock __memblock_remove(struct memblock_type *type, -- cgit v1.2.3 From 7c0caeb866b0f648d91bb75b8bc6f86af95bb033 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:43:42 +0200 Subject: memblock: Add optional region->nid From 83103b92f3234ec830852bbc5c45911bd6cbdb20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:16 +0200 Add optional region->nid which can be enabled by arch using CONFIG_HAVE_MEMBLOCK_NODE_MAP. When enabled, memblock also carries NUMA node information and replaces early_node_map[]. Newly added memblocks have MAX_NUMNODES as nid. Arch can then call memblock_set_node() to set node information. memblock takes care of merging and node affine allocations w.r.t. node information. When MEMBLOCK_NODE_MAP is enabled, early_node_map[], related data structures and functions to manipulate and iterate it are disabled. memblock version of __next_mem_pfn_range() is provided such that for_each_mem_pfn_range() behaves the same and its users don't have to be updated. -v2: Yinghai spotted section mismatch caused by missing __init_memblock in memblock_set_node(). Fixed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094342.GF3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. 
Peter Anvin --- mm/memblock.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 124 insertions(+), 18 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 992aa1807473..e815f4b75809 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -161,12 +161,8 @@ int __init_memblock memblock_reserve_reserved_regions(void) static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { - unsigned long i; - - for (i = r; i < type->cnt - 1; i++) { - type->regions[i].base = type->regions[i + 1].base; - type->regions[i].size = type->regions[i + 1].size; - } + memmove(&type->regions[r], &type->regions[r + 1], + (type->cnt - (r + 1)) * sizeof(type->regions[r])); type->cnt--; /* Special case for empty arrays */ @@ -174,6 +170,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u type->cnt = 1; type->regions[0].base = 0; type->regions[0].size = 0; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); } } @@ -266,7 +263,9 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) struct memblock_region *this = &type->regions[i]; struct memblock_region *next = &type->regions[i + 1]; - if (this->base + this->size != next->base) { + if (this->base + this->size != next->base || + memblock_get_region_node(this) != + memblock_get_region_node(next)) { BUG_ON(this->base + this->size > next->base); i++; continue; @@ -290,7 +289,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type) */ static void __init_memblock memblock_insert_region(struct memblock_type *type, int idx, phys_addr_t base, - phys_addr_t size) + phys_addr_t size, int nid) { struct memblock_region *rgn = &type->regions[idx]; @@ -298,6 +297,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); rgn->base = base; rgn->size = size; + memblock_set_region_node(rgn, nid); type->cnt++; } @@ -327,6 +327,7 @@ static long __init_memblock memblock_add_region(struct memblock_type *type, WARN_ON(type->cnt != 1); type->regions[0].base = base; type->regions[0].size = size; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); return 0; } repeat: @@ -355,7 +356,7 @@ repeat: nr_new++; if (insert) memblock_insert_region(type, i++, base, - rbase - base); + rbase - base, MAX_NUMNODES); } /* area below @rend is dealt with, forget about it */ base = min(rend, end); @@ -365,7 +366,8 @@ repeat: if (base < end) { nr_new++; if (insert) - memblock_insert_region(type, i, base, end - base); + memblock_insert_region(type, i, base, end - base, + MAX_NUMNODES); } /* @@ -459,6 +461,101 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) return memblock_add_region(_rgn, base, size); } +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +/* + * Common iterator interface used to define for_each_mem_range(). 
+ */ +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) +{ + struct memblock_type *type = &memblock.memory; + struct memblock_region *r; + + while (++*idx < type->cnt) { + r = &type->regions[*idx]; + + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) + continue; + if (nid == MAX_NUMNODES || nid == r->nid) + break; + } + if (*idx >= type->cnt) { + *idx = -1; + return; + } + + if (out_start_pfn) + *out_start_pfn = PFN_UP(r->base); + if (out_end_pfn) + *out_end_pfn = PFN_DOWN(r->base + r->size); + if (out_nid) + *out_nid = r->nid; +} + +/** + * memblock_set_node - set node ID on memblock regions + * @base: base of area to set node ID for + * @size: size of area to set node ID for + * @nid: node ID to set + * + * Set the nid of memblock memory regions in [@base,@base+@size) to @nid. + * Regions which cross the area boundaries are split as necessary. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, + int nid) +{ + struct memblock_type *type = &memblock.memory; + phys_addr_t end = base + size; + int i; + + /* we'll create at most two more regions */ + while (type->cnt + 2 > type->max) + if (memblock_double_array(type) < 0) + return -ENOMEM; + + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; + + if (rbase >= end) + break; + if (rend <= base) + continue; + + if (rbase < base) { + /* + * @rgn intersects from below. Split and continue + * to process the next region - the new top half. + */ + rgn->base = base; + rgn->size = rend - rgn->base; + memblock_insert_region(type, i, rbase, base - rbase, + rgn->nid); + } else if (rend > end) { + /* + * @rgn intersects from above. Split and redo the + * current region - the new bottom half. 
+ */ + rgn->base = end; + rgn->size = rend - rgn->base; + memblock_insert_region(type, i--, rbase, end - rbase, + rgn->nid); + } else { + /* @rgn is fully contained, set ->nid */ + rgn->nid = nid; + } + } + + memblock_merge_regions(type); + return 0; +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { phys_addr_t found; @@ -689,19 +786,26 @@ void __init_memblock memblock_set_current_limit(phys_addr_t limit) memblock.current_limit = limit; } -static void __init_memblock memblock_dump(struct memblock_type *region, char *name) +static void __init_memblock memblock_dump(struct memblock_type *type, char *name) { unsigned long long base, size; int i; - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->regions[i].base; - size = region->regions[i].size; + pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); - pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", - name, i, base, base + size - 1, size); + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + char nid_buf[32] = ""; + + base = rgn->base; + size = rgn->size; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + if (memblock_get_region_node(rgn) != MAX_NUMNODES) + snprintf(nid_buf, sizeof(nid_buf), " on node %d", + memblock_get_region_node(rgn)); +#endif + pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s\n", + name, i, base, base + size - 1, size, nid_buf); } } @@ -759,11 +863,13 @@ void __init memblock_init(void) */ memblock.memory.regions[0].base = 0; memblock.memory.regions[0].size = 0; + memblock_set_region_node(&memblock.memory.regions[0], MAX_NUMNODES); memblock.memory.cnt = 1; /* Ditto. */ memblock.reserved.regions[0].base = 0; memblock.reserved.regions[0].size = 0; + memblock_set_region_node(&memblock.reserved.regions[0], MAX_NUMNODES); memblock.reserved.cnt = 1; memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; -- cgit v1.2.3 From 35fd0808d7d8d001cd72f112e3bca84664b596a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:15:59 +0200 Subject: memblock: Implement for_each_free_mem_range() Implement for_each_free_mem_range() which iterates over free memory areas according to memblock (memory && !reserved). This will be used to simplify memblock users. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-7-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Signed-off-by: H. Peter Anvin --- mm/memblock.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index e815f4b75809..c4a8750406fc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -461,6 +461,82 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) return memblock_add_region(_rgn, base, size); } +/** + * __next_free_mem_range - next function for for_each_free_mem_range() + * @idx: pointer to u64 loop variable + * @nid: nid: node selector, %MAX_NUMNODES for all nodes + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @p_nid: ptr to int for nid of the range, can be %NULL + * + * Find the first free area from *@idx which matches @nid, fill the out + * parameters, and update *@idx for the next iteration. 
The lower 32bit of + * *@idx contains index into memory region and the upper 32bit indexes the + * areas before each reserved region. For example, if reserved regions + * look like the following, + * + * 0:[0-16), 1:[32-48), 2:[128-130) + * + * The upper 32bit indexes the following regions. + * + * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) + * + * As both region arrays are sorted, the function advances the two indices + * in lockstep and returns each intersection. + */ +void __init_memblock __next_free_mem_range(u64 *idx, int nid, + phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) +{ + struct memblock_type *mem = &memblock.memory; + struct memblock_type *rsv = &memblock.reserved; + int mi = *idx & 0xffffffff; + int ri = *idx >> 32; + + for ( ; mi < mem->cnt; mi++) { + struct memblock_region *m = &mem->regions[mi]; + phys_addr_t m_start = m->base; + phys_addr_t m_end = m->base + m->size; + + /* only memory regions are associated with nodes, check it */ + if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m)) + continue; + + /* scan areas before each reservation for intersection */ + for ( ; ri < rsv->cnt + 1; ri++) { + struct memblock_region *r = &rsv->regions[ri]; + phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0; + phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX; + + /* if ri advanced past mi, break out to advance mi */ + if (r_start >= m_end) + break; + /* if the two regions intersect, we're done */ + if (m_start < r_end) { + if (out_start) + *out_start = max(m_start, r_start); + if (out_end) + *out_end = min(m_end, r_end); + if (out_nid) + *out_nid = memblock_get_region_node(m); + /* + * The region which ends first is advanced + * for the next iteration. + */ + if (m_end <= r_end) + mi++; + else + ri++; + *idx = (u32)mi | (u64)ri << 32; + return; + } + } + } + + /* signal end of iteration */ + *idx = ULLONG_MAX; +} + #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* * Common iterator interface used to define for_each_mem_range(). -- cgit v1.2.3 From c378ddd53f9b8832a46fd4fec050a97fc2269858 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:46:03 +0200 Subject: memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option From 6839454ae63f1eb21e515c10229ca95c22955fec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Jul 2011 11:22:17 +0200 Make ARCH_DISCARD_MEMBLOCK a config option so that it can be handled together with other MEMBLOCK options. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/20110714094603.GH3455@htj.dyndns.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. 
Peter Anvin --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index c4a8750406fc..ebc6119f1280 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -959,7 +959,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { -- cgit v1.2.3 From 24aa07882b672fff2da2f5c955759f0bd13d32d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 12 Jul 2011 11:16:06 +0200 Subject: memblock, x86: Replace memblock_x86_reserve/free_range() with generic ones Other than sanity check and debug message, the x86 specific versions of the memblock reserve/free functions are simple wrappers around the generic versions - memblock_reserve/free(). This patch adds debug messages with caller identification to the generic versions and replaces x86 specific ones and kills them. arch/x86/include/asm/memblock.h and arch/x86/mm/memblock.c are empty after this change and removed. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/r/1310462166-31469-14-git-send-email-tj@kernel.org Cc: Yinghai Lu Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: H. Peter Anvin --- mm/memblock.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index ebc6119f1280..0cb4da657b9d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -449,6 +449,9 @@ long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { + memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", + base, base + size, (void *)_RET_IP_); + return __memblock_remove(&memblock.reserved, base, size); } @@ -456,6 +459,8 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; + memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n", + base, base + size, (void *)_RET_IP_); BUG_ON(0 == size); return memblock_add_region(_rgn, base, size); -- cgit v1.2.3 From a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 14 Jul 2011 11:57:10 -0700 Subject: memblock: Cast phys_addr_t to unsigned long long for printf use phys_addr_t is not necessarily the same thing as unsigned long long. It is, however, easier to cast it to unsigned long long for printf purposes than it is to deal with different printf formats. Signed-off-by: H. 
Peter Anvin Cc: Tejun Heo Link: http://lkml.kernel.org/r/4E1F4D2C.3000507@zytor.com --- mm/memblock.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 0cb4da657b9d..a75723d62631 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -450,7 +450,9 @@ long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", - base, base + size, (void *)_RET_IP_); + (unsigned long long)base, + (unsigned long long)base + size, + (void *)_RET_IP_); return __memblock_remove(&memblock.reserved, base, size); } @@ -460,7 +462,9 @@ long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; memblock_dbg("memblock_reserve: [%#016llx-%#016llx] %pF\n", - base, base + size, (void *)_RET_IP_); + (unsigned long long)base, + (unsigned long long)base + size, + (void *)_RET_IP_); BUG_ON(0 == size); return memblock_add_region(_rgn, base, size); -- cgit v1.2.3 From 581adcbe121872429de76ff9884762de71a76200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:06 -0800 Subject: memblock: Make memblock_{add|remove|free|reserve}() return int and update prototypes memblock_{add|remove|free|reserve}() return either 0 or -errno but had long as return type. Change it to int. Also, drop 'extern' from all prototypes in memblock.h - they are unnecessary and used inconsistently (especially if mm.h is included in the picture). Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index a57092f63a86..948036718245 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -176,7 +176,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } /* Defined below but needed now */ -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +static int memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); static int __init_memblock memblock_double_array(struct memblock_type *type) { @@ -316,8 +316,8 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, * RETURNS: * 0 on success, -errno on failure. 
*/ -static long __init_memblock memblock_add_region(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) +static int __init_memblock memblock_add_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) { bool insert = false; phys_addr_t obase = base, end = base + size; @@ -387,13 +387,13 @@ repeat: } } -long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) { return memblock_add_region(&memblock.memory, base, size); } -static long __init_memblock __memblock_remove(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) +static int __init_memblock __memblock_remove(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) { phys_addr_t end = base + size; int i; @@ -443,12 +443,12 @@ static long __init_memblock __memblock_remove(struct memblock_type *type, return 0; } -long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) { return __memblock_remove(&memblock.memory, base, size); } -long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) { memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", (unsigned long long)base, @@ -458,7 +458,7 @@ long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) return __memblock_remove(&memblock.reserved, base, size); } -long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) { struct memblock_type *_rgn = &memblock.reserved; -- cgit v1.2.3 From 9c8c27e2b89b020fd33dd3f2b18405d3f027e6ac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:06 -0800 Subject: memblock: Use memblock_reserve() in memblock internal functions Make memblock_double_array(), __memblock_alloc_base() and memblock_alloc_nid() use memblock_reserve() instead of calling memblock_add_region() with reserved array directly. This eases debugging and updates to memblock_add_region(). Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 948036718245..d0506183c5b1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -175,9 +175,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } } -/* Defined below but needed now */ -static int memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); - static int __init_memblock memblock_double_array(struct memblock_type *type) { struct memblock_region *new_array, *old_array; @@ -235,7 +232,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; /* Add the new reserved region now. Should not fail ! */ - BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size)); + BUG_ON(memblock_reserve(addr, new_size)); /* If the array wasn't our static init one, then free it. 
We only do * that before SLAB is available as later on, we don't know whether @@ -652,7 +649,7 @@ phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, ph size = round_up(size, align); found = memblock_find_in_range(0, max_addr, size, align); - if (found && !memblock_add_region(&memblock.reserved, found, size)) + if (found && !memblock_reserve(found, size)) return found; return 0; @@ -748,7 +745,7 @@ phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int n found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE, size, align, nid); - if (found && !memblock_add_region(&memblock.reserved, found, size)) + if (found && !memblock_reserve(found, size)) return found; return 0; -- cgit v1.2.3 From 4ff7b82f1e5fc65a7c9512b231b4ea533f28541a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:06 -0800 Subject: memblock: Add __memblock_dump_all() Add __memblock_dump_all() which dumps memblock configuration whether memblock_debug is enabled or not. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index d0506183c5b1..4b80f6fae091 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -898,11 +898,8 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name } } -void __init_memblock memblock_dump_all(void) +void __init_memblock __memblock_dump_all(void) { - if (!memblock_debug) - return; - pr_info("MEMBLOCK configuration:\n"); pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); -- cgit v1.2.3 From c5a1cb284b791fcc3c70962331a682452afaf6cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill sentinel entries at the end of static region arrays memblock no longer depends on having one more entry at the end during addition making the sentinel entries at the end of region arrays not too useful. Remove the sentinels. This eases further updates. 
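The sentinels are safe to drop because the reimplemented memblock_add_region() earlier in this series grows the array before inserting anything, rather than inserting first and resizing afterwards as the old code did. Recalling the relevant excerpt from that reimplementation:

	/* the first pass only counted nr_new; grow the array up front so
	 * the insertion pass can never step past the last slot */
	while (type->cnt + nr_new > type->max)
		if (memblock_double_array(type) < 0)
			return -ENOMEM;
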
Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 4b80f6fae091..e808df845bbb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -24,8 +24,8 @@ struct memblock memblock __initdata_memblock; int memblock_debug __initdata_memblock; int memblock_can_resize __initdata_memblock; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -911,12 +911,6 @@ void __init memblock_analyze(void) { int i; - /* Check marker in the unused last array entry */ - WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base - != MEMBLOCK_INACTIVE); - WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base - != MEMBLOCK_INACTIVE); - memblock.memory_size = 0; for (i = 0; i < memblock.memory.cnt; i++) @@ -940,10 +934,6 @@ void __init memblock_init(void) memblock.reserved.regions = memblock_reserved_init_regions; memblock.reserved.max = INIT_MEMBLOCK_REGIONS; - /* Write a marker in the unused last array entry */ - memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE; - memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = MEMBLOCK_INACTIVE; - /* Create a dummy zero size MEMBLOCK which will get coalesced away later. * This simplifies the memblock_add() code below... */ -- cgit v1.2.3 From fe091c208a40299fba40e62292a610fb91e44b4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Kill memblock_init() memblock_init() initializes arrays for regions and memblock itself; however, all these can be done with struct initializers and memblock_init() can be removed. This patch kills memblock_init() and initializes memblock with struct initializer. The only difference is that the first dummy entries don't have .nid set to MAX_NUMNODES initially. This doesn't cause any behavior difference. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Russell King Cc: Michal Simek Cc: Paul Mundt Cc: "David S. Miller" Cc: Guan Xuetao Cc: "H. 
Peter Anvin" --- mm/memblock.c | 48 ++++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index e808df845bbb..5bbb87f59aee 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,12 +20,23 @@ #include #include -struct memblock memblock __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; + +struct memblock memblock __initdata_memblock = { + .memory.regions = memblock_memory_init_regions, + .memory.cnt = 1, /* empty dummy entry */ + .memory.max = INIT_MEMBLOCK_REGIONS, + + .reserved.regions = memblock_reserved_init_regions, + .reserved.cnt = 1, /* empty dummy entry */ + .reserved.max = INIT_MEMBLOCK_REGIONS, + + .current_limit = MEMBLOCK_ALLOC_ANYWHERE, +}; int memblock_debug __initdata_memblock; int memblock_can_resize __initdata_memblock; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -920,37 +931,6 @@ void __init memblock_analyze(void) memblock_can_resize = 1; } -void __init memblock_init(void) -{ - static int init_done __initdata = 0; - - if (init_done) - return; - init_done = 1; - - /* Hookup the initial arrays */ - memblock.memory.regions = memblock_memory_init_regions; - memblock.memory.max = INIT_MEMBLOCK_REGIONS; - memblock.reserved.regions = memblock_reserved_init_regions; - memblock.reserved.max = INIT_MEMBLOCK_REGIONS; - - /* Create a dummy zero size MEMBLOCK which will get coalesced away later. - * This simplifies the memblock_add() code below... - */ - memblock.memory.regions[0].base = 0; - memblock.memory.regions[0].size = 0; - memblock_set_region_node(&memblock.memory.regions[0], MAX_NUMNODES); - memblock.memory.cnt = 1; - - /* Ditto. */ - memblock.reserved.regions[0].base = 0; - memblock.reserved.regions[0].size = 0; - memblock_set_region_node(&memblock.reserved.regions[0], MAX_NUMNODES); - memblock.reserved.cnt = 1; - - memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; -} - static int __init early_memblock(char *p) { if (p && strstr(p, "debug")) -- cgit v1.2.3 From 6a9ceb31c06f1e8d50be79259756fda73234868d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:07 -0800 Subject: memblock: Separate out memblock_isolate_range() from memblock_set_node() memblock_set_node() operates in three steps - break regions crossing boundaries, set nid and merge back regions. This patch separates the first part into a separate function - memblock_isolate_range(), which breaks regions crossing range boundaries and returns range index range for regions properly contained in the specified memory range. This doesn't introduce any behavior change and will be used to further unify region handling. 
From 6a9ceb31c06f1e8d50be79259756fda73234868d Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Separate out memblock_isolate_range() from memblock_set_node()

memblock_set_node() operates in three steps - break regions crossing
boundaries, set nid and merge back regions. This patch separates the
first part into a separate function - memblock_isolate_range(), which
breaks regions crossing range boundaries and returns range index range
for regions properly contained in the specified memory range.

This doesn't introduce any behavior change and will be used to further
unify region handling.

Signed-off-by: Tejun Heo
Cc: Benjamin Herrenschmidt
Cc: Yinghai Lu
---
 mm/memblock.c | 117 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 78 insertions(+), 39 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index 5bbb87f59aee..a1e96a0fda00 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -400,6 +400,77 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
         return memblock_add_region(&memblock.memory, base, size);
 }

+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/**
+ * memblock_isolate_range - isolate given range into disjoint memblocks
+ * @type: memblock type to isolate range for
+ * @base: base of range to isolate
+ * @size: size of range to isolate
+ * @start_rgn: out parameter for the start of isolated region
+ * @end_rgn: out parameter for the end of isolated region
+ *
+ * Walk @type and ensure that regions don't cross the boundaries defined by
+ * [@base,@base+@size). Crossing regions are split at the boundaries,
+ * which may create at most two more regions. The index of the first
+ * region inside the range is returned in *@start_rgn and end in *@end_rgn.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int __init_memblock memblock_isolate_range(struct memblock_type *type,
+                                        phys_addr_t base, phys_addr_t size,
+                                        int *start_rgn, int *end_rgn)
+{
+        phys_addr_t end = base + size;
+        int i;
+
+        *start_rgn = *end_rgn = 0;
+
+        /* we'll create at most two more regions */
+        while (type->cnt + 2 > type->max)
+                if (memblock_double_array(type) < 0)
+                        return -ENOMEM;
+
+        for (i = 0; i < type->cnt; i++) {
+                struct memblock_region *rgn = &type->regions[i];
+                phys_addr_t rbase = rgn->base;
+                phys_addr_t rend = rbase + rgn->size;
+
+                if (rbase >= end)
+                        break;
+                if (rend <= base)
+                        continue;
+
+                if (rbase < base) {
+                        /*
+                         * @rgn intersects from below. Split and continue
+                         * to process the next region - the new top half.
+                         */
+                        rgn->base = base;
+                        rgn->size = rend - rgn->base;
+                        memblock_insert_region(type, i, rbase, base - rbase,
+                                               rgn->nid);
+                } else if (rend > end) {
+                        /*
+                         * @rgn intersects from above. Split and redo the
+                         * current region - the new bottom half.
+                         */
+                        rgn->base = end;
+                        rgn->size = rend - rgn->base;
+                        memblock_insert_region(type, i--, rbase, end - rbase,
+                                               rgn->nid);
+                } else {
+                        /* @rgn is fully contained, record it */
+                        if (!*end_rgn)
+                                *start_rgn = i;
+                        *end_rgn = i + 1;
+                }
+        }
+
+        return 0;
+}
+#endif
+
 static int __init_memblock __memblock_remove(struct memblock_type *type,
                                              phys_addr_t base, phys_addr_t size)
 {
@@ -603,47 +674,15 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
                                       int nid)
 {
         struct memblock_type *type = &memblock.memory;
-        phys_addr_t end = base + size;
-        int i;
+        int start_rgn, end_rgn;
+        int i, ret;

-        /* we'll create at most two more regions */
-        while (type->cnt + 2 > type->max)
-                if (memblock_double_array(type) < 0)
-                        return -ENOMEM;
+        ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
+        if (ret)
+                return ret;

-        for (i = 0; i < type->cnt; i++) {
-                struct memblock_region *rgn = &type->regions[i];
-                phys_addr_t rbase = rgn->base;
-                phys_addr_t rend = rbase + rgn->size;
-
-                if (rbase >= end)
-                        break;
-                if (rend <= base)
-                        continue;
-
-                if (rbase < base) {
-                        /*
-                         * @rgn intersects from below. Split and continue
-                         * to process the next region - the new top half.
-                         */
-                        rgn->base = base;
-                        rgn->size = rend - rgn->base;
-                        memblock_insert_region(type, i, rbase, base - rbase,
-                                               rgn->nid);
-                } else if (rend > end) {
-                        /*
-                         * @rgn intersects from above. Split and redo the
-                         * current region - the new bottom half.
-                         */
-                        rgn->base = end;
-                        rgn->size = rend - rgn->base;
-                        memblock_insert_region(type, i--, rbase, end - rbase,
-                                               rgn->nid);
-                } else {
-                        /* @rgn is fully contained, set ->nid */
-                        rgn->nid = nid;
-                }
-        }
+        for (i = start_rgn; i < end_rgn; i++)
+                type->regions[i].nid = nid;

         memblock_merge_regions(type);
         return 0;
-- cgit v1.2.3
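To see what memblock_isolate_range() guarantees, here is a toy model in
plain C (fixed-size array, invented names, no error handling): after
isolate(), every region is either fully inside or fully outside
[base, base+size), and [start_rgn, end_rgn) indexes the inside ones.

/* Toy model of the isolation step: split any region crossing the
 * range boundaries. Regions are sorted and non-overlapping. */
#include <stdio.h>

struct region { unsigned long base, size; };

static struct region rgns[16] = { { 0, 100 }, { 150, 100 } };
static int cnt = 2;

static void insert_at(int idx, unsigned long base, unsigned long size)
{
        for (int j = cnt; j > idx; j--)
                rgns[j] = rgns[j - 1];
        rgns[idx] = (struct region){ base, size };
        cnt++;
}

static void isolate(unsigned long base, unsigned long size,
                    int *start_rgn, int *end_rgn)
{
        unsigned long end = base + size;

        *start_rgn = *end_rgn = 0;
        for (int i = 0; i < cnt; i++) {
                unsigned long rbase = rgns[i].base;
                unsigned long rend = rbase + rgns[i].size;

                if (rbase >= end)
                        break;
                if (rend <= base)
                        continue;
                if (rbase < base) {             /* crosses lower boundary */
                        rgns[i].base = base;
                        rgns[i].size = rend - base;
                        insert_at(i, rbase, base - rbase);
                } else if (rend > end) {        /* crosses upper boundary */
                        rgns[i].base = end;
                        rgns[i].size = rend - end;
                        insert_at(i--, rbase, end - rbase);
                } else {                        /* fully contained */
                        if (!*end_rgn)
                                *start_rgn = i;
                        *end_rgn = i + 1;
                }
        }
}

int main(void)
{
        int s, e;

        isolate(50, 150, &s, &e);       /* cuts both existing regions */
        for (int i = 0; i < cnt; i++)
                printf("[%lu, %lu)%s\n", rgns[i].base,
                       rgns[i].base + rgns[i].size,
                       (i >= s && i < e) ? " <- inside" : "");
        return 0;
}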
From 719361809fde9dbe9ccc4cf71f9fa9add5fa8bf9 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Reimplement __memblock_remove() using memblock_isolate_range()

__memblock_remove()'s open coded region manipulation can be trivially
replaced with memblock_isolate_range(). This increases code sharing and
eases improving region tracking.

This pulls memblock_isolate_range() out of HAVE_MEMBLOCK_NODE_MAP. Make
it use memblock_get_region_node() instead of assuming rgn->nid is
available.

-v2: Fixed build failure on !HAVE_MEMBLOCK_NODE_MAP caused by direct
     rgn->nid access.

Signed-off-by: Tejun Heo
Cc: Benjamin Herrenschmidt
Cc: Yinghai Lu
---
 mm/memblock.c | 56 +++++++++----------------------------------------------
 1 file changed, 9 insertions(+), 47 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index a1e96a0fda00..fffe68b4bf14 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -400,7 +400,6 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
         return memblock_add_region(&memblock.memory, base, size);
 }

-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /**
  * memblock_isolate_range - isolate given range into disjoint memblocks
  * @type: memblock type to isolate range for
@@ -449,7 +448,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                         rgn->base = base;
                         rgn->size = rend - rgn->base;
                         memblock_insert_region(type, i, rbase, base - rbase,
-                                               rgn->nid);
+                                               memblock_get_region_node(rgn));
                 } else if (rend > end) {
                         /*
                          * @rgn intersects from above. Split and redo the
@@ -458,7 +457,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                         rgn->base = end;
                         rgn->size = rend - rgn->base;
                         memblock_insert_region(type, i--, rbase, end - rbase,
-                                               rgn->nid);
+                                               memblock_get_region_node(rgn));
                 } else {
                         /* @rgn is fully contained, record it */
                         if (!*end_rgn)
@@ -469,56 +468,19 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,

         return 0;
 }
-#endif

 static int __init_memblock __memblock_remove(struct memblock_type *type,
                                              phys_addr_t base, phys_addr_t size)
 {
-        phys_addr_t end = base + size;
-        int i;
-
-        /* Walk through the array for collisions */
-        for (i = 0; i < type->cnt; i++) {
-                struct memblock_region *rgn = &type->regions[i];
-                phys_addr_t rend = rgn->base + rgn->size;
-
-                /* Nothing more to do, exit */
-                if (rgn->base > end || rgn->size == 0)
-                        break;
-
-                /* If we fully enclose the block, drop it */
-                if (base <= rgn->base && end >= rend) {
-                        memblock_remove_region(type, i--);
-                        continue;
-                }
-
-                /* If we are fully enclosed within a block
-                 * then we need to split it and we are done
-                 */
-                if (base > rgn->base && end < rend) {
-                        rgn->size = base - rgn->base;
-                        if (!memblock_add_region(type, end, rend - end))
-                                return 0;
-                        /* Failure to split is bad, we at least
-                         * restore the block before erroring
-                         */
-                        rgn->size = rend - rgn->base;
-                        WARN_ON(1);
-                        return -1;
-                }
-
-                /* Check if we need to trim the bottom of a block */
-                if (rgn->base < end && rend > end) {
-                        rgn->size -= end - rgn->base;
-                        rgn->base = end;
-                        break;
-                }
+        int start_rgn, end_rgn;
+        int i, ret;

-                /* And check if we need to trim the top of a block */
-                if (base < rend)
-                        rgn->size -= rend - base;
+        ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
+        if (ret)
+                return ret;

-        }
+        for (i = end_rgn - 1; i >= start_rgn; i--)
+                memblock_remove_region(type, i);
         return 0;
 }
-- cgit v1.2.3
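Given that postcondition, the new __memblock_remove() falls out naturally.
Extending the toy sketch from the note above (same rgns/cnt/isolate
definitions, treated here as already included): delete the isolated slots
from the highest index down, so the indices recorded by isolate() stay
valid while the remaining entries shift left.

/* Continuation of the toy sketch above (assumes rgns, cnt and
 * isolate() from that sketch are in scope). */
static void remove_at(int idx)
{
        for (int j = idx; j < cnt - 1; j++)
                rgns[j] = rgns[j + 1];
        cnt--;
}

static void remove_range(unsigned long base, unsigned long size)
{
        int start_rgn, end_rgn;

        isolate(base, size, &start_rgn, &end_rgn);
        /* reverse order: deleting slot i doesn't disturb slots < i */
        for (int i = end_rgn - 1; i >= start_rgn; i--)
                remove_at(i);
}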
From eb18f1b5bfb99b1d7d2f5d792e6ee5c9b7d89330 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Make memblock functions handle overflowing range @size

Allow memblock users to specify range where @base + @size overflows and
automatically cap it at maximum. This makes the interface more robust
and specifying till-the-end-of-memory easier.

Signed-off-by: Tejun Heo
Cc: Benjamin Herrenschmidt
Cc: Yinghai Lu
---
 mm/memblock.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index fffe68b4bf14..945dc31258eb 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -49,6 +49,12 @@ static inline const char *memblock_type_name(struct memblock_type *type)
         return "unknown";
 }

+/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
+static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
+{
+        return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
+}
+
 /*
  * Address comparison utilities
  */
@@ -328,7 +334,8 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,
                                      phys_addr_t base, phys_addr_t size)
 {
         bool insert = false;
-        phys_addr_t obase = base, end = base + size;
+        phys_addr_t obase = base;
+        phys_addr_t end = base + memblock_cap_size(base, &size);
         int i, nr_new;

         /* special case for empty array */
@@ -420,7 +427,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                                         phys_addr_t base, phys_addr_t size,
                                         int *start_rgn, int *end_rgn)
 {
-        phys_addr_t end = base + size;
+        phys_addr_t end = base + memblock_cap_size(base, &size);
         int i;

         *start_rgn = *end_rgn = 0;
@@ -868,16 +875,18 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
         int idx = memblock_search(&memblock.memory, base);
+        phys_addr_t end = base + memblock_cap_size(base, &size);

         if (idx == -1)
                 return 0;
         return memblock.memory.regions[idx].base <= base &&
                 (memblock.memory.regions[idx].base +
-                 memblock.memory.regions[idx].size) >= (base + size);
+                 memblock.memory.regions[idx].size) >= end;
 }

 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
 {
+        memblock_cap_size(base, &size);
         return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
 }
-- cgit v1.2.3
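The capping idiom is worth seeing in isolation: clamp *size against the
distance to the top of the address space, so @base + @size can never wrap.
A minimal self-contained demo with uint64_t standing in for phys_addr_t
(names invented for the demo):

#include <stdio.h>
#include <stdint.h>

static uint64_t cap_size(uint64_t base, uint64_t *size)
{
        uint64_t max = UINT64_MAX - base;       /* largest size that won't wrap */

        if (*size > max)
                *size = max;
        return *size;
}

int main(void)
{
        uint64_t base = 0xfffffffffffff000ull;
        uint64_t size = UINT64_MAX;             /* "till the end of memory" */

        uint64_t end = base + cap_size(base, &size);
        /* end == UINT64_MAX: capped, no wraparound */
        printf("base=%#llx size=%#llx end=%#llx\n",
               (unsigned long long)base, (unsigned long long)size,
               (unsigned long long)end);
        return 0;
}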
From c0ce8fef55896a2813a3d94e1b2d0e6d7fab6228 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 8 Dec 2011 10:22:07 -0800
Subject: memblock: Reimplement memblock_enforce_memory_limit() using __memblock_remove()

With recent updates, the basic memblock operations are robust enough
that there's no reason for memblock_enforce_memory_limit() to directly
manipulate memblock region arrays. Reimplement it using
__memblock_remove().

Signed-off-by: Tejun Heo
Cc: Benjamin Herrenschmidt
Cc: Yinghai Lu
---
 mm/memblock.c | 42 +++++++++++++-----------------------------
 1 file changed, 13 insertions(+), 29 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index 945dc31258eb..b44875f5a996 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -804,44 +804,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
 }

 /* You must call memblock_analyze() after this. */
-void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
+void __init memblock_enforce_memory_limit(phys_addr_t limit)
 {
         unsigned long i;
-        phys_addr_t limit;
-        struct memblock_region *p;
+        phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;

-        if (!memory_limit)
+        if (!limit)
                 return;

-        /* Truncate the memblock regions to satisfy the memory limit. */
-        limit = memory_limit;
+        /* find out max address */
         for (i = 0; i < memblock.memory.cnt; i++) {
-                if (limit > memblock.memory.regions[i].size) {
-                        limit -= memblock.memory.regions[i].size;
-                        continue;
-                }
-
-                memblock.memory.regions[i].size = limit;
-                memblock.memory.cnt = i + 1;
-                break;
-        }
-
-        memory_limit = memblock_end_of_DRAM();
+                struct memblock_region *r = &memblock.memory.regions[i];

-        /* And truncate any reserves above the limit also. */
-        for (i = 0; i < memblock.reserved.cnt; i++) {
-                p = &memblock.reserved.regions[i];
-
-                if (p->base > memory_limit)
-                        p->size = 0;
-                else if ((p->base + p->size) > memory_limit)
-                        p->size = memory_limit - p->base;
-
-                if (p->size == 0) {
-                        memblock_remove_region(&memblock.reserved, i);
-                        i--;
+                if (limit <= r->size) {
+                        max_addr = r->base + limit;
+                        break;
                 }
+                limit -= r->size;
         }
+
+        /* truncate both memory and reserved regions */
+        __memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
+        __memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
 }

 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
-- cgit v1.2.3
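A hedged sketch of the new limit logic, again reusing the toy
rgns/cnt/remove_range machinery from the earlier notes: walk the regions
spending the size budget, find the physical address where the budget runs
out, then truncate everything above it with a single range removal (the
kernel does this once for memory and once for reserved).

/* Continuation of the toy sketches above (assumes rgns, cnt and
 * remove_range() are in scope). */
static void enforce_limit(unsigned long limit)
{
        unsigned long max_addr = ~0ul;

        if (!limit)
                return;
        for (int i = 0; i < cnt; i++) {
                if (limit <= rgns[i].size) {    /* budget ends inside region */
                        max_addr = rgns[i].base + limit;
                        break;
                }
                limit -= rgns[i].size;
        }
        /* cap the size so base + size can't wrap, as memblock_cap_size()
         * does in the kernel */
        remove_range(max_addr, ~0ul - max_addr);
}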
From 1440c4e2c918532f39131c3330fe2226e16be7b6 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 8 Dec 2011 10:22:08 -0800
Subject: memblock: Track total size of regions automatically

Total size of memory regions was calculated by memblock_analyze(),
requiring the function to be called explicitly between operations which
can change memory regions and possible users of the total size, which
is cumbersome and fragile.

This patch makes each memblock_type track its total size automatically
with minor modifications to the memblock manipulation functions and
removes the requirement to call memblock_analyze().

[__]memblock_dump_all() now also dumps the total size of reserved
regions.

Signed-off-by: Tejun Heo
Cc: Benjamin Herrenschmidt
Cc: Yinghai Lu
---
 mm/memblock.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index b44875f5a996..f39964184b4a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -179,12 +179,14 @@ int __init_memblock memblock_reserve_reserved_regions(void)
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
+        type->total_size -= type->regions[r].size;
         memmove(&type->regions[r], &type->regions[r + 1],
                 (type->cnt - (r + 1)) * sizeof(type->regions[r]));
         type->cnt--;

         /* Special case for empty arrays */
         if (type->cnt == 0) {
+                WARN_ON(type->total_size != 0);
                 type->cnt = 1;
                 type->regions[0].base = 0;
                 type->regions[0].size = 0;
@@ -314,6 +316,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
         rgn->size = size;
         memblock_set_region_node(rgn, nid);
         type->cnt++;
+        type->total_size += size;
 }

 /**
@@ -340,10 +343,11 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,

         /* special case for empty array */
         if (type->regions[0].size == 0) {
-                WARN_ON(type->cnt != 1);
+                WARN_ON(type->cnt != 1 || type->total_size);
                 type->regions[0].base = base;
                 type->regions[0].size = size;
                 memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
+                type->total_size = size;
                 return 0;
         }
 repeat:
@@ -453,7 +457,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                          * to process the next region - the new top half.
                          */
                         rgn->base = base;
-                        rgn->size = rend - rgn->base;
+                        rgn->size -= base - rbase;
+                        type->total_size -= base - rbase;
                         memblock_insert_region(type, i, rbase, base - rbase,
                                                memblock_get_region_node(rgn));
                 } else if (rend > end) {
@@ -462,7 +467,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
                          * current region - the new bottom half.
                          */
                         rgn->base = end;
-                        rgn->size = rend - rgn->base;
+                        rgn->size -= end - rbase;
+                        type->total_size -= end - rbase;
                         memblock_insert_region(type, i--, rbase, end - rbase,
                                                memblock_get_region_node(rgn));
                 } else {
@@ -784,10 +790,9 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * Remaining API functions
  */

-/* You must call memblock_analyze() before this. */
 phys_addr_t __init memblock_phys_mem_size(void)
 {
-        return memblock.memory_size;
+        return memblock.memory.total_size;
 }

 /* lowest address */
@@ -803,7 +808,6 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
         return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
 }

-/* You must call memblock_analyze() after this. */
 void __init memblock_enforce_memory_limit(phys_addr_t limit)
 {
         unsigned long i;
@@ -906,7 +910,9 @@ static void __init_memblock memblock_dump(struct memblock_type *type, char *name
 void __init_memblock __memblock_dump_all(void)
 {
         pr_info("MEMBLOCK configuration:\n");
-        pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);
+        pr_info(" memory size = %#llx reserved size = %#llx\n",
+                (unsigned long long)memblock.memory.total_size,
+                (unsigned long long)memblock.reserved.total_size);

         memblock_dump(&memblock.memory, "memory");
         memblock_dump(&memblock.reserved, "reserved");
@@ -914,13 +920,6 @@ void __init_memblock __memblock_dump_all(void)

 void __init memblock_analyze(void)
 {
-        int i;
-
-        memblock.memory_size = 0;
-
-        for (i = 0; i < memblock.memory.cnt; i++)
-                memblock.memory_size += memblock.memory.regions[i].size;
-
         /* We allow resizing from there */
         memblock_can_resize = 1;
 }
-- cgit v1.2.3
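The bookkeeping rule behind this patch is simple: every operation that
inserts, removes, or shrinks a region adjusts a running total at the same
time, so the total is always current and the analyze pass becomes
unnecessary. A tiny self-contained sketch of the invariant (illustrative
types only; note that a split, which shrinks and re-inserts, leaves the
total unchanged):

#include <assert.h>

struct type {
        unsigned long total_size;
        /* ... region array elided ... */
};

static void on_insert(struct type *t, unsigned long size)
{
        t->total_size += size;          /* paired with every insert */
}

static void on_remove(struct type *t, unsigned long size)
{
        t->total_size -= size;          /* paired with every remove/shrink */
}

int main(void)
{
        struct type mem = { 0 };

        on_insert(&mem, 0x3000);
        /* splitting 0x3000 into 0x1000 + 0x2000: shrink then insert */
        on_remove(&mem, 0x1000);
        on_insert(&mem, 0x1000);
        assert(mem.total_size == 0x3000);       /* invariant held, no analyze pass */
        return 0;
}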
Miller" Cc: Guan Xuetao Cc: "H. Peter Anvin" --- mm/memblock.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index f39964184b4a..a3ca95f35e03 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -36,7 +36,7 @@ struct memblock memblock __initdata_memblock = { }; int memblock_debug __initdata_memblock; -int memblock_can_resize __initdata_memblock; +static int memblock_can_resize __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -918,9 +918,8 @@ void __init_memblock __memblock_dump_all(void) memblock_dump(&memblock.reserved, "reserved"); } -void __init memblock_analyze(void) +void __init memblock_allow_resize(void) { - /* We allow resizing from there */ memblock_can_resize = 1; } -- cgit v1.2.3 From 7fb0bc3f06fdc3a35e41bcea7a15e53d2515362f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:08 -0800 Subject: memblock: Implement memblock_add_node() Implement memblock_add_node() which can add a new memblock memory region with specific node ID. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index a3ca95f35e03..ef4987b03afd 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -324,6 +324,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, * @type: memblock type to add new region into * @base: base address of the new region * @size: size of the new region + * @nid: nid of the new region * * Add new memblock region [@base,@base+@size) into @type. The new region * is allowed to overlap with existing ones - overlaps don't affect already @@ -334,7 +335,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type, * 0 on success, -errno on failure. 
  */
 static int __init_memblock memblock_add_region(struct memblock_type *type,
-                                     phys_addr_t base, phys_addr_t size)
+                                     phys_addr_t base, phys_addr_t size, int nid)
 {
         bool insert = false;
         phys_addr_t obase = base;
@@ -346,7 +347,7 @@ static int __init_memblock memblock_add_region(struct memblock_type *type,
         /* special case for empty array */
         if (type->regions[0].size == 0) {
                 WARN_ON(type->cnt != 1 || type->total_size);
                 type->regions[0].base = base;
                 type->regions[0].size = size;
-                memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
+                memblock_set_region_node(&type->regions[0], nid);
                 type->total_size = size;
                 return 0;
         }
@@ -376,7 +377,7 @@ repeat:
                         nr_new++;
                         if (insert)
                                 memblock_insert_region(type, i++, base,
-                                                       rbase - base, MAX_NUMNODES);
+                                                       rbase - base, nid);
                 }
                 /* area below @rend is dealt with, forget about it */
                 base = min(rend, end);
@@ -386,8 +387,7 @@ repeat:
         if (base < end) {
                 nr_new++;
                 if (insert)
-                        memblock_insert_region(type, i, base, end - base,
-                                               MAX_NUMNODES);
+                        memblock_insert_region(type, i, base, end - base, nid);
         }

         /*
@@ -406,9 +406,15 @@ repeat:
         }
 }

+int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
+                                      int nid)
+{
+        return memblock_add_region(&memblock.memory, base, size, nid);
+}
+
 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
-        return memblock_add_region(&memblock.memory, base, size);
+        return memblock_add_region(&memblock.memory, base, size, MAX_NUMNODES);
 }

 /**
@@ -522,7 +528,7 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
                      (void *)_RET_IP_);
         BUG_ON(0 == size);

-        return memblock_add_region(_rgn, base, size);
+        return memblock_add_region(_rgn, base, size, MAX_NUMNODES);
 }

 /**
-- cgit v1.2.3
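The API shape here is a common C pattern: one internal worker grows the
extra parameter, and the public entry points become thin wrappers, with
the pre-existing signature passing a "no node" sentinel. A minimal
standalone sketch (invented names, printf in place of real bookkeeping):

#include <stdio.h>

#define NO_NODE (-1)

/* internal worker carries the extra parameter */
static int add_region(unsigned long base, unsigned long size, int nid)
{
        printf("add [%#lx, %#lx) nid=%d\n", base, base + size, nid);
        return 0;
}

/* node-aware variant */
static int add_node(unsigned long base, unsigned long size, int nid)
{
        return add_region(base, size, nid);
}

/* original signature preserved for existing callers */
static int add(unsigned long base, unsigned long size)
{
        return add_region(base, size, NO_NODE);
}

int main(void)
{
        add(0x1000, 0x1000);
        add_node(0x2000, 0x1000, 0);
        return 0;
}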
Peter Anvin" --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index ef4987b03afd..1adbef09b43a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -716,7 +716,7 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start, phys_addr_t end, int *nid) { -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP unsigned long start_pfn, end_pfn; int i; -- cgit v1.2.3 From 7bd0b0f0da3b1ec11cbcc798eb0ef747a1184077 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 8 Dec 2011 10:22:09 -0800 Subject: memblock: Reimplement memblock allocation using reverse free area iterator Now that all early memory information is in memblock when enabled, we can implement reverse free area iterator and use it to implement NUMA aware allocator which is then wrapped for simpler variants instead of the confusing and inefficient mending of information in separate NUMA aware allocator. Implement for_each_free_mem_range_reverse(), use it to reimplement memblock_find_in_range_node() which in turn is used by all allocators. The visible allocator interface is inconsistent and can probably use some cleanup too. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Yinghai Lu --- mm/memblock.c | 273 +++++++++++++++++++++++++++------------------------------- 1 file changed, 127 insertions(+), 146 deletions(-) (limited to 'mm/memblock.c') diff --git a/mm/memblock.c b/mm/memblock.c index 1adbef09b43a..2f55f19b7c86 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -79,78 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type, return (i < type->cnt) ? i : -1; } -/* - * Find, allocate, deallocate or reserve unreserved regions. All allocations - * are top-down. +/** + * memblock_find_in_range_node - find free area in given range and node + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %MAX_NUMNODES for any node + * + * Find @size free area aligned to @align in the specified range and node. + * + * RETURNS: + * Found address on success, %0 on failure. 
  */
-
-static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end,
-                                          phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
+                                        phys_addr_t end, phys_addr_t size,
+                                        phys_addr_t align, int nid)
 {
-        phys_addr_t base, res_base;
-        long j;
+        phys_addr_t this_start, this_end, cand;
+        u64 i;

-        /* In case, huge size is requested */
-        if (end < size)
-                return 0;
+        /* align @size to avoid excessive fragmentation on reserved array */
+        size = round_up(size, align);
+
+        /* pump up @end */
+        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
+                end = memblock.current_limit;

-        base = round_down(end - size, align);
+        /* adjust @start to avoid underflow and allocating the first page */
+        start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+        end = max(start, end);

-        /* Prevent allocations returning 0 as it's also used to
-         * indicate an allocation failure
-         */
-        if (start == 0)
-                start = PAGE_SIZE;
-
-        while (start <= base) {
-                j = memblock_overlaps_region(&memblock.reserved, base, size);
-                if (j < 0)
-                        return base;
-                res_base = memblock.reserved.regions[j].base;
-                if (res_base < size)
-                        break;
-                base = round_down(res_base - size, align);
-        }
+        for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+                this_start = clamp(this_start, start, end);
+                this_end = clamp(this_end, start, end);

+                cand = round_down(this_end - size, align);
+                if (cand >= this_start)
+                        return cand;
+        }
         return 0;
 }

-/*
- * Find a free area with specified alignment in a specific range.
+/**
+ * memblock_find_in_range - find free area in given range
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ *
+ * Find @size free area aligned to @align in the specified range.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, phys_addr_t end,
-                                        phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+                                        phys_addr_t end, phys_addr_t size,
+                                        phys_addr_t align)
 {
-        long i;
-
-        BUG_ON(0 == size);
-
-        /* Pump up max_addr */
-        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-                end = memblock.current_limit;
-
-        /* We do a top-down search, this tends to limit memory
-         * fragmentation by keeping early boot allocs near the
-         * top of memory
-         */
-        for (i = memblock.memory.cnt - 1; i >= 0; i--) {
-                phys_addr_t memblockbase = memblock.memory.regions[i].base;
-                phys_addr_t memblocksize = memblock.memory.regions[i].size;
-                phys_addr_t bottom, top, found;
-
-                if (memblocksize < size)
-                        continue;
-                if ((memblockbase + memblocksize) <= start)
-                        break;
-                bottom = max(memblockbase, start);
-                top = min(memblockbase + memblocksize, end);
-                if (bottom >= top)
-                        continue;
-                found = memblock_find_region(bottom, top, size, align);
-                if (found)
-                        return found;
-        }
-        return 0;
+        return memblock_find_in_range_node(start, end, size, align,
+                                           MAX_NUMNODES);
 }

 /*
@@ -607,6 +595,70 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
         *idx = ULLONG_MAX;
 }

+/**
+ * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
+ * @idx: pointer to u64 loop variable
+ * @nid: nid: node selector, %MAX_NUMNODES for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ *
+ * Reverse of __next_free_mem_range().
+ */
+void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
+                                           phys_addr_t *out_start,
+                                           phys_addr_t *out_end, int *out_nid)
+{
+        struct memblock_type *mem = &memblock.memory;
+        struct memblock_type *rsv = &memblock.reserved;
+        int mi = *idx & 0xffffffff;
+        int ri = *idx >> 32;
+
+        if (*idx == (u64)ULLONG_MAX) {
+                mi = mem->cnt - 1;
+                ri = rsv->cnt;
+        }
+
+        for ( ; mi >= 0; mi--) {
+                struct memblock_region *m = &mem->regions[mi];
+                phys_addr_t m_start = m->base;
+                phys_addr_t m_end = m->base + m->size;
+
+                /* only memory regions are associated with nodes, check it */
+                if (nid != MAX_NUMNODES && nid != memblock_get_region_node(m))
+                        continue;
+
+                /* scan areas before each reservation for intersection */
+                for ( ; ri >= 0; ri--) {
+                        struct memblock_region *r = &rsv->regions[ri];
+                        phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
+                        phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+
+                        /* if ri advanced past mi, break out to advance mi */
+                        if (r_end <= m_start)
+                                break;
+                        /* if the two regions intersect, we're done */
+                        if (m_end > r_start) {
+                                if (out_start)
+                                        *out_start = max(m_start, r_start);
+                                if (out_end)
+                                        *out_end = min(m_end, r_end);
+                                if (out_nid)
+                                        *out_nid = memblock_get_region_node(m);
+
+                                if (m_start >= r_start)
+                                        mi--;
+                                else
+                                        ri--;
+                                *idx = (u32)mi | (u64)ri << 32;
+                                return;
+                        }
+                }
+        }
+
+        *idx = ULLONG_MAX;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
  * Common iterator interface used to define for_each_mem_range().
@@ -670,22 +722,29 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+                                        phys_addr_t align, phys_addr_t max_addr,
+                                        int nid)
 {
         phys_addr_t found;

-        /* We align the size to limit fragmentation. Without this, a lot of
-         * small allocs quickly eat up the whole reserve array on sparc
-         */
-        size = round_up(size, align);
-
-        found = memblock_find_in_range(0, max_addr, size, align);
+        found = memblock_find_in_range_node(0, max_addr, size, align, nid);
         if (found && !memblock_reserve(found, size))
                 return found;

         return 0;
 }

+phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
+{
+        return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
+{
+        return memblock_alloc_base_nid(size, align, max_addr, MAX_NUMNODES);
+}
+
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
         phys_addr_t alloc;
@@ -704,84 +763,6 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)

         return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
-
-/*
- * Additional node-local top-down allocators.
- *
- * WARNING: Only available after early_node_map[] has been populated,
- * on some architectures, that is after all the calls to add_active_range()
- * have been done to populate it.
- */
-
-static phys_addr_t __init memblock_nid_range_rev(phys_addr_t start,
-                                                 phys_addr_t end, int *nid)
-{
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-        unsigned long start_pfn, end_pfn;
-        int i;
-
-        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, nid)
-                if (end > PFN_PHYS(start_pfn) && end <= PFN_PHYS(end_pfn))
-                        return max(start, PFN_PHYS(start_pfn));
-#endif
-        *nid = 0;
-        return start;
-}
-
-phys_addr_t __init memblock_find_in_range_node(phys_addr_t start,
-                                               phys_addr_t end,
-                                               phys_addr_t size,
-                                               phys_addr_t align, int nid)
-{
-        struct memblock_type *mem = &memblock.memory;
-        int i;
-
-        BUG_ON(0 == size);
-
-        /* Pump up max_addr */
-        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
-                end = memblock.current_limit;
-
-        for (i = mem->cnt - 1; i >= 0; i--) {
-                struct memblock_region *r = &mem->regions[i];
-                phys_addr_t base = max(start, r->base);
-                phys_addr_t top = min(end, r->base + r->size);
-
-                while (base < top) {
-                        phys_addr_t tbase, ret;
-                        int tnid;
-
-                        tbase = memblock_nid_range_rev(base, top, &tnid);
-                        if (nid == MAX_NUMNODES || tnid == nid) {
-                                ret = memblock_find_region(tbase, top, size, align);
-                                if (ret)
-                                        return ret;
-                        }
-                        top = tbase;
-                }
-        }
-
-        return 0;
-}
-
-phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-{
-        phys_addr_t found;
-
-        /*
-         * We align the size to limit fragmentation. Without this, a lot of
-         * small allocs quickly eat up the whole reserve array on sparc
-         */
-        size = round_up(size, align);
-
-        found = memblock_find_in_range_node(0, MEMBLOCK_ALLOC_ACCESSIBLE,
-                                            size, align, nid);
-        if (found && !memblock_reserve(found, size))
-                return found;
-
-        return 0;
-}
-
 phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
         phys_addr_t res = memblock_alloc_nid(size, align, nid);
-- cgit v1.2.3

From 5d53cb27d849c899136c048ec84c940ac449494b Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 13 Jan 2012 10:14:12 -0800
Subject: memblock: Fix alloc failure due to dumb underflow protection in memblock_find_in_range_node()

7bd0b0f0da ("memblock: Reimplement memblock allocation using reverse
free area iterator") implemented a simple top-down allocator using a
reverse memblock iterator. To avoid underflow in the allocator loop,
it simply raised the lower boundary to the requested size under the
assumption that requested size would be far smaller than available
memblocks.

This causes early page table allocation failure under certain
configurations in Xen. Fix it by checking for underflow directly
instead of bumping up lower bound.

Signed-off-by: Tejun Heo
Reported-by: Konrad Rzeszutek Wilk
Cc: rjw@sisk.pl
Cc: xen-devel@lists.xensource.com
Cc: Benjamin Herrenschmidt
Cc: Andrew Morton
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/20120113181412.GA11112@google.com
Signed-off-by: Ingo Molnar
---
 mm/memblock.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index 2f55f19b7c86..77b5f227e1d8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -106,14 +106,17 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
         if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
                 end = memblock.current_limit;

-        /* adjust @start to avoid underflow and allocating the first page */
-        start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+        /* avoid allocating the first page */
+        start = max_t(phys_addr_t, start, PAGE_SIZE);
         end = max(start, end);

         for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
                 this_start = clamp(this_start, start, end);
                 this_end = clamp(this_end, start, end);

+                if (this_end < size)
+                        continue;
+
                 cand = round_down(this_end - size, align);
                 if (cand >= this_start)
                         return cand;
-- cgit v1.2.3
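The reverse iterator rewrite and the underflow fix land in the same loop,
so a standalone model helps. The sketch below (plain C, invented names;
it assumes the reserved ranges are sorted and lie inside the single memory
range) walks the free gaps top-down the way
for_each_free_mem_range_reverse() does, and shows where the
this_end < size guard belongs: without it, hi - size would wrap for a gap
smaller than the request.

#include <stdio.h>

struct range { unsigned long start, end; };     /* [start, end) */

static const struct range memory[] = { { 0x1000, 0x20000 } };
static const struct range reserved[] = {
        { 0x3000, 0x5000 }, { 0x10000, 0x11000 }
};
#define NR_RSV (sizeof(reserved) / sizeof(reserved[0]))

static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
        return x & ~(a - 1);    /* a must be a power of two */
}

/* top-down first fit over the free gaps of memory[0] */
static unsigned long find_free_top_down(unsigned long size, unsigned long align)
{
        /* gap i is bounded below by reserved[i-1].end and above by
         * reserved[i].start; i == NR_RSV is the gap above the last
         * reservation */
        for (int i = NR_RSV; i >= 0; i--) {
                unsigned long lo = i ? reserved[i - 1].end : memory[0].start;
                unsigned long hi = (i < (int)NR_RSV) ? reserved[i].start
                                                     : memory[0].end;

                if (hi < size)          /* underflow guard: hi - size would wrap */
                        continue;
                unsigned long cand = round_down_ul(hi - size, align);
                if (cand >= lo)
                        return cand;
        }
        return 0;       /* 0 doubles as "not found" */
}

int main(void)
{
        printf("%#lx\n", find_free_top_down(0x2000, 0x1000));   /* 0x1e000 */
        return 0;
}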
From 847854f5988a04fe7e02d2fdd4fa0df9f96360fe Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 29 Feb 2012 05:56:21 +0900
Subject: memblock: Fix size aligning of memblock_alloc_base_nid()

memblock allocator aligns @size to @align to reduce the amount of
fragmentation. Commit 7bd0b0f0da ("memblock: Reimplement memblock
allocation using reverse free area iterator") broke it by incorrectly
relocating the @size aligning to memblock_find_in_range_node(). As the
aligned size is not propagated back to memblock_alloc_base_nid(), the
actually reserved size isn't aligned.

While this increases memory use for the memblock reserved array, it
shouldn't cause any critical failure; however, it seems that the size
aligning was hiding a use-beyond-allocation bug in sparc64, and losing
the aligning causes boot failure.

The underlying problem is currently being debugged, but this is a
proper fix in itself, it's already pretty late in the -rc cycle for
boot failures, and reverting the change for debugging isn't difficult.
Restore the size aligning, moving it to memblock_alloc_base_nid().

Reported-by: Meelis Roos
Signed-off-by: Tejun Heo
Cc: David S. Miller
Cc: Grant Likely
Cc: Rob Herring
Cc: Linus Torvalds
Cc: Andrew Morton
Link: http://lkml.kernel.org/r/20120228205621.GC3252@dhcp-172-17-108-109.mtv.corp.google.com
Signed-off-by: Ingo Molnar
LKML-Reference:
---
 mm/memblock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'mm/memblock.c')

diff --git a/mm/memblock.c b/mm/memblock.c
index 77b5f227e1d8..99f285599501 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -99,9 +99,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
         phys_addr_t this_start, this_end, cand;
         u64 i;

-        /* align @size to avoid excessive fragmentation on reserved array */
-        size = round_up(size, align);
-
         /* pump up @end */
         if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
                 end = memblock.current_limit;
@@ -731,6 +728,9 @@ static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 {
         phys_addr_t found;

+        /* align @size to avoid excessive fragmentation on reserved array */
+        size = round_up(size, align);
+
         found = memblock_find_in_range_node(0, max_addr, size, align, nid);
         if (found && !memblock_reserve(found, size))
                 return found;
-- cgit v1.2.3
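Why aligning @size (and not just the base) matters: if bases are aligned
but reserved sizes are not, back-to-back reservations leave unmergeable
slivers between them and the reserved array fills up. A small
self-contained demo of the aligned case (invented names and numbers):

#include <stdio.h>

static unsigned long round_up_ul(unsigned long x, unsigned long a)
{
        return (x + a - 1) & ~(a - 1);  /* a must be a power of two */
}

int main(void)
{
        unsigned long align = 0x1000;
        unsigned long sizes[] = { 0x11, 0x230, 0x1001 };
        unsigned long top = 0x100000;   /* allocate downwards from here */

        for (int i = 0; i < 3; i++) {
                unsigned long size = round_up_ul(sizes[i], align);
                top -= size;            /* aligned size keeps regions adjacent */
                printf("reserve [%#lx, %#lx)\n", top, top + size);
        }
        /* the three reservations now form one contiguous block
         * [0xfc000, 0x100000) that a region-merging allocator can keep
         * as a single array entry */
        return 0;
}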