From b0320c7b7d1ac1bd5c2d9dff3258524ab39bad32 Mon Sep 17 00:00:00 2001
From: Rafael Aquini
Date: Wed, 15 Jun 2011 15:08:39 -0700
Subject: mm: fix negative commitlimit when gigantic hugepages are allocated

When 1GB hugepages are allocated on a system, free(1) reports less
available memory than what really is installed in the box.  Also, if the
total size of hugepages allocated on a system is over half of the total
memory size, CommitLimit becomes a negative number.

The problem is that gigantic hugepages (order > MAX_ORDER) can only be
allocated at boot with bootmem, so their frames are not accounted to
'totalram_pages'.  They are, however, accounted to hugetlb_total_pages().

What turns CommitLimit into a negative number is this calculation, in
fs/proc/meminfo.c:

	allowed = ((totalram_pages - hugetlb_total_pages()) *
		sysctl_overcommit_ratio / 100) + total_swap_pages;

A similar calculation occurs in __vm_enough_memory() in mm/mmap.c.

Also, every vm statistic which depends on 'totalram_pages' will render
confusing values, as if the system were 'missing' some part of its
memory.

Impact of this bug: when gigantic hugepages are allocated and
sysctl_overcommit_memory == OVERCOMMIT_NEVER, __vm_enough_memory() goes
through the 'allowed' calculation above and might end up mistakenly
returning -ENOMEM, forcing the system to start reclaiming pages earlier
than usual, which could have a detrimental impact on overall system
performance, depending on the workload.

Besides the aforementioned scenario, I can only think of this causing
annoyances with memory reports from /proc/meminfo and free(1).

[akpm@linux-foundation.org: standardize comment layout]
Reported-by: Russ Anderson
Signed-off-by: Rafael Aquini
Acked-by: Russ Anderson
Cc: Andrea Arcangeli
Cc: Christoph Lameter
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/hugetlb.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'mm/hugetlb.c')

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6402458fee38..bfcf153bc829 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1111,6 +1111,14 @@ static void __init gather_bootmem_prealloc(void)
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
 		prep_new_huge_page(h, page, page_to_nid(page));
+		/*
+		 * If we had gigantic hugepages allocated at boot time, we need
+		 * to restore the 'stolen' pages to totalram_pages in order to
+		 * fix confusing memory reports from free(1) and other
+		 * side effects, like CommitLimit going negative.
+		 */
+		if (h->order > (MAX_ORDER - 1))
+			totalram_pages += 1 << h->order;
 	}
 }
--
cgit v1.2.3
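
To make the arithmetic above concrete, here is a small stand-alone C
sketch (a user-space illustration only, not kernel code; the box size,
hugepage total, and overcommit ratio are hypothetical) of how the
subtraction goes negative before this fix, when gigantic hugepage
frames are missing from totalram_pages but still counted by
hugetlb_total_pages():

	#include <stdio.h>

	int main(void)
	{
		/*
		 * Hypothetical box: 16 GB RAM, 12 GB reserved at boot as
		 * 1 GB gigantic hugepages. Before the fix, those frames
		 * are absent from totalram_pages, so only 4 GB worth of
		 * 4 KiB pages is visible there.
		 */
		long totalram_pages   = 1024 * 1024;     /* 4 GB visible */
		long hugetlb_pages    = 3 * 1024 * 1024; /* 12 GB in hugepages */
		long overcommit_ratio = 50;              /* sysctl_overcommit_ratio */
		long total_swap_pages = 0;

		/* Same shape as the calculation quoted from fs/proc/meminfo.c */
		long allowed = (totalram_pages - hugetlb_pages)
				* overcommit_ratio / 100 + total_swap_pages;

		printf("CommitLimit: %ld kB\n", allowed * 4); /* -4194304 kB */
		return 0;
	}
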
From ee8f248d266ec6966c0ce6b7dec24de43dcc1b58 Mon Sep 17 00:00:00 2001
From: Becky Bruce
Date: Mon, 25 Jul 2011 17:11:50 -0700
Subject: hugetlb: add phys addr to struct huge_bootmem_page

This is needed on HIGHMEM systems - we don't always have a virtual
address, so store the physical address and map it in as needed.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Becky Bruce
Cc: Benjamin Herrenschmidt
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/hugetlb.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'mm/hugetlb.c')

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bfcf153bc829..c6d342d313c7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1105,8 +1105,16 @@ static void __init gather_bootmem_prealloc(void)
 	struct huge_bootmem_page *m;
 
 	list_for_each_entry(m, &huge_boot_pages, list) {
-		struct page *page = virt_to_page(m);
 		struct hstate *h = m->hstate;
+		struct page *page;
+
+#ifdef CONFIG_HIGHMEM
+		page = pfn_to_page(m->phys >> PAGE_SHIFT);
+		free_bootmem_late((unsigned long)m,
+				  sizeof(struct huge_bootmem_page));
+#else
+		page = virt_to_page(m);
+#endif
 		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
--
cgit v1.2.3
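
For context, a sketch of the structure this patch operates on. The
real definition lives in include/linux/hugetlb.h, which is outside
this mm/hugetlb.c-limited view, so treat the exact layout below as an
assumption inferred from the code above:

	/*
	 * On CONFIG_HIGHMEM systems the bootmem block holding this
	 * struct may have no permanent kernel mapping, so the patch
	 * records the physical address when the page is reserved at
	 * boot and locates the struct page via
	 * pfn_to_page(m->phys >> PAGE_SHIFT) instead of virt_to_page(m).
	 */
	struct huge_bootmem_page {
		struct list_head list;	/* linked into huge_boot_pages */
		struct hstate *hstate;	/* hugepage pool this page belongs to */
	#ifdef CONFIG_HIGHMEM
		phys_addr_t phys;	/* physical address of this block */
	#endif
	};
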
From 32f84528fbb5177275193a3311be8756f0cbd62c Mon Sep 17 00:00:00 2001
From: Chris Forbes
Date: Mon, 25 Jul 2011 17:12:14 -0700
Subject: mm: hugetlb: fix coding style issues

Fix coding style issues flagged by checkpatch.pl.

Signed-off-by: Chris Forbes
Acked-by: Eric B Munson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/hugetlb.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'mm/hugetlb.c')

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c6d342d313c7..dae27ba3be2c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,7 +24,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/hugetlb.h>
 #include <linux/node.h>
@@ -62,10 +62,10 @@ static DEFINE_SPINLOCK(hugetlb_lock);
  * must either hold the mmap_sem for write, or the mmap_sem for read and
  * the hugetlb_instantiation mutex:
  *
- * 	down_write(&mm->mmap_sem);
+ *	down_write(&mm->mmap_sem);
  * or
- * 	down_read(&mm->mmap_sem);
- * 	mutex_lock(&hugetlb_instantiation_mutex);
+ *	down_read(&mm->mmap_sem);
+ *	mutex_lock(&hugetlb_instantiation_mutex);
  */
 struct file_region {
 	struct list_head link;
@@ -503,9 +503,10 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[page_to_nid(page)]--;
 	for (i = 0; i < pages_per_huge_page(h); i++) {
-		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
-				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
-				1 << PG_private | 1<< PG_writeback);
+		page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
+				1 << PG_referenced | 1 << PG_dirty |
+				1 << PG_active | 1 << PG_reserved |
+				1 << PG_private | 1 << PG_writeback);
 	}
 	set_compound_page_dtor(page, NULL);
 	set_page_refcounted(page);
@@ -591,7 +592,6 @@ int PageHuge(struct page *page)
 
 	return dtor == free_huge_page;
 }
-
 EXPORT_SYMBOL_GPL(PageHuge);
 
 static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
@@ -2132,9 +2132,8 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 	pte_t entry;
 
 	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
-	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1))
 		update_mmu_cache(vma, address, ptep);
-	}
 }
 
@@ -2189,9 +2188,9 @@ static int is_hugetlb_entry_migration(pte_t pte)
 	if (huge_pte_none(pte) || pte_present(pte))
 		return 0;
 	swp = pte_to_swp_entry(pte);
-	if (non_swap_entry(swp) && is_migration_entry(swp)) {
+	if (non_swap_entry(swp) && is_migration_entry(swp))
 		return 1;
-	} else
+	else
 		return 0;
 }
 
@@ -2202,9 +2201,9 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 	if (huge_pte_none(pte) || pte_present(pte))
 		return 0;
 	swp = pte_to_swp_entry(pte);
-	if (non_swap_entry(swp) && is_hwpoison_entry(swp)) {
+	if (non_swap_entry(swp) && is_hwpoison_entry(swp))
 		return 1;
-	} else
+	else
 		return 0;
 }
 
@@ -2567,7 +2566,7 @@ retry:
 	 * So we need to block hugepage fault by PG_hwpoison bit check.
 	 */
 	if (unlikely(PageHWPoison(page))) {
-		ret = VM_FAULT_HWPOISON | 
+		ret = VM_FAULT_HWPOISON |
 			VM_FAULT_SET_HINDEX(h - hstates);
 		goto backout_unlocked;
 	}
@@ -2635,7 +2634,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			migration_entry_wait(mm, (pmd_t *)ptep, address);
 			return 0;
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
-			return VM_FAULT_HWPOISON_LARGE | 
+			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(h - hstates);
 	}
--
cgit v1.2.3
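
As a footnote to the update_and_free_page() hunk above, here is a
minimal user-space sketch of the page-flags mask idiom that hunk
reflows: individual bit positions are OR-ed into a single mask and
cleared in one store. The enum values are stand-ins for illustration,
not the kernel's real page-flags layout:

	#include <stdio.h>

	/* Stand-in bit positions; the kernel's PG_* live in an enum, too */
	enum { PG_locked, PG_error, PG_referenced, PG_dirty };

	int main(void)
	{
		unsigned long flags = (1UL << PG_locked) | (1UL << PG_dirty);

		/* Clear several flags at once, as done for each tail page */
		flags &= ~((1UL << PG_locked) | (1UL << PG_error) |
			   (1UL << PG_referenced) | (1UL << PG_dirty));

		printf("flags after clear: %#lx\n", flags); /* prints 0 */
		return 0;
	}
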