From ef0855d334e1e4af7c3e0c42146a8479ea14a5ab Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:14 -0700 Subject: mm: mempolicy: turn vma_set_policy() into vma_dup_policy() Simple cleanup. Every user of vma_set_policy() does the same work, this looks a bit annoying imho. And the new trivial helper which does mpol_dup() + vma_set_policy() to simplify the callers. Signed-off-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4baf12e534d1..6b1d426731ae 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2065,6 +2065,16 @@ retry_cpuset: } EXPORT_SYMBOL(alloc_pages_current); +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) +{ + struct mempolicy *pol = mpol_dup(vma_policy(src)); + + if (IS_ERR(pol)) + return PTR_ERR(pol); + dst->vm_policy = pol; + return 0; +} + /* * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it * rebinds the mempolicy its copying by calling mpol_rebind_policy() -- cgit v1.2.3 From 1da6f0e1b316d0215989fe4d7c657edead1fdea7 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 11 Sep 2013 14:21:25 -0700 Subject: mm/mempolicy: return NULL if node is NUMA_NO_NODE in get_task_policy If node == NUMA_NO_NODE, pol is NULL, we should return NULL instead of do "if (!pol->mode)" check. [akpm@linux-foundation.org: reorganise code] Signed-off-by: Jianguo Wu Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: Hugh Dickins Cc: Hanjun Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 6b1d426731ae..27022ca890f8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -123,16 +123,19 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; static struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; - int node; if (!pol) { - node = numa_node_id(); - if (node != NUMA_NO_NODE) - pol = &preferred_node_policy[node]; + int node = numa_node_id(); - /* preferred_node_policy is not initialised early in boot */ - if (!pol->mode) - pol = NULL; + if (node != NUMA_NO_NODE) { + pol = &preferred_node_policy[node]; + /* + * preferred_node_policy is not initialised early in + * boot + */ + if (!pol->mode) + pol = NULL; + } } return pol; -- cgit v1.2.3 From e2d8cf405525d83e6ca42969be460f94b0339798 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Sep 2013 14:22:03 -0700 Subject: migrate: add hugepage migration code to migrate_pages() Extend check_range() to handle vma with VM_HUGETLB set. We will be able to migrate hugepage with migrate_pages(2) after applying the enablement patch which comes later in this series. Note that for larger hugepages (covered by pud entries, 1GB for x86_64 for example), we simply skip it now. Note that using pmd_huge/pud_huge assumes that hugepages are pointed to by pmd/pud. This is not true in some architectures implementing hugepage with other mechanisms like ia64, but it's OK because pmd_huge/pud_huge simply return 0 in such arch and page walker simply ignores such hugepages. Signed-off-by: Naoya Horiguchi Acked-by: Andi Kleen Reviewed-by: Wanpeng Li Acked-by: Hillf Danton Cc: Mel Gorman Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Michal Hocko Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 27022ca890f8..4626be621e74 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -515,6 +515,30 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return addr != end; } +static void check_hugetlb_pmd_range(struct vm_area_struct *vma, pmd_t *pmd, + const nodemask_t *nodes, unsigned long flags, + void *private) +{ +#ifdef CONFIG_HUGETLB_PAGE + int nid; + struct page *page; + + spin_lock(&vma->vm_mm->page_table_lock); + page = pte_page(huge_ptep_get((pte_t *)pmd)); + nid = page_to_nid(page); + if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) + goto unlock; + /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ + if (flags & (MPOL_MF_MOVE_ALL) || + (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) + isolate_huge_page(page, private); +unlock: + spin_unlock(&vma->vm_mm->page_table_lock); +#else + BUG(); +#endif +} + static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, const nodemask_t *nodes, unsigned long flags, @@ -526,6 +550,13 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); + if (!pmd_present(*pmd)) + continue; + if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) { + check_hugetlb_pmd_range(vma, pmd, nodes, + flags, private); + continue; + } split_huge_page_pmd(vma, addr, pmd); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; @@ -547,6 +578,8 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); + if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) + continue; if (pud_none_or_clear_bad(pud)) continue; if (check_pmd_range(vma, pud, addr, next, nodes, @@ -638,9 +671,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); } - if (is_vm_hugetlb_page(vma)) - goto next; - if (flags & MPOL_MF_LAZY) { change_prot_numa(vma, start, endvma); goto next; @@ -993,7 +1023,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist, static struct page *new_node_page(struct page *page, unsigned long node, int **x) { - return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); + if (PageHuge(page)) + return alloc_huge_page_node(page_hstate(compound_head(page)), + node); + else + return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0); } /* @@ -1023,7 +1057,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, err = migrate_pages(&pagelist, new_node_page, dest, MIGRATE_SYNC, MR_SYSCALL); if (err) - putback_lru_pages(&pagelist); + putback_movable_pages(&pagelist); } return err; -- cgit v1.2.3 From 74060e4d78795c7c43805133cb717d82533d4e0d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Sep 2013 14:22:06 -0700 Subject: mm: mbind: add hugepage migration code to mbind() Extend do_mbind() to handle vma with VM_HUGETLB set. We will be able to migrate hugepage with mbind(2) after applying the enablement patch which comes later in this series. Signed-off-by: Naoya Horiguchi Acked-by: Andi Kleen Reviewed-by: Wanpeng Li Acked-by: Hillf Danton Cc: Mel Gorman Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Michal Hocko Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4626be621e74..c7c359213ae1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1192,6 +1192,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * vma = vma->vm_next; } + if (PageHuge(page)) + return alloc_huge_page_noerr(vma, address, 1); /* * if !vma, alloc_page_vma() will use task or system default policy */ @@ -1302,7 +1304,7 @@ static long do_mbind(unsigned long start, unsigned long len, (unsigned long)vma, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); if (nr_failed) - putback_lru_pages(&pagelist); + putback_movable_pages(&pagelist); } if (nr_failed && (flags & MPOL_MF_STRICT)) -- cgit v1.2.3 From 98094945785464c657d598291d714d11694c8cd9 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Sep 2013 14:22:14 -0700 Subject: mm/mempolicy: rename check_*range to queue_pages_*range The function check_range() (and its family) is not well-named, because it does not only checking something, but moving pages from list to list to do page migration for them. So queue_pages_*range is more desirable name. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Wanpeng Li Cc: Hillf Danton Cc: Mel Gorman Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Michal Hocko Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c7c359213ae1..9d778637b088 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -476,8 +476,11 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { static void migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags); -/* Scan through pages checking if pages follow certain conditions. */ -static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, +/* + * Scan through pages checking if pages follow certain conditions, + * and move them to the pagelist if they do. + */ +static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) @@ -515,8 +518,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return addr != end; } -static void check_hugetlb_pmd_range(struct vm_area_struct *vma, pmd_t *pmd, - const nodemask_t *nodes, unsigned long flags, +static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, + pmd_t *pmd, const nodemask_t *nodes, unsigned long flags, void *private) { #ifdef CONFIG_HUGETLB_PAGE @@ -539,7 +542,7 @@ unlock: #endif } -static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, +static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) @@ -553,21 +556,21 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud, if (!pmd_present(*pmd)) continue; if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) { - check_hugetlb_pmd_range(vma, pmd, nodes, + queue_pages_hugetlb_pmd_range(vma, pmd, nodes, flags, private); continue; } split_huge_page_pmd(vma, addr, pmd); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; - if (check_pte_range(vma, pmd, addr, next, nodes, + if (queue_pages_pte_range(vma, pmd, addr, next, nodes, flags, private)) return -EIO; } while (pmd++, addr = next, addr != end); return 0; } -static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, +static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) @@ -582,14 +585,14 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd, continue; if (pud_none_or_clear_bad(pud)) continue; - if (check_pmd_range(vma, pud, addr, next, nodes, + if (queue_pages_pmd_range(vma, pud, addr, next, nodes, flags, private)) return -EIO; } while (pud++, addr = next, addr != end); return 0; } -static inline int check_pgd_range(struct vm_area_struct *vma, +static inline int queue_pages_pgd_range(struct vm_area_struct *vma, unsigned long addr, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) @@ -602,7 +605,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - if (check_pud_range(vma, pgd, addr, next, nodes, + if (queue_pages_pud_range(vma, pgd, addr, next, nodes, flags, private)) return -EIO; } while (pgd++, addr = next, addr != end); @@ -640,12 +643,14 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ /* - * Check if all pages in a range are on a set of nodes. - * If pagelist != NULL then isolate pages from the LRU and - * put them on the pagelist. + * Walk through page tables and collect pages to be migrated. + * + * If pages found in a given range are on a set of nodes (determined by + * @nodes and @flags,) it's isolated and queued to the pagelist which is + * passed via @private.) */ static struct vm_area_struct * -check_range(struct mm_struct *mm, unsigned long start, unsigned long end, +queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) { int err; @@ -680,7 +685,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && vma_migratable(vma))) { - err = check_pgd_range(vma, start, endvma, nodes, + err = queue_pages_pgd_range(vma, start, endvma, nodes, flags, private); if (err) { first = ERR_PTR(err); @@ -1050,7 +1055,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, * space range and MPOL_MF_DISCONTIG_OK, this call can not fail. */ VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))); - check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, + queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask, flags | MPOL_MF_DISCONTIG_OK, &pagelist); if (!list_empty(&pagelist)) { @@ -1288,7 +1293,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (err) goto mpol_out; - vma = check_range(mm, start, end, nmask, + vma = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); err = PTR_ERR(vma); /* maybe ... */ -- cgit v1.2.3 From 0bf598d863e3c741d47e3178d645f04c9d6c186c Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Sep 2013 14:22:16 -0700 Subject: mbind: add BUG_ON(!vma) in new_vma_page() new_vma_page() is called only by page migration called from do_mbind(), where pages to be migrated are queued into a pagelist by queue_pages_range(). queue_pages_range() confirms that a queued page belongs to some vma, so !vma case is not supposed to be happen. This patch adds BUG_ON() to catch this unexpected case. Signed-off-by: Naoya Horiguchi Reported-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9d778637b088..04729647f359 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1196,12 +1196,14 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * break; vma = vma->vm_next; } + /* + * queue_pages_range() confirms that @page belongs to some vma, + * so vma shouldn't be NULL. + */ + BUG_ON(!vma); if (PageHuge(page)) return alloc_huge_page_noerr(vma, address, 1); - /* - * if !vma, alloc_page_vma() will use task or system default policy - */ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); } #else -- cgit v1.2.3