From ef0855d334e1e4af7c3e0c42146a8479ea14a5ab Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:14 -0700 Subject: mm: mempolicy: turn vma_set_policy() into vma_dup_policy() Simple cleanup. Every user of vma_set_policy() does the same work, this looks a bit annoying imho. And the new trivial helper which does mpol_dup() + vma_set_policy() to simplify the callers. Signed-off-by: Oleg Nesterov Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index f9c97d10b873..14f6bb4830f7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2380,7 +2380,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below) { - struct mempolicy *pol; struct vm_area_struct *new; int err = -ENOMEM; @@ -2404,12 +2403,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); } - pol = mpol_dup(vma_policy(vma)); - if (IS_ERR(pol)) { - err = PTR_ERR(pol); + err = vma_dup_policy(vma, new); + if (err) goto out_free_vma; - } - vma_set_policy(new, pol); if (anon_vma_clone(new, vma)) goto out_free_mpol; @@ -2437,7 +2433,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, fput(new->vm_file); unlink_anon_vmas(new); out_free_mpol: - mpol_put(pol); + mpol_put(vma_policy(new)); out_free_vma: kmem_cache_free(vm_area_cachep, new); out_err: @@ -2780,7 +2776,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; - struct mempolicy *pol; bool faulted_in_anon_vma = true; /* @@ -2825,10 +2820,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; - pol = mpol_dup(vma_policy(vma)); - if (IS_ERR(pol)) + if (vma_dup_policy(vma, new_vma)) goto out_free_vma; - vma_set_policy(new_vma, pol); INIT_LIST_HEAD(&new_vma->anon_vma_chain); if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; @@ -2843,7 +2836,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_free_mempol: - mpol_put(pol); + mpol_put(vma_policy(new_vma)); out_free_vma: kmem_cache_free(vm_area_cachep, new_vma); return NULL; -- cgit v1.2.3 From b2c56e4f7d93be3f33a82ec66f0d0f46713ff5f1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:18 -0700 Subject: mm: shift VM_GROWS* check from mmap_region() to do_mmap_pgoff() mmap() doesn't allow the non-anonymous mappings with VM_GROWS* bit set. In particular this means that mmap_region()->vma_merge(file, vm_flags) must always fail if "vm_flags & VM_GROWS" is set incorrectly. So it does not make sense to check VM_GROWS* after we already allocated the new vma, the only caller, do_mmap_pgoff(), which can pass this flag can do the check itself. And this looks a bit more correct, mmap_region() already unmapped the old mapping at this stage. But if mmap() is going to fail, it should avoid do_munmap() if possible. Note: we check VM_GROWS at the end to ensure that do_mmap_pgoff() won't return EINVAL in the case when it currently returns another error code. Many thanks to Hugh who nacked the buggy v1. Signed-off-by: Oleg Nesterov Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 14f6bb4830f7..6cff7ba24a34 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1302,6 +1302,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (!file->f_op || !file->f_op->mmap) return -ENODEV; + if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) + return -EINVAL; break; default: @@ -1310,6 +1312,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } else { switch (flags & MAP_TYPE) { case MAP_SHARED: + if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) + return -EINVAL; /* * Ignore pgoff. */ @@ -1544,11 +1548,7 @@ munmap_back: vma->vm_pgoff = pgoff; INIT_LIST_HEAD(&vma->anon_vma_chain); - error = -EINVAL; /* when rejecting VM_GROWSDOWN|VM_GROWSUP */ - if (file) { - if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) - goto free_vma; if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) @@ -1573,8 +1573,6 @@ munmap_back: pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { - if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP))) - goto free_vma; error = shmem_zero_setup(vma); if (error) goto free_vma; -- cgit v1.2.3 From 077bf22b5cf233863826afbfa4af9b18650a832d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:19 -0700 Subject: mm: do_mmap_pgoff: cleanup the usage of file_inode() Simple cleanup. Move "struct inode *inode" variable into "if (file)" block to simplify the code and avoid the unnecessary check. Signed-off-by: Oleg Nesterov Cc: Hugh Dickins Cc: Al Viro Cc: Colin Cross Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 6cff7ba24a34..1e7a3ea23f1a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1202,7 +1202,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long *populate) { struct mm_struct * mm = current->mm; - struct inode *inode; vm_flags_t vm_flags; *populate = 0; @@ -1265,9 +1264,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return -EAGAIN; } - inode = file ? file_inode(file) : NULL; - if (file) { + struct inode *inode = file_inode(file); + switch (flags & MAP_TYPE) { case MAP_SHARED: if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) -- cgit v1.2.3 From e86867720e617774b560dfbc169b7f3d0d490950 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:20 -0700 Subject: mm: mmap_region: kill correct_wcount/inode, use allow_write_access() correct_wcount and inode in mmap_region() just complicate the code. This boolean was needed previously, when deny_write_access() was called before vma_merge(), now we can simply check VM_DENYWRITE and do allow_write_access() if it is set. allow_write_access() checks file != NULL, so this is safe even if it was possible to use VM_DENYWRITE && !file. Just we need to ensure we use the same file which was deny_write_access()'ed, so the patch also moves "file = vma->vm_file" down after allow_write_access(). Signed-off-by: Oleg Nesterov Cc: Hugh Dickins Cc: Al Viro Cc: Colin Cross Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 1e7a3ea23f1a..13926a5a6901 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1479,11 +1479,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - int correct_wcount = 0; int error; struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; - struct inode *inode = file ? file_inode(file) : NULL; /* Check against address space limit. */ if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { @@ -1552,7 +1550,6 @@ munmap_back: error = deny_write_access(file); if (error) goto free_vma; - correct_wcount = 1; } vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); @@ -1593,11 +1590,10 @@ munmap_back: } vma_link(mm, vma, prev, rb_link, rb_parent); - file = vma->vm_file; - /* Once vma denies write, undo our temporary denial count */ - if (correct_wcount) - atomic_inc(&inode->i_writecount); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); + file = vma->vm_file; out: perf_event_mmap(vma); @@ -1616,8 +1612,8 @@ out: return addr; unmap_and_free_vma: - if (correct_wcount) - atomic_inc(&inode->i_writecount); + if (vm_flags & VM_DENYWRITE) + allow_write_access(file); vma->vm_file = NULL; fput(file); -- cgit v1.2.3 From d9104d1ca9662498339c0de975b4666c30485f4e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 11 Sep 2013 14:22:24 -0700 Subject: mm: track vma changes with VM_SOFTDIRTY bit Pavel reported that in case if vma area get unmapped and then mapped (or expanded) in-place, the soft dirty tracker won't be able to recognize this situation since it works on pte level and ptes are get zapped on unmap, loosing soft dirty bit of course. So to resolve this situation we need to track actions on vma level, there VM_SOFTDIRTY flag comes in. When new vma area created (or old expanded) we set this bit, and keep it here until application calls for clearing soft dirty bit. Thus when user space application track memory changes now it can detect if vma area is renewed. Reported-by: Pavel Emelyanov Signed-off-by: Cyrill Gorcunov Cc: Andy Lutomirski Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 13926a5a6901..51958d192a48 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1609,6 +1609,15 @@ out: if (file) uprobe_mmap(vma); + /* + * New (or expanded) vma always get soft dirty status. + * Otherwise user-space soft-dirty page tracker won't + * be able to distinguish situation when vma area unmapped, + * then new mapped in-place (which must be aimed as + * a completely new data area). + */ + vma->vm_flags |= VM_SOFTDIRTY; + return addr; unmap_and_free_vma: @@ -2652,6 +2661,7 @@ out: mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); + vma->vm_flags |= VM_SOFTDIRTY; return addr; } @@ -2916,7 +2926,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND; + vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = &special_mapping_vmops; -- cgit v1.2.3 From 2d8a17813ec817fa58addd2c92b4ca8cae5bafbb Mon Sep 17 00:00:00 2001 From: Yanchuan Nian Date: Wed, 11 Sep 2013 14:23:05 -0700 Subject: mm/mmap: remove unnecessary assignment pgoff is not used after the statement "pgoff = vma->vm_pgoff;", so the assignment is redundant. Signed-off-by: Yanchuan Nian Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 1 - 1 file changed, 1 deletion(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 51958d192a48..9d548512ff8a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1566,7 +1566,6 @@ munmap_back: WARN_ON_ONCE(addr != vma->vm_start); addr = vma->vm_start; - pgoff = vma->vm_pgoff; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); -- cgit v1.2.3