From 71879d3cb3dd8f2dfdefb252775c1b3ea04a3dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:38 +0100 Subject: proc: mem_release() should check mm != NULL mem_release() can hit mm == NULL, add the necessary check. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cde9edf9c4d..c3617ea7830b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -822,8 +822,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; - - mmput(mm); + if (mm) + mmput(mm); return 0; } -- cgit v1.2.3 From 572d34b946bae070debd42db1143034d9687e13f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:14:54 +0100 Subject: proc: unify mem_read() and mem_write() No functional changes, cleanup and preparation. mem_read() and mem_write() are very similar. Move this code into the new common helper, mem_rw(), which takes the additional "int write" argument. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 90 +++++++++++++++++++++------------------------------------- 1 file changed, 32 insertions(+), 58 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index c3617ea7830b..be1909041685 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -718,57 +718,13 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int ret; - char *page; - unsigned long src = *ppos; struct mm_struct *mm = file->private_data; - - if (!mm) - return 0; - - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - - ret = 0; - - while (count > 0) { - int this_len, retval; - - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_remote_vm(mm, src, page, this_len, 0); - if (!retval) { - if (!ret) - ret = -EIO; - break; - } - - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; - - free_page((unsigned long) page); - return ret; -} - -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) -{ - int copied; + unsigned long addr = *ppos; + ssize_t copied; char *page; - unsigned long dst = *ppos; - struct mm_struct *mm = file->private_data; if (!mm) return 0; @@ -779,30 +735,48 @@ static ssize_t mem_write(struct file * file, const char __user *buf, copied = 0; while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_remote_vm(mm, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; free_page((unsigned long) page); return copied; } +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} + loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { -- cgit v1.2.3 From 6d08f2c7139790c268820a2e590795cb8333181a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 31 Jan 2012 17:15:11 +0100 Subject: proc: make sure mem_open() doesn't pin the target's memory Once /proc/pid/mem is opened, the memory can't be released until mem_release() even if its owner exits. Change mem_open() to do atomic_inc(mm_count) + mmput(), this only pins mm_struct. Change mem_rw() to do atomic_inc_not_zero(mm_count) before access_remote_vm(), this verifies that this mm is still alive. I am not sure what should mem_rw() return if atomic_inc_not_zero() fails. With this patch it returns zero to match the "mm == NULL" case, may be it should return -EINVAL like it did before e268337d. Perhaps it makes sense to add the additional fatal_signal_pending() check into the main loop, to ensure we do not hold this memory if the target task was oom-killed. Cc: stable@kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/proc/base.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index be1909041685..d9512bd03e6c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -711,6 +711,13 @@ static int mem_open(struct inode* inode, struct file* file) if (IS_ERR(mm)) return PTR_ERR(mm); + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; @@ -734,6 +741,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -761,6 +771,8 @@ static ssize_t mem_rw(struct file *file, char __user *buf, } *ppos = addr; + mmput(mm); +free: free_page((unsigned long) page); return copied; } @@ -797,7 +809,7 @@ static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) - mmput(mm); + mmdrop(mm); return 0; } -- cgit v1.2.3 From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001 From: Christopher Yeoh Date: Thu, 2 Feb 2012 11:34:09 +1030 Subject: Fix race in process_vm_rw_core This fixes the race in process_vm_core found by Oleg (see http://article.gmane.org/gmane.linux.kernel/1235667/ for details). This has been updated since I last sent it as the creation of the new mm_access() function did almost exactly the same thing as parts of the previous version of this patch did. In order to use mm_access() even when /proc isn't enabled, we move it to kernel/fork.c where other related process mm access functions already are. Signed-off-by: Chris Yeoh Signed-off-by: Linus Torvalds --- fs/proc/base.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'fs/proc/base.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index d9512bd03e6c..d4548dd49b02 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); -- cgit v1.2.3