diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 69 | ||||
-rw-r--r-- | fs/proc/array.c | 75 | ||||
-rw-r--r-- | fs/proc/base.c | 120 | ||||
-rw-r--r-- | fs/proc/generic.c | 51 | ||||
-rw-r--r-- | fs/proc/inode.c | 90 | ||||
-rw-r--r-- | fs/proc/internal.h | 10 | ||||
-rw-r--r-- | fs/proc/kcore.c | 10 | ||||
-rw-r--r-- | fs/proc/kmsg.c | 2 | ||||
-rw-r--r-- | fs/proc/nommu.c | 4 | ||||
-rw-r--r-- | fs/proc/proc_misc.c | 47 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 43 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 433 | ||||
-rw-r--r-- | fs/proc/proc_tty.c | 48 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 102 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 5 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 6 |
16 files changed, 604 insertions, 511 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig new file mode 100644 index 000000000000..50f8f0600f06 --- /dev/null +++ b/fs/proc/Kconfig @@ -0,0 +1,69 @@ +config PROC_FS + bool "/proc file system support" if EMBEDDED + default y + help + This is a virtual file system providing information about the status + of the system. "Virtual" means that it doesn't take up any space on + your hard disk: the files are created on the fly by the kernel when + you try to access them. Also, you cannot read the files with older + version of the program less: you need to use more or cat. + + It's totally cool; for example, "cat /proc/interrupts" gives + information about what the different IRQs are used for at the moment + (there is a small number of Interrupt ReQuest lines in your computer + that are used by the attached devices to gain the CPU's attention -- + often a source of trouble if two devices are mistakenly configured + to use the same IRQ). The program procinfo to display some + information about your system gathered from the /proc file system. + + Before you can use the /proc file system, it has to be mounted, + meaning it has to be given a location in the directory hierarchy. + That location should be /proc. A command such as "mount -t proc proc + /proc" or the equivalent line in /etc/fstab does the job. + + The /proc file system is explained in the file + <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage + ("man 5 proc"). + + This option will enlarge your kernel by about 67 KB. Several + programs depend on this, so everyone should say Y here. + +config PROC_KCORE + bool "/proc/kcore support" if !ARM + depends on PROC_FS && MMU + +config PROC_VMCORE + bool "/proc/vmcore support (EXPERIMENTAL)" + depends on PROC_FS && CRASH_DUMP + default y + help + Exports the dump image of crashed kernel in ELF format. + +config PROC_SYSCTL + bool "Sysctl support (/proc/sys)" if EMBEDDED + depends on PROC_FS + select SYSCTL + default y + ---help--- + The sysctl interface provides a means of dynamically changing + certain kernel parameters and variables on the fly without requiring + a recompile of the kernel or reboot of the system. The primary + interface is through /proc/sys. If you say Y here a tree of + modifiable sysctl entries will be generated beneath the + /proc/sys directory. They are explained in the files + in <file:Documentation/sysctl/>. Note that enabling this + option will enlarge the kernel by at least 8 KB. + + As it is generally a good thing, you should say Y here unless + building a kernel for install/rescue disks or your system is very + limited in memory. + +config PROC_PAGE_MONITOR + default y + depends on PROC_FS && MMU + bool "Enable /proc page monitoring" if EMBEDDED + help + Various /proc files exist to monitor process memory utilization: + /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, + /proc/kpagecount, and /proc/kpageflags. Disabling these + interfaces will reduce the size of the kernel by approximately 4kb. diff --git a/fs/proc/array.c b/fs/proc/array.c index 797d775e0354..f4bc0e789539 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -80,16 +80,12 @@ #include <linux/delayacct.h> #include <linux/seq_file.h> #include <linux/pid_namespace.h> +#include <linux/tracehook.h> #include <asm/pgtable.h> #include <asm/processor.h> #include "internal.h" -/* Gcc optimizes away "strlen(x)" for constant x */ -#define ADDBUF(buffer, string) \ -do { memcpy(buffer, string, strlen(string)); \ - buffer += strlen(string); } while (0) - static inline void task_name(struct seq_file *m, struct task_struct *p) { int i; @@ -168,8 +164,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = pid_alive(p) && p->ptrace ? - task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; + tpid = 0; + if (pid_alive(p)) { + struct task_struct *tracer = tracehook_tracer_task(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + } seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -256,7 +256,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) sigemptyset(&ignored); sigemptyset(&caught); - rcu_read_lock(); if (lock_task_sighand(p, &flags)) { pending = p->pending.signal; shpending = p->signal->shared_pending.signal; @@ -267,7 +266,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } - rcu_read_unlock(); seq_printf(m, "Threads:\t%d\n", num_threads); seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); @@ -332,65 +330,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, return 0; } -/* - * Use precise platform statistics if available: - */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -static cputime_t task_utime(struct task_struct *p) -{ - return p->utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - return p->stime; -} -#else -static cputime_t task_utime(struct task_struct *p) -{ - clock_t utime = cputime_to_clock_t(p->utime), - total = utime + cputime_to_clock_t(p->stime); - u64 temp; - - /* - * Use CFS's precise accounting: - */ - temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); - - if (total) { - temp *= utime; - do_div(temp, total); - } - utime = (clock_t)temp; - - p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); - return p->prev_utime; -} - -static cputime_t task_stime(struct task_struct *p) -{ - clock_t stime; - - /* - * Use CFS's precise accounting. (we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsec_to_clock_t(p->se.sum_exec_runtime) - - cputime_to_clock_t(task_utime(p)); - - if (stime >= 0) - p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); - - return p->prev_stime; -} -#endif - -static cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; -} - static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b455371e7ff..b5918ae8ca79 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -53,6 +53,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> @@ -69,6 +70,7 @@ #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -146,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, return count; } -int maps_protect; -EXPORT_SYMBOL(maps_protect); - static struct fs_struct *get_fs_struct(struct task_struct *task) { struct fs_struct *fs; @@ -162,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) static int get_nr_threads(struct task_struct *tsk) { - /* Must be called with the rcu_read_lock held */ unsigned long flags; int count = 0; @@ -231,10 +229,14 @@ static int check_mem_permission(struct task_struct *task) * If current is actively ptrace'ing, and would also be * permitted to freshly attach with ptrace now, permit it. */ - if (task->parent == current && (task->ptrace & PT_PTRACED) && - task_is_stopped_or_traced(task) && - ptrace_may_attach(task)) - return 0; + if (task_is_stopped_or_traced(task)) { + int match; + rcu_read_lock(); + match = (tracehook_tracer_task(task) == current); + rcu_read_unlock(); + if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) + return 0; + } /* * Noone else is allowed. @@ -251,7 +253,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task) task_lock(task); if (task->mm != mm) goto out; - if (task->mm != current->mm && __ptrace_may_attach(task) < 0) + if (task->mm != current->mm && + __ptrace_may_access(task, PTRACE_MODE_READ) < 0) goto out; task_unlock(task); return mm; @@ -464,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) struct rlimit rlim[RLIM_NLIMITS]; - rcu_read_lock(); - if (!lock_task_sighand(task,&flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(task, &flags)) return 0; - } memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); - rcu_read_unlock(); /* * print the file header @@ -503,6 +502,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) return count; } +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK +static int proc_pid_syscall(struct task_struct *task, char *buffer) +{ + long nr; + unsigned long args[6], sp, pc; + + if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + return sprintf(buffer, "running\n"); + + if (nr < 0) + return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + + return sprintf(buffer, + "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + nr, + args[0], args[1], args[2], args[3], args[4], args[5], + sp, pc); +} +#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -518,7 +537,7 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_attach(task); + allowed = ptrace_may_access(task, PTRACE_MODE_READ); put_task_struct(task); } return allowed; @@ -904,7 +923,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!task) goto out_no_task; - if (!ptrace_may_attach(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out; ret = -ENOMEM; @@ -1833,8 +1852,7 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int proc_fd_permission(struct inode *inode, int mask) { int rv; @@ -2375,29 +2393,54 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, } #ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { + struct task_io_accounting acct = task->ioac; + unsigned long flags; + + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; + + task_io_accounting_add(&acct, &task->signal->ioac); + while_each_thread(task, t) + task_io_accounting_add(&acct, &t->ioac); + + unlock_task_sighand(task, &flags); + } return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" -#endif "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); +} + +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 0); +} + +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) +{ + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ + +static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + seq_printf(m, "%08x\n", task->personality); + return 0; } -#endif /* * Thread groups @@ -2415,10 +2458,14 @@ static const struct pid_entry tgid_base_stuff[] = { REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), ONE("status", S_IRUGO, pid_status), + ONE("personality", S_IRUSR, pid_personality), INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, pid_syscall), +#endif INF("cmdline", S_IRUGO, pid_cmdline), ONE("stat", S_IRUGO, tgid_stat), ONE("statm", S_IRUGO, pid_statm), @@ -2469,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUGO, tgid_io_accounting), #endif }; @@ -2747,10 +2794,14 @@ static const struct pid_entry tid_base_stuff[] = { REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), ONE("status", S_IRUGO, pid_status), + ONE("personality", S_IRUSR, pid_personality), INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, pid_syscall), +#endif INF("cmdline", S_IRUGO, pid_cmdline), ONE("stat", S_IRUGO, tid_stat), ONE("statm", S_IRUGO, pid_statm), @@ -2796,6 +2847,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), #endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, tid_io_accounting), +#endif }; static int proc_tid_base_readdir(struct file * filp, @@ -3035,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct generic_fillattr(inode, stat); if (p) { - rcu_read_lock(); stat->nlink += get_nr_threads(p); - rcu_read_unlock(); put_task_struct(p); } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43e54e86cefd..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -300,10 +300,10 @@ out: return rtn; } -static DEFINE_IDR(proc_inum_idr); +static DEFINE_IDA(proc_inum_ida); static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ -#define PROC_DYNAMIC_FIRST 0xF0000000UL +#define PROC_DYNAMIC_FIRST 0xF0000000U /* * Return an inode number between PROC_DYNAMIC_FIRST and @@ -311,36 +311,34 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ */ static unsigned int get_inode_number(void) { - int i, inum = 0; + unsigned int i; int error; retry: - if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) + if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) return 0; spin_lock(&proc_inum_lock); - error = idr_get_new(&proc_inum_idr, NULL, &i); + error = ida_get_new(&proc_inum_ida, &i); spin_unlock(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return 0; - inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; - - /* inum will never be more than 0xf0ffffff, so no check - * for overflow. - */ - - return inum; + if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { + spin_lock(&proc_inum_lock); + ida_remove(&proc_inum_ida, i); + spin_unlock(&proc_inum_lock); + return 0; + } + return PROC_DYNAMIC_FIRST + i; } static void release_inode_number(unsigned int inum) { - int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; - spin_lock(&proc_inum_lock); - idr_remove(&proc_inum_idr, id); + ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); spin_unlock(&proc_inum_lock); } @@ -549,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { - printk(KERN_WARNING "proc_dir_entry '%s' already " - "registered\n", dp->name); + printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", + dir->name, dp->name); dump_stack(); break; } @@ -597,6 +595,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->pde_users = 0; spin_lock_init(&ent->pde_unload_lock); ent->pde_unload_completion = NULL; + INIT_LIST_HEAD(&ent->pde_openers); out: return ent; } @@ -789,15 +788,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) spin_unlock(&de->pde_unload_lock); continue_removing: + spin_lock(&de->pde_unload_lock); + while (!list_empty(&de->pde_openers)) { + struct pde_opener *pdeo; + + pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); + list_del(&pdeo->lh); + spin_unlock(&de->pde_unload_lock); + pdeo->release(pdeo->inode, pdeo->file); + kfree(pdeo); + spin_lock(&de->pde_unload_lock); + } + spin_unlock(&de->pde_unload_lock); + if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - if (de->subdir) { - printk(KERN_WARNING "%s: removing non-empty directory " + WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, de->subdir->name); - WARN_ON(1); - } if (atomic_dec_and_test(&de->count)) free_proc_entry(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b08d10017911..c6b4fa7e3b49 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/smp_lock.h> +#include <linux/sysctl.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode) module_put(de->owner); de_put(de); } + if (PROC_I(inode)->sysctl) + sysctl_head_put(PROC_I(inode)->sysctl); clear_inode(inode); } @@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; + ei->sysctl = NULL; + ei->sysctl_entry = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; @@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; @@ -111,27 +116,25 @@ int __init proc_init_inodecache(void) return 0; } -static int proc_remount(struct super_block *sb, int *flags, char *data) -{ - *flags |= MS_NODIRATIME; - return 0; -} - static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, - .remount_fs = proc_remount, }; -static void pde_users_dec(struct proc_dir_entry *pde) +static void __pde_users_dec(struct proc_dir_entry *pde) { - spin_lock(&pde->pde_unload_lock); pde->pde_users--; if (pde->pde_unload_completion && pde->pde_users == 0) complete(pde->pde_unload_completion); +} + +static void pde_users_dec(struct proc_dir_entry *pde) +{ + spin_lock(&pde->pde_unload_lock); + __pde_users_dec(pde); spin_unlock(&pde->pde_unload_lock); } @@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file) struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*open)(struct inode *, struct file *); + int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; + + /* + * What for, you ask? Well, we can have open, rmmod, remove_proc_entry + * sequence. ->release won't be called because ->proc_fops will be + * cleared. Depending on complexity of ->release, consequences vary. + * + * We can't wait for mercy when close will be done for real, it's + * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release + * by hand in remove_proc_entry(). For this, save opener's credentials + * for later. + */ + pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); + if (!pdeo) + return -ENOMEM; spin_lock(&pde->pde_unload_lock); if (!pde->proc_fops) { spin_unlock(&pde->pde_unload_lock); - return rv; + kfree(pdeo); + return -EINVAL; } pde->pde_users++; open = pde->proc_fops->open; + release = pde->proc_fops->release; spin_unlock(&pde->pde_unload_lock); if (open) rv = open(inode, file); - pde_users_dec(pde); + spin_lock(&pde->pde_unload_lock); + if (rv == 0 && release) { + /* To know what to release. */ + pdeo->inode = inode; + pdeo->file = file; + /* Strictly for "too late" ->release in proc_reg_release(). */ + pdeo->release = release; + list_add(&pdeo->lh, &pde->pde_openers); + } else + kfree(pdeo); + __pde_users_dec(pde); + spin_unlock(&pde->pde_unload_lock); return rv; } +static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, + struct inode *inode, struct file *file) +{ + struct pde_opener *pdeo; + + list_for_each_entry(pdeo, &pde->pde_openers, lh) { + if (pdeo->inode == inode && pdeo->file == file) + return pdeo; + } + return NULL; +} + static int proc_reg_release(struct inode *inode, struct file *file) { struct proc_dir_entry *pde = PDE(inode); int rv = 0; int (*release)(struct inode *, struct file *); + struct pde_opener *pdeo; spin_lock(&pde->pde_unload_lock); + pdeo = find_pde_opener(pde, inode, file); if (!pde->proc_fops) { - spin_unlock(&pde->pde_unload_lock); + /* + * Can't simply exit, __fput() will think that everything is OK, + * and move on to freeing struct file. remove_proc_entry() will + * find slacker in opener's list and will try to do non-trivial + * things with struct file. Therefore, remove opener from list. + * + * But if opener is removed from list, who will ->release it? + */ + if (pdeo) { + list_del(&pdeo->lh); + spin_unlock(&pde->pde_unload_lock); + rv = pdeo->release(inode, file); + kfree(pdeo); + } else + spin_unlock(&pde->pde_unload_lock); return rv; } pde->pde_users++; release = pde->proc_fops->release; + if (pdeo) { + list_del(&pdeo->lh); + kfree(pdeo); + } spin_unlock(&pde->pde_unload_lock); if (release) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 28cbca805905..3bfb7b8747b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -45,8 +45,6 @@ do { \ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif -extern int maps_protect; - extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, @@ -63,6 +61,7 @@ extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; +extern const struct file_operations proc_kmsg_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); @@ -88,3 +87,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); + +struct pde_opener { + struct inode *inode; + struct file *file; + int (*release)(struct inode *, struct file *); + struct list_head lh; +}; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e78c81fcf547..c2370c76fb71 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -23,6 +23,10 @@ #define CORE_STR "CORE" +#ifndef ELF_CORE_EFLAGS +#define ELF_CORE_EFLAGS 0 +#endif + static int open_kcore(struct inode * inode, struct file * filp) { return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; @@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; -#if defined(CONFIG_H8300) - elf->e_flags = ELF_FLAGS; -#else - elf->e_flags = 0; -#endif + elf->e_flags = ELF_CORE_EFLAGS; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize= sizeof(struct elf_phdr); elf->e_phnum = nphdr; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index ff3b90b56e9d..9fd5df3f40ce 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,6 +15,8 @@ #include <asm/uaccess.h> #include <asm/io.h> +#include "internal.h" + extern wait_queue_head_t log_wait; extern int do_syslog(int type, char __user *bug, int count); diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 79ecd281d2cb..3f87d2632947 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } seq_printf(m, - "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 7e277f2ad466..b675a49c1823 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -24,6 +24,7 @@ #include <linux/tty.h> #include <linux/string.h> #include <linux/mman.h> +#include <linux/quicklist.h> #include <linux/proc_fs.h> #include <linux/ioport.h> #include <linux/mm.h> @@ -67,7 +68,6 @@ extern int get_hardware_list(char *); extern int get_stram_list(char *); extern int get_exec_domain_list(char *); -extern int get_dma_list(char *); static int proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) @@ -123,6 +123,11 @@ static int uptime_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +int __attribute__((weak)) arch_report_meminfo(char *page) +{ + return 0; +} + static int meminfo_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -177,6 +182,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" "PageTables: %8lu kB\n" +#ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" +#endif "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" "WritebackTmp: %8lu kB\n" @@ -209,6 +217,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_PAGETABLE)), +#ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), +#endif K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), K(global_page_state(NR_WRITEBACK_TEMP)), @@ -221,11 +232,12 @@ static int meminfo_read_proc(char *page, char **start, off_t off, len += hugetlb_report_meminfo(page + len); + len += arch_report_meminfo(page + len); + return proc_calc_metrics(page, start, off, count, eof, len); #undef K } -extern const struct seq_operations fragmentation_op; static int fragmentation_open(struct inode *inode, struct file *file) { (void)inode; @@ -239,7 +251,6 @@ static const struct file_operations fragmentation_file_operations = { .release = seq_release, }; -extern const struct seq_operations pagetypeinfo_op; static int pagetypeinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &pagetypeinfo_op); @@ -252,7 +263,6 @@ static const struct file_operations pagetypeinfo_file_ops = { .release = seq_release, }; -extern const struct seq_operations zoneinfo_op; static int zoneinfo_open(struct inode *inode, struct file *file) { return seq_open(file, &zoneinfo_op); @@ -349,7 +359,6 @@ static const struct file_operations proc_devinfo_operations = { .release = seq_release, }; -extern const struct seq_operations vmstat_op; static int vmstat_open(struct inode *inode, struct file *file) { return seq_open(file, &vmstat_op); @@ -461,17 +470,35 @@ static const struct file_operations proc_slabstats_operations = { #ifdef CONFIG_MMU static int vmalloc_open(struct inode *inode, struct file *file) { - return seq_open(file, &vmalloc_op); + unsigned int *ptr = NULL; + int ret; + + if (NUMA_BUILD) + ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + ret = seq_open(file, &vmalloc_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ptr; + } else + kfree(ptr); + return ret; } static const struct file_operations proc_vmalloc_operations = { .open = vmalloc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + static int show_stat(struct seq_file *p, void *v) { int i; @@ -509,7 +536,9 @@ static int show_stat(struct seq_file *p, void *v) sum += temp; per_irq_sum[j] += temp; } + sum += arch_irq_stat_cpu(i); } + sum += arch_irq_stat(); seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", (unsigned long long)cputime64_to_clock_t(user), @@ -654,6 +683,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_FILE_LOCKING static int locks_open(struct inode *inode, struct file *filp) { return seq_open(filp, &locks_seq_operations); @@ -665,6 +695,7 @@ static const struct file_operations proc_locks_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif /* CONFIG_FILE_LOCKING */ static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -858,7 +889,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_PRINTK proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif +#ifdef CONFIG_FILE_LOCKING proc_create("locks", 0, NULL, &proc_locks_operations); +#endif proc_create("devices", 0, NULL, &proc_devinfo_operations); proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 83f357b30d71..7bc296f424ae 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -27,6 +27,11 @@ #include "internal.h" +static struct net *get_proc_net(const struct inode *inode) +{ + return maybe_get_net(PDE_NET(PDE(inode))); +} + int seq_open_net(struct inode *ino, struct file *f, const struct seq_operations *ops, int size) { @@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f, } EXPORT_SYMBOL_GPL(seq_open_net); +int single_open_net(struct inode *inode, struct file *file, + int (*show)(struct seq_file *, void *)) +{ + int err; + struct net *net; + + err = -ENXIO; + net = get_proc_net(inode); + if (net == NULL) + goto err_net; + + err = single_open(file, show, net); + if (err < 0) + goto err_open; + + return 0; + +err_open: + put_net(net); +err_net: + return err; +} +EXPORT_SYMBOL_GPL(single_open_net); + int seq_release_net(struct inode *ino, struct file *f) { struct seq_file *seq; @@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f) } EXPORT_SYMBOL_GPL(seq_release_net); +int single_release_net(struct inode *ino, struct file *f) +{ + struct seq_file *seq = f->private_data; + put_net(seq->private); + return single_release(ino, f); +} +EXPORT_SYMBOL_GPL(single_release_net); + static struct net *get_proc_task_net(struct inode *dir) { struct task_struct *task; @@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name) } EXPORT_SYMBOL_GPL(proc_net_remove); -struct net *get_proc_net(const struct inode *inode) -{ - return maybe_get_net(PDE_NET(PDE(inode))); -} -EXPORT_SYMBOL_GPL(get_proc_net); - static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5acc001d49f6..945a81043ba2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -10,149 +10,110 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; +static const struct file_operations proc_sys_dir_file_operations; +static const struct inode_operations proc_sys_dir_operations; -static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) -{ - /* Refresh the cached information bits in the inode */ - if (table) { - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mode = table->mode; - if (table->proc_handler) { - inode->i_mode |= S_IFREG; - inode->i_nlink = 1; - } else { - inode->i_mode |= S_IFDIR; - inode->i_nlink = 0; /* It is too hard to figure out */ - } - } -} - -static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) +static struct inode *proc_sys_make_inode(struct super_block *sb, + struct ctl_table_header *head, struct ctl_table *table) { struct inode *inode; - struct proc_inode *dir_ei, *ei; - int depth; + struct proc_inode *ei; - inode = new_inode(dir->i_sb); + inode = new_inode(sb); if (!inode) goto out; - /* A directory is always one deeper than it's parent */ - dir_ei = PROC_I(dir); - depth = dir_ei->fd + 1; - + sysctl_head_get(head); ei = PROC_I(inode); - ei->fd = depth; + ei->sysctl = head; + ei->sysctl_entry = table; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &proc_sys_inode_operations; - inode->i_fop = &proc_sys_file_operations; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ - proc_sys_refresh_inode(inode, table); + inode->i_mode = table->mode; + if (!table->child) { + inode->i_mode |= S_IFREG; + inode->i_op = &proc_sys_inode_operations; + inode->i_fop = &proc_sys_file_operations; + } else { + inode->i_mode |= S_IFDIR; + inode->i_nlink = 0; + inode->i_op = &proc_sys_dir_operations; + inode->i_fop = &proc_sys_dir_file_operations; + } out: return inode; } -static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) -{ - for (;;) { - struct proc_inode *ei; - - ei = PROC_I(dentry->d_inode); - if (ei->fd == depth) - break; /* found */ - - dentry = dentry->d_parent; - } - return dentry; -} - -static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, - struct qstr *name) +static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { int len; - for ( ; table->ctl_name || table->procname; table++) { + for ( ; p->ctl_name || p->procname; p++) { - if (!table->procname) + if (!p->procname) continue; - len = strlen(table->procname); + len = strlen(p->procname); if (len != name->len) continue; - if (memcmp(table->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, len) != 0) continue; /* I have a match */ - return table; + return p; } return NULL; } -static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, - struct ctl_table *table) +static struct ctl_table_header *grab_header(struct inode *inode) { - struct dentry *ancestor; - struct proc_inode *ei; - int depth, i; + if (PROC_I(inode)->sysctl) + return sysctl_head_grab(PROC_I(inode)->sysctl); + else + return sysctl_head_next(NULL); +} - ei = PROC_I(dentry->d_inode); - depth = ei->fd; +static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct ctl_table_header *head = grab_header(dir); + struct ctl_table *table = PROC_I(dir)->sysctl_entry; + struct ctl_table_header *h = NULL; + struct qstr *name = &dentry->d_name; + struct ctl_table *p; + struct inode *inode; + struct dentry *err = ERR_PTR(-ENOENT); - if (depth == 0) - return table; + if (IS_ERR(head)) + return ERR_CAST(head); - for (i = 1; table && (i <= depth); i++) { - ancestor = proc_sys_ancestor(dentry, i); - table = proc_sys_lookup_table_one(table, &ancestor->d_name); - if (table) - table = table->child; + if (table && !table->child) { + WARN_ON(1); + goto out; } - return table; -} -static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, - struct qstr *name, - struct ctl_table *table) -{ - table = proc_sys_lookup_table(dparent, table); - if (table) - table = proc_sys_lookup_table_one(table, name); - return table; -} + table = table ? table->child : head->ctl_table; -static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, - struct qstr *name, - struct ctl_table_header **ptr) -{ - struct ctl_table_header *head; - struct ctl_table *table = NULL; - - for (head = sysctl_head_next(NULL); head; - head = sysctl_head_next(head)) { - table = proc_sys_lookup_entry(parent, name, head->ctl_table); - if (table) - break; + p = find_in_table(table, name); + if (!p) { + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) + continue; + p = find_in_table(h->attached_by, name); + if (p) + break; + } } - *ptr = head; - return table; -} - -static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct ctl_table_header *head; - struct inode *inode; - struct dentry *err; - struct ctl_table *table; - err = ERR_PTR(-ENOENT); - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - if (!table) + if (!p) goto out; err = ERR_PTR(-ENOMEM); - inode = proc_sys_make_inode(dir, table); + inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); + if (h) + sysctl_head_finish(h); + if (!inode) goto out; @@ -168,22 +129,14 @@ out: static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, size_t count, loff_t *ppos, int write) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; + struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; ssize_t error; size_t res; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered @@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; + /* if that can happen at all, it should be -EINVAL, not -EISDIR */ + error = -EINVAL; + if (!table->proc_handler) + goto out; + /* careful: calling conventions are nasty here */ res = count; error = table->proc_handler(table, write, filp, buf, &res, ppos); @@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct ctl_table *table) + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) { - struct ctl_table_header *head; - struct ctl_table *child_table = NULL; struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; - int ret; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(qname.name, qname.len); - /* Suppress duplicates. - * Only fill a directory entry if it is the value that - * an ordinary lookup of that name returns. Hide all - * others. - * - * If we ever cache this translation in the dcache - * I should do a dcache lookup first. But for now - * it is just simpler not to. - */ - ret = 0; - child_table = do_proc_sys_lookup(dir, &qname, &head); - sysctl_head_finish(head); - if (child_table != table) - return 0; - child = d_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - inode = proc_sys_make_inode(dir->d_inode, table); - if (!inode) - child = ERR_PTR(-ENOMEM); - else { - new->d_op = &proc_sys_dentry_operations; - d_add(new, inode); + child = d_alloc(dir, &qname); + if (child) { + inode = proc_sys_make_inode(dir->d_sb, head, table); + if (!inode) { + dput(child); + return -ENOMEM; + } else { + child->d_op = &proc_sys_dentry_operations; + d_add(child, inode); } - if (child) - dput(new); - else - child = new; + } else { + return -ENOMEM; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); -end_instantiate: - if (!ino) - ino= find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); + return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); +} + +static int scan(struct ctl_table_header *head, ctl_table *table, + unsigned long *pos, struct file *file, + void *dirent, filldir_t filldir) +{ + + for (; table->ctl_name || table->procname; table++, (*pos)++) { + int res; + + /* Can't do anything without a proc name */ + if (!table->procname) + continue; + + if (*pos < file->f_pos) + continue; + + res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (res) + return res; + + file->f_pos = *pos + 1; + } + return 0; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; + struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - struct ctl_table_header *head = NULL; - struct ctl_table *table; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *h = NULL; unsigned long pos; - int ret; + int ret = -EINVAL; - ret = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) + if (IS_ERR(head)) + return PTR_ERR(head); + + if (table && !table->child) { + WARN_ON(1); goto out; + } + + table = table ? table->child : head->ctl_table; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - /* - Find each instance of the directory - * - Read all entries in each instance - * - Before returning an entry to user space lookup the entry - * by name and if I find a different entry don't return - * this one because it means it is a buried dup. - * For sysctl this should only happen for directory entries. - */ - for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { - table = proc_sys_lookup_table(dentry, head->ctl_table); + ret = scan(head, table, &pos, filp, dirent, filldir); + if (ret) + goto out; - if (!table) + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) continue; - - for (; table->ctl_name || table->procname; table++, pos++) { - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - - if (pos < filp->f_pos) - continue; - - if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) - goto out; - filp->f_pos = pos + 1; + ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + if (ret) { + sysctl_head_finish(h); + break; } } ret = 1; @@ -343,53 +292,24 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) +static int proc_sys_permission(struct inode *inode, int mask) { /* * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head; - struct ctl_table *table; - struct dentry *dentry; - int mode; - int depth; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; int error; - head = NULL; - depth = PROC_I(inode)->fd; - - /* First check the cached permissions, in case we don't have - * enough information to lookup the sysctl table entry. - */ - error = -EACCES; - mode = inode->i_mode; - - if (current->euid == 0) - mode >>= 6; - else if (in_group_p(0)) - mode >>= 3; - - if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) - error = 0; - - /* If we can't get a sysctl table entry the permission - * checks on the cached mode will have to be enough. - */ - if (!nd || !depth) - goto out; - - dentry = nd->path.dentry; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + if (IS_ERR(head)) + return PTR_ERR(head); - /* If the entry does not exist deny permission */ - error = -EACCES; - if (!table) - goto out; + if (!table) /* global root - r-xr-xr-x */ + error = mask & MAY_WRITE ? -EACCES : 0; + else /* Use the permissions on the sysctl table entry */ + error = sysctl_perm(head->root, table, mask); - /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(head->root, table, mask); -out: sysctl_head_finish(head); return error; } @@ -409,42 +329,79 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return error; } -/* I'm lazy and don't distinguish between files and directories, - * until access time. - */ +static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + + if (IS_ERR(head)) + return PTR_ERR(head); + + generic_fillattr(inode, stat); + if (table) + stat->mode = (stat->mode & S_IFMT) | table->mode; + + sysctl_head_finish(head); + return 0; +} + static const struct file_operations proc_sys_file_operations = { .read = proc_sys_read, .write = proc_sys_write, +}; + +static const struct file_operations proc_sys_dir_file_operations = { .readdir = proc_sys_readdir, }; static const struct inode_operations proc_sys_inode_operations = { + .permission = proc_sys_permission, + .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, +}; + +static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct ctl_table_header *head; - struct ctl_table *table; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - proc_sys_refresh_inode(dentry->d_inode, table); - sysctl_head_finish(head); - return !!table; + return !PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_delete(struct dentry *dentry) +{ + return !!PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, + struct qstr *name) +{ + struct dentry *dentry = container_of(qstr, struct dentry, d_name); + if (qstr->len != name->len) + return 1; + if (memcmp(qstr->name, name->name, name->len)) + return 1; + return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); } static struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, + .d_delete = proc_sys_delete, + .d_compare = proc_sys_compare, }; -static struct proc_dir_entry *proc_sys_root; - int proc_sys_init(void) { + struct proc_dir_entry *proc_sys_root; + proc_sys_root = proc_mkdir("sys", NULL); - proc_sys_root->proc_iops = &proc_sys_inode_operations; - proc_sys_root->proc_fops = &proc_sys_file_operations; + proc_sys_root->proc_iops = &proc_sys_dir_operations; + proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return 0; } diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 21f490f5d65c..d153946d6d15 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = { .release = seq_release, }; -static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) -{ - return (*pos < NR_LDISCS) ? pos : NULL; -} - -static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) -{ - (*pos)++; - return (*pos < NR_LDISCS) ? pos : NULL; -} - -static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) -{ -} - -static int tty_ldiscs_seq_show(struct seq_file *m, void *v) -{ - int i = *(loff_t *)v; - struct tty_ldisc *ld; - - ld = tty_ldisc_get(i); - if (ld == NULL) - return 0; - seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i); - tty_ldisc_put(i); - return 0; -} - -static const struct seq_operations tty_ldiscs_seq_ops = { - .start = tty_ldiscs_seq_start, - .next = tty_ldiscs_seq_next, - .stop = tty_ldiscs_seq_stop, - .show = tty_ldiscs_seq_show, -}; - -static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &tty_ldiscs_seq_ops); -} - -static const struct file_operations tty_ldiscs_proc_fops = { - .owner = THIS_MODULE, - .open = proc_tty_ldiscs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab8ccc9d14ff..4806830ea2a1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -210,23 +210,20 @@ static int show_map(struct seq_file *m, void *v) dev_t dev = 0; int len; - if (maps_protect && !ptrace_may_attach(task)) - return -EACCES; - if (file) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } - seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); /* @@ -476,10 +473,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, return -ESRCH; mm = get_task_mm(task); if (mm) { - static struct mm_walk clear_refs_walk; - memset(&clear_refs_walk, 0, sizeof(clear_refs_walk)); - clear_refs_walk.pmd_entry = clear_refs_pte_range; - clear_refs_walk.mm = mm; + struct mm_walk clear_refs_walk = { + .pmd_entry = clear_refs_pte_range, + .mm = mm, + }; down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; @@ -602,11 +599,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, return err; } -static struct mm_walk pagemap_walk = { - .pmd_entry = pagemap_pte_range, - .pte_hole = pagemap_pte_hole -}; - /* * /proc/pid/pagemap - an array mapping virtual pages to pfns * @@ -641,12 +633,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, struct pagemapread pm; int pagecount; int ret = -ESRCH; + struct mm_walk pagemap_walk = {}; + unsigned long src; + unsigned long svpfn; + unsigned long start_vaddr; + unsigned long end_vaddr; if (!task) goto out; ret = -EACCES; - if (!ptrace_may_attach(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_task; ret = -EINVAL; @@ -659,11 +656,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!mm) goto out_task; - ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; uend = (unsigned long)(buf + count); pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; - pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + ret = 0; + if (pagecount == 0) + goto out_mm; + pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + ret = -ENOMEM; if (!pages) goto out_mm; @@ -684,33 +685,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, pm.out = (u64 *)buf; pm.end = (u64 *)(buf + count); - if (!ptrace_may_attach(task)) { - ret = -EIO; - } else { - unsigned long src = *ppos; - unsigned long svpfn = src / PM_ENTRY_BYTES; - unsigned long start_vaddr = svpfn << PAGE_SHIFT; - unsigned long end_vaddr = TASK_SIZE_OF(task); - - /* watch out for wraparound */ - if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) - start_vaddr = end_vaddr; - - /* - * The odds are that this will stop walking way - * before end_vaddr, because the length of the - * user buffer is tracked in "pm", and the walk - * will stop when we hit the end of the buffer. - */ - ret = walk_page_range(start_vaddr, end_vaddr, - &pagemap_walk); - if (ret == PM_END_OF_BUFFER) - ret = 0; - /* don't need mmap_sem for these, but this looks cleaner */ - *ppos += (char *)pm.out - buf; - if (!ret) - ret = (char *)pm.out - buf; - } + pagemap_walk.pmd_entry = pagemap_pte_range; + pagemap_walk.pte_hole = pagemap_pte_hole; + pagemap_walk.mm = mm; + pagemap_walk.private = ± + + src = *ppos; + svpfn = src / PM_ENTRY_BYTES; + start_vaddr = svpfn << PAGE_SHIFT; + end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. + */ + ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += (char *)pm.out - buf; + if (!ret) + ret = (char *)pm.out - buf; out_pages: for (; pagecount; pagecount--) { @@ -738,22 +739,11 @@ const struct file_operations proc_pagemap_operations = { #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); -static int show_numa_map_checked(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - if (maps_protect && !ptrace_may_attach(task)) - return -EACCES; - - return show_numa_map(m, v); -} - static const struct seq_operations proc_pid_numa_maps_op = { .start = m_start, .next = m_next, .stop = m_stop, - .show = show_numa_map_checked + .show = show_numa_map, }; static int numa_maps_open(struct inode *inode, struct file *file) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 4b4f9cc2f186..219bd79ea894 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, static int show_map(struct seq_file *m, void *_vml) { struct vm_list_struct *vml = _vml; - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - - if (maps_protect && !ptrace_may_attach(task)) - return -EACCES; return nommu_vma_show(m, vml->vma); } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9ac0f5e064e0..841368b87a29 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -165,14 +165,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } -static int open_vmcore(struct inode *inode, struct file *filp) -{ - return 0; -} - const struct file_operations proc_vmcore_operations = { .read = read_vmcore, - .open = open_vmcore, }; static struct vmcore* __init get_new_element(void) |