From b8f8c3cf0a4ac0632ec3f0e15e9dc0c29de917af Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 18 Jul 2008 17:27:28 +0200 Subject: nohz: prevent tick stop outside of the idle loop Jack Ren and Eric Miao tracked down the following long standing problem in the NOHZ code: scheduler switch to idle task enable interrupts Window starts here ----> interrupt happens (does not set NEED_RESCHED) irq_exit() stops the tick ----> interrupt happens (does set NEED_RESCHED) return from schedule() cpu_idle(): preempt_disable(); Window ends here The interrupts can happen at any point inside the race window. The first interrupt stops the tick, the second one causes the scheduler to rerun and switch away from idle again and we end up with the tick disabled. The fact that it needs two interrupts where the first one does not set NEED_RESCHED and the second one does made the bug obscure and extremely hard to reproduce and analyse. Kudos to Jack and Eric. Solution: Limit the NOHZ functionality to the idle loop to make sure that we can not run into such a situation ever again. cpu_idle() { preempt_disable(); while(1) { tick_nohz_stop_sched_tick(1); <- tell NOHZ code that we are in the idle loop while (!need_resched()) halt(); tick_nohz_restart_sched_tick(); <- disables NOHZ mode preempt_enable_no_resched(); schedule(); preempt_disable(); } } In hindsight we should have done this forever, but ... /me grabs a large brown paperbag. 
Debugged-by: Jack Ren , Debugged-by: eric miao Signed-off-by: Thomas Gleixner --- arch/um/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 83603cfbde81..a1c6d07cac3e 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -243,7 +243,7 @@ void default_idle(void) if (need_resched()) schedule(); - tick_nohz_stop_sched_tick(); + tick_nohz_stop_sched_tick(1); nsecs = disable_timer(); idle_sleep(nsecs); tick_nohz_restart_sched_tick(); -- cgit v1.2.3 From 4c182ae7810f3fe444e666f3f78c209a7c116fdf Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 23 Jul 2008 21:28:47 -0700 Subject: arch/um/kernel/irq.c: clean up some functions Make activate_fd() and free_irq_by_irq_and_dev() static. Remove init_aio_irq() since it has no users. Cc: Jeff Dike Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/irq.c | 35 ++--------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 91587f8db340..3d7aad09b171 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -102,7 +102,7 @@ void sigio_handler(int sig, struct uml_pt_regs *regs) static DEFINE_SPINLOCK(irq_lock); -int activate_fd(int irq, int fd, int type, void *dev_id) +static int activate_fd(int irq, int fd, int type, void *dev_id) { struct pollfd *tmp_pfd; struct irq_fd *new_fd, *irq_fd; @@ -216,7 +216,7 @@ static int same_irq_and_dev(struct irq_fd *irq, void *d) return ((irq->irq == data->irq) && (irq->id == data->dev)); } -void free_irq_by_irq_and_dev(unsigned int irq, void *dev) +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, .dev = dev }); @@ -403,37 +403,6 @@ void __init init_IRQ(void) } } -int init_aio_irq(int irq, char *name, irq_handler_t 
handler) -{ - int fds[2], err; - - err = os_pipe(fds, 1, 1); - if (err) { - printk(KERN_ERR "init_aio_irq - os_pipe failed, err = %d\n", - -err); - goto out; - } - - err = um_request_irq(irq, fds[0], IRQ_READ, handler, - IRQF_DISABLED | IRQF_SAMPLE_RANDOM, name, - (void *) (long) fds[0]); - if (err) { - printk(KERN_ERR "init_aio_irq - : um_request_irq failed, " - "err = %d\n", - err); - goto out_close; - } - - err = fds[1]; - goto out; - - out_close: - os_close_file(fds[0]); - os_close_file(fds[1]); - out: - return err; -} - /* * IRQ stack entry and exit: * -- cgit v1.2.3 From 4a5675820436e4ad738dd442c1cc8a165101509b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 23 Jul 2008 21:28:49 -0700 Subject: arch/um/kernel/mem.c: remove arch_validate() - Remove arch_validate(), because no one uses it. - Remove useless macro HAVE_ARCH_VALIDATE. - Make the variable 'empty_bad_page' static. Cc: Jeff Dike Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/ksyms.c | 1 - arch/um/kernel/mem.c | 33 +-------------------------------- 2 files changed, 1 insertion(+), 33 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index ccc02a616c22..836fc9b94707 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -18,7 +18,6 @@ EXPORT_SYMBOL(get_signals); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(sys_waitpid); EXPORT_SYMBOL(flush_tlb_range); -EXPORT_SYMBOL(arch_validate); EXPORT_SYMBOL(high_physmem); EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index b0ee64622ff7..e2274ef3155d 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -21,7 +21,7 @@ /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ unsigned long *empty_zero_page = NULL; /* allocated in paging_init and unchanged thereafter */ -unsigned long *empty_bad_page = NULL; +static unsigned long *empty_bad_page = NULL; /* * Initialized 
during boot, and readonly for initializing page tables @@ -240,37 +240,6 @@ void __init paging_init(void) #endif } -struct page *arch_validate(struct page *page, gfp_t mask, int order) -{ - unsigned long addr, zero = 0; - int i; - - again: - if (page == NULL) - return page; - if (PageHighMem(page)) - return page; - - addr = (unsigned long) page_address(page); - for (i = 0; i < (1 << order); i++) { - current->thread.fault_addr = (void *) addr; - if (__do_copy_to_user((void __user *) addr, &zero, - sizeof(zero), - &current->thread.fault_addr, - &current->thread.fault_catcher)) { - if (!(mask & __GFP_WAIT)) - return NULL; - else break; - } - addr += PAGE_SIZE; - } - - if (i == (1 << order)) - return page; - page = alloc_pages(mask, order); - goto again; -} - /* * This can't do anything because nothing in the kernel image can be freed * since it's not in kernel physical memory. -- cgit v1.2.3 From 99764fa4ceeecba8b9e0a8a5565b418a2e94f83b Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 23 Jul 2008 21:28:49 -0700 Subject: UML: make several more things static - Make some variables and functions static, since they don't need to be global. - Remove an unused function - arch/um/kernel/time.c::sched_clock(). - Clean the style a bit as complained by checkpatch.pl. 
Cc: Jeff Dike Signed-off-by: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/physmem.c | 2 +- arch/um/kernel/ptrace.c | 2 +- arch/um/kernel/time.c | 8 -------- arch/um/kernel/uaccess.c | 2 +- 4 files changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 9757085a0220..a1a9090254c2 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -185,7 +185,7 @@ unsigned long find_iomem(char *driver, unsigned long *len_out) return 0; } -int setup_iomem(void) +static int setup_iomem(void) { struct iomem_region *region = iomem_regions; unsigned long iomem_start = high_physmem + PAGE_SIZE; diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 47b57b497d55..15e8b7c4de13 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -225,7 +225,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, +static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, int error_code) { struct siginfo info; diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index c3e2f369c33c..47f04f4a3464 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -13,14 +13,6 @@ #include "kern_util.h" #include "os.h" -/* - * Scheduler clock - returns current time in nanosec units. 
- */ -unsigned long long sched_clock(void) -{ - return (unsigned long long)jiffies_64 * (NSEC_PER_SEC / HZ); -} - void timer_handler(int sig, struct uml_pt_regs *regs) { unsigned long flags; diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c index f0f4b040d7c5..dd33f040c526 100644 --- a/arch/um/kernel/uaccess.c +++ b/arch/um/kernel/uaccess.c @@ -12,7 +12,7 @@ #include #include "os.h" -void __do_copy(void *to, const void *from, int n) +static void __do_copy(void *to, const void *from, int n) { memcpy(to, from, n); } -- cgit v1.2.3 From 79b0cbd113a9de1eaa3322528ccaeb97bd9189cc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 25 Jul 2008 19:46:12 -0700 Subject: um: use generic show_mem() Remove arch-specific show_mem() in favor of the generic version. This also removes the following redundant information display: - free swap pages, printed by show_swap_cache_info() - pages in swapcache, printed by show_swap_cache_info() where show_mem() calls show_free_areas(), which calls show_swap_cache_info(). 
Signed-off-by: Johannes Weiner Acked-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/mem.c | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'arch/um/kernel') diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index e2274ef3155d..61d7e6138ff5 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -264,37 +264,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void show_mem(void) -{ - int pfn, total = 0, reserved = 0; - int shared = 0, cached = 0; - int high_mem = 0; - struct page *page; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); - printk(KERN_INFO "Free swap: %6ldkB\n", - nr_swap_pages<<(PAGE_SHIFT-10)); - pfn = max_mapnr; - while (pfn-- > 0) { - page = pfn_to_page(pfn); - total++; - if (PageHighMem(page)) - high_mem++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - printk(KERN_INFO "%d pages of RAM\n", total); - printk(KERN_INFO "%d pages of HIGHMEM\n", high_mem); - printk(KERN_INFO "%d reserved pages\n", reserved); - printk(KERN_INFO "%d pages shared\n", shared); - printk(KERN_INFO "%d pages swap cached\n", cached); -} - /* Allocate and free page tables. */ pgd_t *pgd_alloc(struct mm_struct *mm) -- cgit v1.2.3