diff options
Diffstat (limited to 'arch/i386/mm')
-rw-r--r-- | arch/i386/mm/discontig.c | 31 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 43 | ||||
-rw-r--r-- | arch/i386/mm/hugetlbpage.c | 27 | ||||
-rw-r--r-- | arch/i386/mm/init.c | 71 | ||||
-rw-r--r-- | arch/i386/mm/ioremap.c | 4 | ||||
-rw-r--r-- | arch/i386/mm/pageattr.c | 7 | ||||
-rw-r--r-- | arch/i386/mm/pgtable.c | 21 |
7 files changed, 131 insertions, 73 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index c369a8bf7cbe..c4af9638dbfa 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -37,7 +37,7 @@ #include <asm/mmzone.h> #include <bios_ebda.h> -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; @@ -49,8 +49,8 @@ bootmem_data_t node0_bdata; * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; +unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; #ifdef CONFIG_DISCONTIGMEM @@ -66,7 +66,7 @@ unsigned long node_end_pfn[MAX_NUMNODES]; * physnode_map[4-7] = 1; * physnode_map[8- ] = -1; */ -s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) @@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); -extern void one_highpage_init(struct page *, int, int); +extern void add_one_highpage_init(struct page *, int, int); extern struct e820map e820; extern unsigned long init_pg_tables_end; @@ -243,14 +243,6 @@ static unsigned long calculate_numa_remap_pages(void) /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; - if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { - /* - * Adjust size if node_end_pfn is not on a proper - * pmd boundary. remap_numa_kva will barf otherwise. - */ - size += node_end_pfn[nid] & (PTRS_PER_PTE-1); - } - /* * Validate the region we are allocating only contains valid * pages. @@ -270,6 +262,17 @@ static unsigned long calculate_numa_remap_pages(void) reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); + + if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { + /* + * Align node_end_pfn[] and node_remap_start_pfn[] to + * pmd boundary. remap_numa_kva will barf otherwise. + */ + printk("Shrinking node %d further by %ld pages for proper alignment\n", + nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); + size += node_end_pfn[nid] & (PTRS_PER_PTE-1); + } + node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; } @@ -424,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro) if (!pfn_valid(node_pfn)) continue; page = pfn_to_page(node_pfn); - one_highpage_init(page, node_pfn, bad_ppro); + add_one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 8e90339d6eaa..cf572d9a3b6e 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -21,6 +21,7 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/highmem.h> #include <linux/module.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -107,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, desc = (void *)desc + (seg & ~7); } else { /* Must disable preemption while reading the GDT. */ - desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu()); + desc = (u32 *)get_cpu_gdt_table(get_cpu()); desc = (void *)desc + (seg & ~7); } @@ -199,6 +200,18 @@ static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, return 0; } +static noinline void force_sig_info_fault(int si_signo, int si_code, + unsigned long address, struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); +} + fastcall void do_invalid_op(struct pt_regs *, unsigned long); /* @@ -211,18 +224,18 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long); * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) +fastcall void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; unsigned long page; - int write; - siginfo_t info; + int write, si_code; /* get the address */ - __asm__("movl %%cr2,%0":"=r" (address)); + address = read_cr2(); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) @@ -233,7 +246,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The @@ -313,7 +326,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) * we can handle it.. */ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; write = 0; switch (error_code & 3) { default: /* 3: write, present */ @@ -387,11 +400,7 @@ bad_area_nosemaphore: /* Kernel addresses are always protection faults */ tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk); return; } @@ -446,7 +455,7 @@ no_context: printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); - asm("movl %%cr3,%0":"=r" (page)); + page = read_cr3(); page = ((unsigned long *) __va(page))[address >> 22]; printk(KERN_ALERT "*pde = %08lx\n", page); /* @@ -500,11 +509,7 @@ do_sigbus: tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); return; vmalloc_fault: @@ -523,7 +528,7 @@ vmalloc_fault: pmd_t *pmd, *pmd_k; pte_t *pte_k; - asm("movl %%cr3,%0":"=r" (pgd_paddr)); + pgd_paddr = read_cr3(); pgd = index + (pgd_t *)__va(pgd_paddr); pgd_k = init_mm.pgd + index; diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 3b099f32b948..d524127c9afc 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -22,12 +22,15 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; + pte_t *pte = NULL; pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - pmd = pmd_alloc(mm, pud, addr); - return (pte_t *) pmd; + if (pud) + pte = (pte_t *) pmd_alloc(mm, pud, addr); + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; } pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) @@ -37,8 +40,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } return (pte_t *) pmd; } @@ -118,17 +124,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, } #endif -void hugetlb_clean_stale_pgtable(pte_t *pte) -{ - pmd_t *pmd = (pmd_t *) pte; - struct page *page; - - page = pmd_page(*pmd); - pmd_clear(pmd); - dec_page_state(nr_page_table_pages); - page_cache_release(page); -} - /* x86_64 also uses this file */ #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 12216b52e28b..542d9298da5e 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -27,6 +27,7 @@ #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/efi.h> +#include <linux/memory_hotplug.h> #include <asm/processor.h> #include <asm/system.h> @@ -198,9 +199,10 @@ int page_is_ram(unsigned long pagenr) if (efi_enabled) { efi_memory_desc_t *md; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if (!is_available_memory(md)) continue; addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; @@ -265,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +void __devinit free_new_highpage(struct page *page) +{ + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + free_new_highpage(page); } else SetPageReserved(page); } +static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + #ifdef CONFIG_NUMA extern void set_highmem_pages_init(int); #else @@ -283,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro) { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } #endif /* CONFIG_FLATMEM */ @@ -348,7 +379,7 @@ static void __init pagetable_init (void) * All user-space mappings are explicitly cleared after * SMP startup. */ - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; + set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); #endif } @@ -392,7 +423,7 @@ void zap_low_mappings (void) } static int disable_nx __initdata = 0; -u64 __supported_pte_mask = ~_PAGE_NX; +u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* * noexec = on|off @@ -614,6 +645,28 @@ void __init mem_init(void) #endif } +/* + * this is for the non-NUMA, single node SMP system case. + * Specifically, in the case of x86, we will always add + * memory to the highmem for now. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +int add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = &contig_page_data; + struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +#endif + kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index f379b8d67558..5d09de8d1c6b 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long pfn; pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(&init_mm, pmd, addr); + pte = pte_alloc_kernel(pmd, addr); if (!pte) return -ENOMEM; do { @@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr, flush_cache_all(); phys_addr -= addr; pgd = pgd_offset_k(addr); - spin_lock(&init_mm.page_table_lock); do { next = pgd_addr_end(addr, end); err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); if (err) break; } while (pgd++, addr = next, addr != end); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return err; } diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index cb3da6baa704..f600fc244f02 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -12,6 +12,7 @@ #include <asm/uaccess.h> #include <asm/processor.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); @@ -52,8 +53,8 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot) addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { - pbase[i] = pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : PAGE_KERNEL); + set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, + addr == address ? prot : PAGE_KERNEL)); } return base; } @@ -62,7 +63,7 @@ static void flush_kernel_map(void *dummy) { /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ if (boot_cpu_data.x86_model >= 4) - asm volatile("wbinvd":::"memory"); + wbinvd(); /* Flush all to work around Errata in early athlons regarding * large page flushing. */ diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index bd2f7afc7a2a..9db3242103be 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -31,11 +31,13 @@ void show_mem(void) pg_data_t *pgdat; unsigned long i; struct page_state ps; + unsigned long flags; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -48,6 +50,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); @@ -188,38 +191,38 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); page->index = (unsigned long)pgd_list; if (pgd_list) - pgd_list->private = (unsigned long)&page->index; + set_page_private(pgd_list, (unsigned long)&page->index); pgd_list = page; - page->private = (unsigned long)&pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *)page->index; - pprev = (struct page **)page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) - next->private = (unsigned long)pprev; + set_page_private(next, (unsigned long)pprev); } void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) { unsigned long flags; - if (PTRS_PER_PMD == 1) + if (PTRS_PER_PMD == 1) { + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); spin_lock_irqsave(&pgd_lock, flags); + } - memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD, + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - + KERNEL_PGD_PTRS); if (PTRS_PER_PMD > 1) return; pgd_list_add(pgd); spin_unlock_irqrestore(&pgd_lock, flags); - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); } /* never called when PTRS_PER_PMD > 1 */ |