summaryrefslogtreecommitdiff
path: root/arch/i386/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/mm')
-rw-r--r--arch/i386/mm/discontig.c31
-rw-r--r--arch/i386/mm/fault.c43
-rw-r--r--arch/i386/mm/hugetlbpage.c27
-rw-r--r--arch/i386/mm/init.c71
-rw-r--r--arch/i386/mm/ioremap.c4
-rw-r--r--arch/i386/mm/pageattr.c7
-rw-r--r--arch/i386/mm/pgtable.c21
7 files changed, 131 insertions, 73 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index c369a8bf7cbe..c4af9638dbfa 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -37,7 +37,7 @@
#include <asm/mmzone.h>
#include <bios_ebda.h>
-struct pglist_data *node_data[MAX_NUMNODES];
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
bootmem_data_t node0_bdata;
@@ -49,8 +49,8 @@ bootmem_data_t node0_bdata;
* 2) node_start_pfn - the starting page frame number for a node
* 3) node_end_pfn - the ending page fram number for a node
*/
-unsigned long node_start_pfn[MAX_NUMNODES];
-unsigned long node_end_pfn[MAX_NUMNODES];
+unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly;
+unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
#ifdef CONFIG_DISCONTIGMEM
@@ -66,7 +66,7 @@ unsigned long node_end_pfn[MAX_NUMNODES];
* physnode_map[4-7] = 1;
* physnode_map[8- ] = -1;
*/
-s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
EXPORT_SYMBOL(physnode_map);
void memory_present(int nid, unsigned long start, unsigned long end)
@@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
extern unsigned long find_max_low_pfn(void);
extern void find_max_pfn(void);
-extern void one_highpage_init(struct page *, int, int);
+extern void add_one_highpage_init(struct page *, int, int);
extern struct e820map e820;
extern unsigned long init_pg_tables_end;
@@ -243,14 +243,6 @@ static unsigned long calculate_numa_remap_pages(void)
/* now the roundup is correct, convert to PAGE_SIZE pages */
size = size * PTRS_PER_PTE;
- if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
- /*
- * Adjust size if node_end_pfn is not on a proper
- * pmd boundary. remap_numa_kva will barf otherwise.
- */
- size += node_end_pfn[nid] & (PTRS_PER_PTE-1);
- }
-
/*
* Validate the region we are allocating only contains valid
* pages.
@@ -270,6 +262,17 @@ static unsigned long calculate_numa_remap_pages(void)
reserve_pages += size;
printk("Shrinking node %d from %ld pages to %ld pages\n",
nid, node_end_pfn[nid], node_end_pfn[nid] - size);
+
+ if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
+ /*
+ * Align node_end_pfn[] and node_remap_start_pfn[] to
+ * pmd boundary. remap_numa_kva will barf otherwise.
+ */
+ printk("Shrinking node %d further by %ld pages for proper alignment\n",
+ nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
+ size += node_end_pfn[nid] & (PTRS_PER_PTE-1);
+ }
+
node_end_pfn[nid] -= size;
node_remap_start_pfn[nid] = node_end_pfn[nid];
}
@@ -424,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro)
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
- one_highpage_init(page, node_pfn, bad_ppro);
+ add_one_highpage_init(page, node_pfn, bad_ppro);
}
}
totalram_pages += totalhigh_pages;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 8e90339d6eaa..cf572d9a3b6e 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -21,6 +21,7 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -107,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
desc = (void *)desc + (seg & ~7);
} else {
/* Must disable preemption while reading the GDT. */
- desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu());
+ desc = (u32 *)get_cpu_gdt_table(get_cpu());
desc = (void *)desc + (seg & ~7);
}
@@ -199,6 +200,18 @@ static inline int is_prefetch(struct pt_regs *regs, unsigned long addr,
return 0;
}
+static noinline void force_sig_info_fault(int si_signo, int si_code,
+ unsigned long address, struct task_struct *tsk)
+{
+ siginfo_t info;
+
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = (void __user *)address;
+ force_sig_info(si_signo, &info, tsk);
+}
+
fastcall void do_invalid_op(struct pt_regs *, unsigned long);
/*
@@ -211,18 +224,18 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long);
* bit 1 == 0 means read, 1 means write
* bit 2 == 0 means kernel, 1 means user-mode
*/
-fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+fastcall void __kprobes do_page_fault(struct pt_regs *regs,
+ unsigned long error_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long address;
unsigned long page;
- int write;
- siginfo_t info;
+ int write, si_code;
/* get the address */
- __asm__("movl %%cr2,%0":"=r" (address));
+ address = read_cr2();
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
@@ -233,7 +246,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
tsk = current;
- info.si_code = SEGV_MAPERR;
+ si_code = SEGV_MAPERR;
/*
* We fault-in kernel-space virtual memory on-demand. The
@@ -313,7 +326,7 @@ fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code)
* we can handle it..
*/
good_area:
- info.si_code = SEGV_ACCERR;
+ si_code = SEGV_ACCERR;
write = 0;
switch (error_code & 3) {
default: /* 3: write, present */
@@ -387,11 +400,7 @@ bad_area_nosemaphore:
/* Kernel addresses are always protection faults */
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *)address;
- force_sig_info(SIGSEGV, &info, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
return;
}
@@ -446,7 +455,7 @@ no_context:
printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip);
- asm("movl %%cr3,%0":"=r" (page));
+ page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = %08lx\n", page);
/*
@@ -500,11 +509,7 @@ do_sigbus:
tsk->thread.cr2 = address;
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, tsk);
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
return;
vmalloc_fault:
@@ -523,7 +528,7 @@ vmalloc_fault:
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
- asm("movl %%cr3,%0":"=r" (pgd_paddr));
+ pgd_paddr = read_cr3();
pgd = index + (pgd_t *)__va(pgd_paddr);
pgd_k = init_mm.pgd + index;
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 3b099f32b948..d524127c9afc 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -22,12 +22,15 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd = NULL;
+ pte_t *pte = NULL;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
- pmd = pmd_alloc(mm, pud, addr);
- return (pte_t *) pmd;
+ if (pud)
+ pte = (pte_t *) pmd_alloc(mm, pud, addr);
+ BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
+
+ return pte;
}
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
@@ -37,8 +40,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
pmd_t *pmd = NULL;
pgd = pgd_offset(mm, addr);
- pud = pud_offset(pgd, addr);
- pmd = pmd_offset(pud, addr);
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, addr);
+ if (pud_present(*pud))
+ pmd = pmd_offset(pud, addr);
+ }
return (pte_t *) pmd;
}
@@ -118,17 +124,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
}
#endif
-void hugetlb_clean_stale_pgtable(pte_t *pte)
-{
- pmd_t *pmd = (pmd_t *) pte;
- struct page *page;
-
- page = pmd_page(*pmd);
- pmd_clear(pmd);
- dec_page_state(nr_page_table_pages);
- page_cache_release(page);
-}
-
/* x86_64 also uses this file */
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 12216b52e28b..542d9298da5e 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -27,6 +27,7 @@
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
+#include <linux/memory_hotplug.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -198,9 +199,10 @@ int page_is_ram(unsigned long pagenr)
if (efi_enabled) {
efi_memory_desc_t *md;
+ void *p;
- for (i = 0; i < memmap.nr_map; i++) {
- md = &memmap.map[i];
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
if (!is_available_memory(md))
continue;
addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT;
@@ -265,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __devinit free_new_highpage(struct page *page)
+{
+ set_page_count(page, 1);
+ __free_page(page);
+ totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
- set_page_count(page, 1);
- __free_page(page);
- totalhigh_pages++;
+ free_new_highpage(page);
} else
SetPageReserved(page);
}
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+ free_new_highpage(page);
+ totalram_pages++;
+#ifdef CONFIG_FLATMEM
+ max_mapnr = max(pfn, max_mapnr);
+#endif
+ num_physpages++;
+ return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+ ClearPageReserved(page);
+ add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
@@ -283,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
{
int pfn;
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
- one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+ add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_FLATMEM */
@@ -348,7 +379,7 @@ static void __init pagetable_init (void)
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+ set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
#endif
}
@@ -392,7 +423,7 @@ void zap_low_mappings (void)
}
static int disable_nx __initdata = 0;
-u64 __supported_pte_mask = ~_PAGE_NX;
+u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
/*
* noexec = on|off
@@ -614,6 +645,28 @@ void __init mem_init(void)
#endif
}
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+ struct pglist_data *pgdata = &contig_page_data;
+ struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+#endif
+
kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index f379b8d67558..5d09de8d1c6b 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long pfn;
pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel(&init_mm, pmd, addr);
+ pte = pte_alloc_kernel(pmd, addr);
if (!pte)
return -ENOMEM;
do {
@@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr,
flush_cache_all();
phys_addr -= addr;
pgd = pgd_offset_k(addr);
- spin_lock(&init_mm.page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
if (err)
break;
} while (pgd++, addr = next, addr != end);
- spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
return err;
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index cb3da6baa704..f600fc244f02 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -12,6 +12,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);
@@ -52,8 +53,8 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot)
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : PAGE_KERNEL);
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
+ addr == address ? prot : PAGE_KERNEL));
}
return base;
}
@@ -62,7 +63,7 @@ static void flush_kernel_map(void *dummy)
{
/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
if (boot_cpu_data.x86_model >= 4)
- asm volatile("wbinvd":::"memory");
+ wbinvd();
/* Flush all to work around Errata in early athlons regarding
* large page flushing.
*/
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd2f7afc7a2a..9db3242103be 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -31,11 +31,13 @@ void show_mem(void)
pg_data_t *pgdat;
unsigned long i;
struct page_state ps;
+ unsigned long flags;
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
+ pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
total++;
@@ -48,6 +50,7 @@ void show_mem(void)
else if (page_count(page))
shared += page_count(page) - 1;
}
+ pgdat_resize_unlock(pgdat, &flags);
}
printk(KERN_INFO "%d pages of RAM\n", total);
printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -188,38 +191,38 @@ static inline void pgd_list_add(pgd_t *pgd)
struct page *page = virt_to_page(pgd);
page->index = (unsigned long)pgd_list;
if (pgd_list)
- pgd_list->private = (unsigned long)&page->index;
+ set_page_private(pgd_list, (unsigned long)&page->index);
pgd_list = page;
- page->private = (unsigned long)&pgd_list;
+ set_page_private(page, (unsigned long)&pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *next, **pprev, *page = virt_to_page(pgd);
next = (struct page *)page->index;
- pprev = (struct page **)page->private;
+ pprev = (struct page **)page_private(page);
*pprev = next;
if (next)
- next->private = (unsigned long)pprev;
+ set_page_private(next, (unsigned long)pprev);
}
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags;
- if (PTRS_PER_PMD == 1)
+ if (PTRS_PER_PMD == 1) {
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
+ }
- memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
- (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
-
+ KERNEL_PGD_PTRS);
if (PTRS_PER_PMD > 1)
return;
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
}
/* never called when PTRS_PER_PMD > 1 */