From 8fc63658681f32e6e29f6d1138de933d7272e0ec Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Apr 2008 13:38:44 +0200 Subject: [S390] vmemmap: use clear_table to initialise page tables. Always use clear_table to initialise page tables. The overlapping memcpy is just a leftover of a previous version that wasn't fully converted to clear_table. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/vmem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 35d90a4720fd..3ffc0211dc85 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -77,8 +77,7 @@ static inline pud_t *vmem_pud_alloc(void) pud = vmem_alloc_pages(2); if (!pud) return NULL; - pud_val(*pud) = _REGION3_ENTRY_EMPTY; - memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t)); + clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4); #endif return pud; } @@ -91,7 +90,7 @@ static inline pmd_t *vmem_pmd_alloc(void) pmd = vmem_alloc_pages(2); if (!pmd) return NULL; - clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4); + clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4); #endif return pmd; } -- cgit v1.2.3 From 53492b1de46a7576170e865062ffcfc93bb5650b Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 30 Apr 2008 13:38:46 +0200 Subject: [S390] System z large page support. This adds hugetlbfs support on System z, using both hardware large page support if available and software large page emulation on older hardware. Shared (large) page tables are implemented in software emulation mode, by using page->index of the first tail page from a compound large page to store page table information. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/Makefile | 2 +- arch/s390/mm/fault.c | 3 + arch/s390/mm/hugetlbpage.c | 134 +++++++++++++++++++++++++++++++++++++++++++++ arch/s390/mm/init.c | 23 -------- arch/s390/mm/vmem.c | 55 ++++++++++++++++--- 5 files changed, 186 insertions(+), 31 deletions(-) create mode 100644 arch/s390/mm/hugetlbpage.c (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 66401930f83e..fb988a48a754 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -4,4 +4,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o - +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2650f46001d0..4d537205e83c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -367,6 +368,8 @@ good_area: } survive: + if (is_vm_hugetlb_page(vma)) + address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c new file mode 100644 index 000000000000..f4b6124fdb75 --- /dev/null +++ b/arch/s390/mm/hugetlbpage.c @@ -0,0 +1,134 @@ +/* + * IBM System z Huge TLB Page Support for Kernel. + * + * Copyright 2007 IBM Corp. + * Author(s): Gerald Schaefer + */ + +#include +#include + + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) +{ + pmd_t *pmdp = (pmd_t *) pteptr; + pte_t shadow_pteval = pteval; + unsigned long mask; + + if (!MACHINE_HAS_HPAGE) { + pteptr = (pte_t *) pte_page(pteval)[1].index; + mask = pte_val(pteval) & + (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); + pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + if (mm->context.noexec) { + pteptr += PTRS_PER_PTE; + pte_val(shadow_pteval) = + (_SEGMENT_ENTRY + __pa(pteptr)) | mask; + } + } + + pmd_val(*pmdp) = pte_val(pteval); + if (mm->context.noexec) { + pmdp = get_shadow_table(pmdp); + pmd_val(*pmdp) = pte_val(shadow_pteval); + } +} + +int arch_prepare_hugepage(struct page *page) +{ + unsigned long addr = page_to_phys(page); + pte_t pte; + pte_t *ptep; + int i; + + if (MACHINE_HAS_HPAGE) + return 0; + + ptep = (pte_t *) pte_alloc_one(&init_mm, address); + if (!ptep) + return -ENOMEM; + + pte = mk_pte(page, PAGE_RW); + for (i = 0; i < PTRS_PER_PTE; i++) { + set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); + pte_val(pte) += PAGE_SIZE; + } + page[1].index = (unsigned long) ptep; + return 0; +} + +void arch_release_hugepage(struct page *page) +{ + pte_t *ptep; + + if (MACHINE_HAS_HPAGE) + return; + + ptep = (pte_t *) page[1].index; + if (!ptep) + return; + pte_free(&init_mm, ptep); + page[1].index = 0; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + pudp = pud_alloc(mm, pgdp, addr); + if (pudp) + pmdp = pmd_alloc(mm, pudp, addr); + return (pte_t *) pmdp; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp = NULL; + + pgdp = pgd_offset(mm, addr); + if (pgd_present(*pgdp)) { + pudp = pud_offset(pgdp, addr); + if (pud_present(*pudp)) + pmdp = pmd_offset(pudp, addr); + } + return (pte_t *) pmdp; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + if (!MACHINE_HAS_HPAGE) + return 0; + + return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmdp, int write) +{ + struct page *page; + + if (!MACHINE_HAS_HPAGE) + return NULL; + + page = pmd_page(*pmdp); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 202c952a29b4..acc92f46a096 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -77,28 +77,6 @@ void show_mem(void) printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); } -static void __init setup_ro_region(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t new_pte; - unsigned long address, end; - - address = ((unsigned long)&_stext) & PAGE_MASK; - end = PFN_ALIGN((unsigned long)&_eshared); - - for (; address < end; address += PAGE_SIZE) { - pgd = pgd_offset_k(address); - pud = pud_offset(pgd, address); - pmd = pmd_offset(pud, address); - pte = pte_offset_kernel(pmd, address); - new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); - *pte = new_pte; - } -} - /* * paging_init() sets up the page tables */ @@ -121,7 +99,6 @@ void __init paging_init(void) clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); - setup_ro_region(); /* enable virtual mapping in kernel mode */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3ffc0211dc85..97bce6c97574 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -10,10 +10,12 @@ #include #include #include +#include #include #include #include #include +#include static DEFINE_MUTEX(vmem_mutex); @@ -113,7 +115,7 @@ static pte_t __init_refok *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_range(unsigned long start, unsigned long size) +static int vmem_add_range(unsigned long start, unsigned long size, int ro) { unsigned long address; pgd_t *pg_dir; @@ -140,7 +142,19 @@ static int vmem_add_range(unsigned long start, unsigned long size) pud_populate_kernel(&init_mm, pu_dir, pm_dir); } + pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); pm_dir = pmd_offset(pu_dir, address); + +#ifdef __s390x__ + if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && + (address + HPAGE_SIZE <= start + size) && + (address >= HPAGE_SIZE)) { + pte_val(pte) |= _SEGMENT_ENTRY_LARGE; + pmd_val(*pm_dir) = pte_val(pte); + address += HPAGE_SIZE - PAGE_SIZE; + continue; + } +#endif if (pmd_none(*pm_dir)) { pt_dir = vmem_pte_alloc(); if (!pt_dir) @@ -149,7 +163,6 @@ static int vmem_add_range(unsigned long start, unsigned long size) } pt_dir = pte_offset_kernel(pm_dir, address); - pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); *pt_dir = pte; } ret = 0; @@ -180,6 +193,13 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) continue; + + if (pmd_huge(*pm_dir)) { + pmd_clear_kernel(pm_dir); + address += HPAGE_SIZE - PAGE_SIZE; + continue; + } + pt_dir = pte_offset_kernel(pm_dir, address); *pt_dir = pte; } @@ -248,14 +268,14 @@ out: return ret; } -static int vmem_add_mem(unsigned long start, unsigned long size) +static int vmem_add_mem(unsigned long start, unsigned long size, int ro) { int ret; ret = vmem_add_mem_map(start, size); if (ret) return ret; - return vmem_add_range(start, size); + return vmem_add_range(start, size, ro); } /* @@ -338,7 +358,7 @@ int add_shared_memory(unsigned long start, unsigned long size) if (ret) goto out_free; - ret = vmem_add_mem(start, size); + ret = vmem_add_mem(start, size, 0); if (ret) goto out_remove; @@ -374,14 +394,35 @@ out: */ void __init vmem_map_init(void) { + unsigned long ro_start, ro_end; + unsigned long start, end; int i; INIT_LIST_HEAD(&init_mm.context.crst_list); INIT_LIST_HEAD(&init_mm.context.pgtable_list); init_mm.context.noexec = 0; NODE_DATA(0)->node_mem_map = VMEM_MAP; - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) - vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size); + ro_start = ((unsigned long)&_stext) & PAGE_MASK; + ro_end = PFN_ALIGN((unsigned long)&_eshared); + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + start = memory_chunk[i].addr; + end = memory_chunk[i].addr + memory_chunk[i].size; + if (start >= ro_end || end <= ro_start) + vmem_add_mem(start, end - start, 0); + else if (start >= ro_start && end <= ro_end) + vmem_add_mem(start, end - start, 1); + else if (start >= ro_start) { + vmem_add_mem(start, ro_end - start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } else if (end < ro_end) { + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, end - ro_start, 1); + } else { + vmem_add_mem(start, ro_start - start, 0); + vmem_add_mem(ro_start, ro_end - ro_start, 1); + vmem_add_mem(ro_end, end - ro_end, 0); + } + } } /* -- cgit v1.2.3 From 17f345808563d2f425b2b15d60c4a5b00112e9eb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 30 Apr 2008 13:38:47 +0200 Subject: [S390] Convert to SPARSEMEM & SPARSEMEM_VMEMMAP Convert s390 to SPARSEMEM and SPARSEMEM_VMEMMAP. We do a select of SPARSEMEM_VMEMMAP since it is configurable. This is because SPARSEMEM without SPARSEMEM_VMEMMAP gives us a hell of broken include dependencies that I don't want to fix. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 8 ++--- arch/s390/mm/init.c | 2 ++ arch/s390/mm/vmem.c | 81 ++++----------------------------------------------- 3 files changed, 12 insertions(+), 79 deletions(-) (limited to 'arch/s390/mm') diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index ed2af0a3303b..f231f5ec74b6 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -287,7 +287,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc < 0) goto out_free; - rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); if (rc) goto out_free; @@ -351,7 +351,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long release_resource(seg->res); kfree(seg->res); out_shared: - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -474,7 +474,7 @@ segment_modify_shared (char *name, int do_nonshared) rc = 0; goto out_unlock; out_del: - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); kfree(seg); @@ -508,7 +508,7 @@ segment_unload(char *name) goto out_unlock; release_resource(seg->res); kfree(seg->res); - remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1); list_del(&seg->list); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); kfree(seg); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index acc92f46a096..fa31de6ae97a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -106,6 +106,8 @@ void __init paging_init(void) __ctl_load(S390_lowcore.kernel_asce, 13, 13); __raw_local_irq_ssm(ssm_mask); + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 97bce6c97574..beccacf907f3 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -27,43 +27,6 @@ struct memory_segment { static LIST_HEAD(mem_segs); -void __meminit memmap_init(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn) -{ - struct page *start, *end; - struct page *map_start, *map_end; - int i; - - start = pfn_to_page(start_pfn); - end = start + size; - - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long cstart, cend; - - cstart = PFN_DOWN(memory_chunk[i].addr); - cend = cstart + PFN_DOWN(memory_chunk[i].size); - - map_start = mem_map + cstart; - map_end = mem_map + cend; - - if (map_start < start) - map_start = start; - if (map_end > end) - map_end = end; - - map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) - / sizeof(struct page); - map_end += ((PFN_ALIGN((unsigned long) map_end) - - (unsigned long) map_end) - / sizeof(struct page)); - - if (map_start < map_end) - memmap_init_zone((unsigned long)(map_end - map_start), - nid, zone, page_to_pfn(map_start), - MEMMAP_EARLY); - } -} - static void __ref *vmem_alloc_pages(unsigned int order) { if (slab_is_available()) @@ -115,7 +78,7 @@ static pte_t __init_refok *vmem_pte_alloc(void) /* * Add a physical memory range to the 1:1 mapping. */ -static int vmem_add_range(unsigned long start, unsigned long size, int ro) +static int vmem_add_mem(unsigned long start, unsigned long size, int ro) { unsigned long address; pgd_t *pg_dir; @@ -209,10 +172,9 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. */ -static int vmem_add_mem_map(unsigned long start, unsigned long size) +int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) { unsigned long address, start_addr, end_addr; - struct page *map_start, *map_end; pgd_t *pg_dir; pud_t *pu_dir; pmd_t *pm_dir; @@ -220,11 +182,8 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size) pte_t pte; int ret = -ENOMEM; - map_start = VMEM_MAP + PFN_DOWN(start); - map_end = VMEM_MAP + PFN_DOWN(start + size); - - start_addr = (unsigned long) map_start & PAGE_MASK; - end_addr = PFN_ALIGN((unsigned long) map_end); + start_addr = (unsigned long) start; + end_addr = (unsigned long) (start + nr); for (address = start_addr; address < end_addr; address += PAGE_SIZE) { pg_dir = pgd_offset_k(address); @@ -268,16 +227,6 @@ out: return ret; } -static int vmem_add_mem(unsigned long start, unsigned long size, int ro) -{ - int ret; - - ret = vmem_add_mem_map(start, size); - if (ret) - return ret; - return vmem_add_range(start, size, ro); -} - /* * Add memory segment to the segment list if it doesn't overlap with * an already present segment. @@ -315,7 +264,7 @@ static void __remove_shared_memory(struct memory_segment *seg) vmem_remove_range(seg->start, seg->size); } -int remove_shared_memory(unsigned long start, unsigned long size) +int vmem_remove_mapping(unsigned long start, unsigned long size) { struct memory_segment *seg; int ret; @@ -339,11 +288,9 @@ out: return ret; } -int add_shared_memory(unsigned long start, unsigned long size) +int vmem_add_mapping(unsigned long start, unsigned long size) { struct memory_segment *seg; - struct page *page; - unsigned long pfn, num_pfn, end_pfn; int ret; mutex_lock(&vmem_mutex); @@ -361,21 +308,6 @@ int add_shared_memory(unsigned long start, unsigned long size) ret = vmem_add_mem(start, size, 0); if (ret) goto out_remove; - - pfn = PFN_DOWN(start); - num_pfn = PFN_DOWN(size); - end_pfn = pfn + num_pfn; - - page = pfn_to_page(pfn); - memset(page, 0, num_pfn * sizeof(struct page)); - - for (; pfn < end_pfn; pfn++) { - page = pfn_to_page(pfn); - init_page_count(page); - reset_page_mapcount(page); - SetPageReserved(page); - INIT_LIST_HEAD(&page->lru); - } goto out; out_remove: @@ -401,7 +333,6 @@ void __init vmem_map_init(void) INIT_LIST_HEAD(&init_mm.context.crst_list); INIT_LIST_HEAD(&init_mm.context.pgtable_list); init_mm.context.noexec = 0; - NODE_DATA(0)->node_mem_map = VMEM_MAP; ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { -- cgit v1.2.3